964 files changed, 97901 insertions, 15126 deletions
diff --git a/.gitignore b/.gitignore
index a7ac2d2fc7..d947306558 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ core/method_bind.inc
 core/method_bind_ext.inc
 core/script_encryption_key.cpp
 core/global_defaults.cpp
+drivers/unix/os_unix_global_settings_path.cpp
 tools/editor/register_exporters.cpp
 tools/editor/doc_data_compressed.h
 tools/editor/editor_icons.cpp
@@ -296,3 +297,4 @@ cscope.po.out
 godot.creator.*
 
 projects/
+platform/windows/godot_res.res
diff --git a/README.md b/README.md
index 3456290f74..d5dadb93a7 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,11 @@ The editor, language and APIs are feature rich, yet simple to learn, allowing yo
 
 ### About
 
-Godot has been developed by Juan Linietsky and Ariel Manzur for several years, and was born as an in-house engine, used to publish several work-for-hire titles.
-Development is sponsored by OKAM Studio (http://www.okamstudio.com).
+Godot has been developed by Juan Linietsky and Ariel Manzur for several years, and was born as an in-house engine, used to publish several work-for-hire titles. Godot is a member project of the [Software Freedom Conservancy](https://sfconservancy.org).
 
 ### Documentation
 
-Documentation has been moved to the [GitHub Wiki](https://github.com/okamstudio/godot/wiki).
+Documentation has been moved to the [OpenProject Wiki](http://godotengine.org/projects/godot-engine/wiki/Documentation).
 
 ### Binary Downloads, Community, etc.
 
@@ -23,4 +22,4 @@ http://www.godotengine.org
 ### Compiling from Source
 
 Compilation instructions for every platform can be found in the Wiki:
-https://github.com/okamstudio/godot/wiki/advanced
+http://godotengine.org/projects/godot-engine/wiki/Advanced_topics
diff --git a/SConstruct b/SConstruct
index 0087d97af3..95ad0010a2 100644
--- a/SConstruct
+++ b/SConstruct
@@ -59,7 +59,7 @@ platform_arg = ARGUMENTS.get("platform", False)
 if (os.name=="posix"):
 	pass
 elif (os.name=="nt"):
-    if (os.getenv("VSINSTALLDIR")==None or platform_arg=="android"):
+	if (os.getenv("VSINSTALLDIR")==None or platform_arg=="android"):
 		custom_tools=['mingw']
 
 env_base=Environment(tools=custom_tools,ENV = {'PATH' : os.environ['PATH']});
@@ -84,6 +84,10 @@ env_base.__class__.disable_module = methods.disable_module
 
 env_base.__class__.add_source_files = methods.add_source_files
 
+env_base["x86_opt_gcc"]=False
+env_base["x86_opt_vc"]=False
+env_base["armv7_opt_gcc"]=False
+
 customs = ['custom.py']
 
 profile = ARGUMENTS.get("profile", False)
@@ -106,7 +110,7 @@ opts.Add('opus','Build Opus Audio Format Support: (yes/no)','yes')
 opts.Add('minizip','Build Minizip Archive Support: (yes/no)','yes')
 opts.Add('squish','Squish BC Texture Compression in editor (yes/no)','yes')
 opts.Add('theora','Theora Video (yes/no)','yes')
-opts.Add('use_theoraplayer_binary', "Use precompiled binaries from libtheoraplayer for ogg/theora/vorbis (yes/no)", "no")
+opts.Add('theoralib','Theora Video (yes/no)','no')
 opts.Add('freetype','Freetype support in editor','yes')
 opts.Add('speex','Speex Audio (yes/no)','yes')
 opts.Add('xml','XML Save/Load support (yes/no)','yes')
@@ -123,6 +127,7 @@ opts.Add("CXX", "Compiler");
 opts.Add("CCFLAGS", "Custom flags for the C++ compiler");
 opts.Add("CFLAGS", "Custom flags for the C compiler");
 opts.Add("LINKFLAGS", "Custom flags for the linker");
+opts.Add('unix_global_settings_path', 'unix-specific path to system-wide settings. Currently only used by templates.','')
 opts.Add('disable_3d', 'Disable 3D nodes for smaller executable (yes/no)', "no")
 opts.Add('disable_advanced_gui', 'Disable advance 3D gui nodes and behaviors (yes/no)', "no")
 opts.Add('colored', 'Enable colored output for the compilation (yes/no)', 'no')
@@ -185,7 +190,7 @@ if selected_platform in platform_list:
 	if env['vsproj']=="yes":
 		env.vs_incs = []
 		env.vs_srcs = []
-		
+
 		def AddToVSProject( sources ):
 			for x in sources:
 				if type(x) == type(""):
@@ -197,12 +202,12 @@ if selected_platform in platform_list:
 					basename = pieces[0]
 					basename = basename.replace('\\\\','/')
 					env.vs_srcs = env.vs_srcs + [basename + ".cpp"]
-					env.vs_incs = env.vs_incs + [basename + ".h"]					
-					#print basename	
-		env.AddToVSProject = AddToVSProject				
-		
+					env.vs_incs = env.vs_incs + [basename + ".h"]
+					#print basename
+		env.AddToVSProject = AddToVSProject
+
 	env.extra_suffix=""
-	
+
 	if env["extra_suffix"] != '' :
 		env.extra_suffix += '.'+env["extra_suffix"]
 
@@ -229,7 +234,7 @@ if selected_platform in platform_list:
 	#must happen after the flags, so when flags are used by configure, stuff happens (ie, ssl on x11)
 	detect.configure(env)
 
-        #env['platform_libsuffix'] = env['LIBSUFFIX']
+	#env['platform_libsuffix'] = env['LIBSUFFIX']
 
 	suffix="."+selected_platform
 
@@ -284,10 +289,11 @@ if selected_platform in platform_list:
 
 	if (env['musepack']=='yes'):
 		env.Append(CPPFLAGS=['-DMUSEPACK_ENABLED']);
-        if (env['openssl']!='no'):
-            env.Append(CPPFLAGS=['-DOPENSSL_ENABLED']);
-            if (env['openssl']=="builtin"):
-                env.Append(CPPPATH=['#drivers/builtin_openssl2'])
+
+	if (env['openssl']!='no'):
+		env.Append(CPPFLAGS=['-DOPENSSL_ENABLED']);
+		if (env['openssl']=="builtin"):
+			env.Append(CPPPATH=['#drivers/builtin_openssl2'])
 
 	if (env["builtin_zlib"]=='yes'):
 		env.Append(CPPPATH=['#drivers/builtin_zlib/zlib'])
@@ -303,8 +309,12 @@ if selected_platform in platform_list:
 	if (env['opus']=='yes'):
 		env.Append(CPPFLAGS=['-DOPUS_ENABLED']);
 
+
 	if (env['theora']=='yes'):
-		env.Append(CPPFLAGS=['-DTHEORA_ENABLED']);
+		env['theoralib']='yes'
+		env.Append(CPPFLAGS=['-DTHEORA_ENABLED']);		
+	if (env['theoralib']=='yes'):
+		env.Append(CPPFLAGS=['-DTHEORALIB_ENABLED']);
 
 	if (env['png']=='yes'):
 		env.Append(CPPFLAGS=['-DPNG_ENABLED']);
@@ -337,7 +347,7 @@ if selected_platform in platform_list:
 
 	if (env['colored']=='yes'):
 		methods.colored(sys,env)
-		
+
 	if (env['etc1']=='yes'):
 		env.Append(CPPFLAGS=['-DETC1_ENABLED'])
 
@@ -356,22 +366,22 @@ if selected_platform in platform_list:
 	SConscript("main/SCsub")
 
 	SConscript("platform/"+selected_platform+"/SCsub"); # build selected platform
-	
-	# Microsoft Visual Studio Project Generation			
-	if (env['vsproj'])=="yes":		
-	
+
+	# Microsoft Visual Studio Project Generation
+	if (env['vsproj'])=="yes":
+
 		AddToVSProject(env.core_sources)
 		AddToVSProject(env.main_sources)
-		AddToVSProject(env.modules_sources)	
+		AddToVSProject(env.modules_sources)
 		AddToVSProject(env.scene_sources)
 		AddToVSProject(env.servers_sources)
 		AddToVSProject(env.tool_sources)
-		
+
 		#env['MSVS_VERSION']='9.0'
 		env['MSVSBUILDCOM'] = "scons platform=" + selected_platform + " target=" + env["target"] + " bits=" + env["bits"] + " tools=yes"
 		env['MSVSREBUILDCOM'] = "scons platform=" + selected_platform + " target=" + env["target"] + " bits=" + env["bits"] + " tools=yes vsproj=true"
 		env['MSVSCLEANCOM'] = "scons --clean platform=" + selected_platform + " target=" + env["target"] + " bits=" + env["bits"] + " tools=yes"
-			
+
 		debug_variants = ['Debug|Win32']+['Debug|x64']
 		release_variants = ['Release|Win32']+['Release|x64']
 		release_debug_variants = ['Release_Debug|Win32']+['Release_Debug|x64']
@@ -382,11 +392,11 @@ if selected_platform in platform_list:
 		targets = debug_targets + release_targets + release_debug_targets
 		msvproj = env.MSVSProject(target = ['#godot' + env['MSVSPROJECTSUFFIX'] ],
 								incs = env.vs_incs,
-								srcs = env.vs_srcs, 
-								runfile = targets, 
-								buildtarget = targets, 
-								auto_build_solution=1, 
-								variant = variants) 		
+								srcs = env.vs_srcs,
+								runfile = targets,
+								buildtarget = targets,
+								auto_build_solution=1,
+								variant = variants)
 
 else:
 
diff --git a/bin/tests/SCsub b/bin/tests/SCsub
index 6613df9c05..57c9bc63b2 100644
--- a/bin/tests/SCsub
+++ b/bin/tests/SCsub
@@ -10,5 +10,3 @@ Export('env')
 lib = env.Library("tests",env.tests_sources)
 
 env.Prepend(LIBS=[lib])
-
-
diff --git a/core/SCsub b/core/SCsub
index d04041141c..4ce91c794f 100644
--- a/core/SCsub
+++ b/core/SCsub
@@ -63,5 +63,3 @@ SConscript('bind/SCsub');
 lib = env.Library("core",env.core_sources)
 
 env.Prepend(LIBS=[lib])
-
-
diff --git a/core/bind/SCsub b/core/bind/SCsub
index c6ba1fa537..7b4a6acbc0 100644
--- a/core/bind/SCsub
+++ b/core/bind/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.core_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/core/error_macros.h b/core/error_macros.h
index 76da88287b..cc88686301 100644
--- a/core/error_macros.h
+++ b/core/error_macros.h
@@ -207,6 +207,11 @@ extern bool _err_error_exists;
 		_err_error_exists=false;\
 	} \
 
+#define ERR_PRINTS(m_string) /* report an error whose message is any expression convertible to String */ \
+	{ \
+		_err_print_error(FUNCTION_STR,__FILE__,__LINE__,String(m_string).utf8().get_data());	\
+		_err_error_exists=false;\
+	} \
 
 /** Print a warning string.
  */
diff --git a/core/globals.cpp b/core/globals.cpp
index ffd4cf5d5e..aee708d0cd 100644
--- a/core/globals.cpp
+++ b/core/globals.cpp
@@ -332,6 +332,7 @@ Error Globals::setup(const String& p_path,const String & p_main_pack) {
 
 		String candidate = d->get_current_dir();
 		String current_dir = d->get_current_dir();
+		String exec_name = OS::get_singleton()->get_executable_path().get_file().basename();
 		bool found = false;
 		bool first_time=true;
 
@@ -339,7 +340,16 @@ Error Globals::setup(const String& p_path,const String & p_main_pack) {
 			//try to load settings in ascending through dirs shape!
 
 			//tries to open pack, but only first time
-			if (first_time && (_load_resource_pack(current_dir+"/data.pck") || _load_resource_pack(current_dir+"/data.pcz") )) {
+			if (first_time && (_load_resource_pack(current_dir+"/"+exec_name+".pck") || _load_resource_pack(current_dir+"/"+exec_name+".pcz") )) {
+				if (_load_settings("res://engine.cfg")==OK || _load_settings_binary("res://engine.cfb")==OK) {
+
+					_load_settings("res://override.cfg");
+					found=true;
+
+
+				}
+				break;
+			} else if (first_time && (_load_resource_pack(current_dir+"/data.pck") || _load_resource_pack(current_dir+"/data.pcz") )) {
 				if (_load_settings("res://engine.cfg")==OK || _load_settings_binary("res://engine.cfb")==OK) {
 
 					_load_settings("res://override.cfg");
diff --git a/core/image.cpp b/core/image.cpp
index 06b7a78488..eadb7ecc8b 100644
--- a/core/image.cpp
+++ b/core/image.cpp
@@ -322,7 +322,7 @@ void Image::set_pallete(const DVector<uint8_t>& p_data) {
 	DVector<uint8_t>::Write wp = data.write();
 	unsigned char *dst=wp.ptr() + pal_ofs;
 
-	DVector<uint8_t>::Read r = data.read();
+	DVector<uint8_t>::Read r = p_data.read();
 	const unsigned char *src=r.ptr();
 
 	copymem(dst, src, len);
diff --git a/core/io/SCsub b/core/io/SCsub
index 5aecb4b915..3ff9b355a4 100644
--- a/core/io/SCsub
+++ b/core/io/SCsub
@@ -5,5 +5,3 @@ env.add_source_files(env.core_sources,"*.c")
 #env.core_sources.append("io/fastlz.c")
 
 Export('env')
-
-
diff --git a/core/io/http_client.cpp b/core/io/http_client.cpp
index 24012660d2..58092efd4b 100644
--- a/core/io/http_client.cpp
+++ b/core/io/http_client.cpp
@@ -579,7 +579,7 @@ Error HTTPClient::_get_http_data(uint8_t* p_buffer, int p_bytes,int &r_received)
 
 void HTTPClient::_bind_methods() {
 
-	ObjectTypeDB::bind_method(_MD("connect:Error","host","port","use_ssl"),&HTTPClient::connect,DEFVAL(false),DEFVAL(true));
+	ObjectTypeDB::bind_method(_MD("connect:Error","host","port","use_ssl","verify_host"),&HTTPClient::connect,DEFVAL(false),DEFVAL(true));
 	ObjectTypeDB::bind_method(_MD("set_connection","connection:StreamPeer"),&HTTPClient::set_connection);
 	ObjectTypeDB::bind_method(_MD("request","method","url","headers","body"),&HTTPClient::request,DEFVAL(String()));
 	ObjectTypeDB::bind_method(_MD("send_body_text","body"),&HTTPClient::send_body_text);
@@ -601,6 +601,8 @@ void HTTPClient::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("get_status"),&HTTPClient::get_status);
 	ObjectTypeDB::bind_method(_MD("poll:Error"),&HTTPClient::poll);
 
+    ObjectTypeDB::bind_method(_MD("query_string_from_dict:String","fields"),&HTTPClient::query_string_from_dict);
+
 
 	BIND_CONSTANT( METHOD_GET );
 	BIND_CONSTANT( METHOD_HEAD );
@@ -689,6 +691,16 @@ void HTTPClient::set_read_chunk_size(int p_size) {
 	read_chunk_size=p_size;
 }
 
+String HTTPClient::query_string_from_dict(const Dictionary& p_dict) {
+	// Joins the escaped "key=value" pairs with '&'; an empty dictionary gives "".
+	String result;
+	Array names = p_dict.keys();
+	for (int idx = 0; idx < names.size(); ++idx) {
+		result += String(idx > 0 ? "&" : "") + String(names[idx]).http_escape() + "=" + String(p_dict[names[idx]]).http_escape();
+	}
+	return result;
+}
+
 HTTPClient::HTTPClient(){
 
 	tcp_connection = StreamPeerTCP::create_ref();
@@ -710,4 +722,3 @@ HTTPClient::~HTTPClient(){
 
 }
 
-
diff --git a/core/io/http_client.h b/core/io/http_client.h
index 21281f38c5..b103dc43fc 100644
--- a/core/io/http_client.h
+++ b/core/io/http_client.h
@@ -192,6 +192,8 @@ public:
 
 	Error poll();
 
+    String query_string_from_dict(const Dictionary& p_dict);
+
 	HTTPClient();
 	~HTTPClient();
 };
diff --git a/core/io/json.cpp b/core/io/json.cpp
index 14890abd26..22c99d0465 100644
--- a/core/io/json.cpp
+++ b/core/io/json.cpp
@@ -177,9 +177,6 @@ Error JSON::_get_token(const CharType *p_str, int &idx, int p_len, Token& r_toke
 							case 'n': res=10; break;
 							case 'f': res=12; break;
 							case 'r': res=13; break;
-							case '\"': res='\"'; break;
-							case '\\': res='\\'; break;
-							case '/': res='/'; break; //wtf
 							case 'u': {
 								//hexnumbarh - oct is deprecated
 
@@ -218,10 +215,13 @@ Error JSON::_get_token(const CharType *p_str, int &idx, int p_len, Token& r_toke
 
 
 							} break;
+							//case '\"': res='\"'; break;
+							//case '\\': res='\\'; break;
+							//case '/': res='/'; break;
 							default: {
-
-								r_err_str="Invalid escape sequence";
-								return ERR_PARSE_ERROR;
+								res = next;
+								//r_err_str="Invalid escape sequence";
+								//return ERR_PARSE_ERROR;
 							} break;
 						}
 
diff --git a/core/io/resource_format_binary.cpp b/core/io/resource_format_binary.cpp
index 1a0552e8d1..4cd3cd595f 100644
--- a/core/io/resource_format_binary.cpp
+++ b/core/io/resource_format_binary.cpp
@@ -905,7 +905,7 @@ void ResourceInteractiveLoaderBinary::open(FileAccess *p_f) {
 
 		error=ERR_FILE_UNRECOGNIZED;
 		ERR_EXPLAIN("Unrecognized binary resource file: "+local_path);
-		ERR_FAIL_V();
+		ERR_FAIL();
 	}
 
 	bool big_endian = f->get_32();
diff --git a/core/io/resource_format_xml.cpp b/core/io/resource_format_xml.cpp
index 66ae014dbc..48917a19ea 100644
--- a/core/io/resource_format_xml.cpp
+++ b/core/io/resource_format_xml.cpp
@@ -1955,7 +1955,6 @@ void ResourceFormatLoaderXML::get_recognized_extensions_for_type(const String& p
 		if (ext=="res")
 			continue;
 		p_extensions->push_back("x"+ext);
-		p_extensions->push_back(ext);
 	}
 
 	p_extensions->push_back("xml");
diff --git a/core/math/SCsub b/core/math/SCsub
index c6ba1fa537..7b4a6acbc0 100644
--- a/core/math/SCsub
+++ b/core/math/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.core_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/core/math/math_2d.cpp b/core/math/math_2d.cpp
index 88717723ce..ce03f089e5 100644
--- a/core/math/math_2d.cpp
+++ b/core/math/math_2d.cpp
@@ -29,7 +29,7 @@
 #include "math_2d.h"
 
 
-real_t Vector2::atan2() const {
+real_t Vector2::angle() const {
 
 	return Math::atan2(x,y);
 }
@@ -165,7 +165,7 @@ Vector2 Vector2::floor() const {
 Vector2 Vector2::rotated(float p_by) const {
 
 	Vector2 v;
-	v.set_rotation(atan2()+p_by);
+	v.set_rotation(angle()+p_by);
 	v*=length();
 	return v;
 }
diff --git a/core/math/math_2d.h b/core/math/math_2d.h
index 5e6cefd114..3d40e24091 100644
--- a/core/math/math_2d.h
+++ b/core/math/math_2d.h
@@ -133,7 +133,7 @@ struct Vector2 {
 	bool operator<(const Vector2& p_vec2) const { return (x==p_vec2.x)?(y<p_vec2.y):(x<p_vec2.x); }
 	bool operator<=(const Vector2& p_vec2) const { return (x==p_vec2.x)?(y<=p_vec2.y):(x<=p_vec2.x); }
 
-	real_t atan2() const;
+	real_t angle() const;
 
 	void set_rotation(float p_radians) {
 
diff --git a/core/math/quat.h b/core/math/quat.h
index de4aedaeec..f161e35074 100644
--- a/core/math/quat.h
+++ b/core/math/quat.h
@@ -73,7 +73,7 @@ public:
 			-x * v.x - y * v.y - z * v.z);
 	}
 
-	_FORCE_INLINE_ Vector3 xform(const Vector3& v) {
+	_FORCE_INLINE_ Vector3 xform(const Vector3& v) const {
 
 		Quat q = *this * v;
 		q *= this->inverse();
diff --git a/core/object.cpp b/core/object.cpp
index 07ac430d7a..96f0c86832 100644
--- a/core/object.cpp
+++ b/core/object.cpp
@@ -314,6 +314,7 @@ void Object::set(const StringName& p_name, const Variant& p_value, bool *r_valid
 
 	_edited=true;
 #endif
+
 	if (script_instance) {
 
 		if (script_instance->set(p_name,p_value)) {
@@ -326,9 +327,9 @@ void Object::set(const StringName& p_name, const Variant& p_value, bool *r_valid
 
 	//try built-in setgetter
 	{
-		if (ObjectTypeDB::set_property(this,p_name,p_value)) {
-			if (r_valid)
-				*r_valid=true;
+		if (ObjectTypeDB::set_property(this,p_name,p_value,r_valid)) {
+			//if (r_valid)
+			//	*r_valid=true;
 			return;
 		}
 	}
@@ -970,7 +971,10 @@ void Object::set_script_instance(ScriptInstance *p_instance) {
 
 	script_instance=p_instance;
 
-	script=p_instance->get_script().get_ref_ptr();
+	if (p_instance)
+		script=p_instance->get_script().get_ref_ptr();
+	else
+		script=RefPtr();
 }
 
 RefPtr Object::get_script() const {
@@ -1691,6 +1695,26 @@ void Object::get_translatable_strings(List<String> *p_strings) const {
 
 }
 
+Variant::Type Object::get_static_property_type(const StringName& p_property, bool *r_valid) const {
+	// Properties registered in ObjectTypeDB are checked before script ones.
+	bool found_in_db;
+	const Variant::Type db_type = ObjectTypeDB::get_property_type(get_type_name(),p_property,&found_in_db);
+	if (found_in_db) {
+		if (r_valid)
+			*r_valid=true;
+		return db_type;
+	}
+
+	// Otherwise ask the script instance, passing r_valid straight through.
+	const ScriptInstance *instance = get_script_instance();
+	if (instance)
+		return instance->get_property_type(p_property,r_valid);
+
+	if (r_valid)
+		*r_valid=false;
+	return Variant::NIL;
+}
+
 bool Object::is_queued_for_deletion() const {
 	return _is_queued_for_deletion;
 }
diff --git a/core/object.h b/core/object.h
index 981a83958c..5b6361796f 100644
--- a/core/object.h
+++ b/core/object.h
@@ -606,6 +606,8 @@ public:
 	void set_block_signals(bool p_block);
 	bool is_blocking_signals() const;
 
+	Variant::Type get_static_property_type(const StringName& p_property,bool *r_valid=NULL) const;
+
 	virtual void get_translatable_strings(List<String> *p_strings) const;
 
 	virtual void get_argument_options(const StringName& p_function,int p_idx,List<String>*r_options) const;
diff --git a/core/object_type_db.cpp b/core/object_type_db.cpp
index a64b3d2715..f8ba0a9b15 100644
--- a/core/object_type_db.cpp
+++ b/core/object_type_db.cpp
@@ -612,6 +612,7 @@ void ObjectTypeDB::add_property(StringName p_type,const PropertyInfo& p_pinfo, c
 	psg._setptr=mb_set;
 	psg._getptr=mb_get;
 	psg.index=p_index;
+	psg.type=p_pinfo.type;
 
 	type->property_setget[p_pinfo.name]=psg;
 
@@ -634,7 +635,7 @@ void ObjectTypeDB::get_property_list(StringName p_type,List<PropertyInfo> *p_lis
 	}
 
 }
-bool ObjectTypeDB::set_property(Object* p_object,const StringName& p_property, const Variant& p_value) {
+bool ObjectTypeDB::set_property(Object* p_object,const StringName& p_property, const Variant& p_value,bool *r_valid) {
 
 
 	TypeInfo *type=types.getptr(p_object->get_type_name());
@@ -643,13 +644,17 @@ bool ObjectTypeDB::set_property(Object* p_object,const StringName& p_property, c
 		const PropertySetGet *psg = check->property_setget.getptr(p_property);
 		if (psg) {
 
-			if (!psg->setter)
+			if (!psg->setter) {
+				if (r_valid)
+					*r_valid=false;
 				return true; //return true but do nothing
+			}
+
+			Variant::CallError ce;
 
 			if (psg->index>=0) {
 				Variant index=psg->index;
 				const Variant* arg[2]={&index,&p_value};
-				Variant::CallError ce;
 //				p_object->call(psg->setter,arg,2,ce);
 				if (psg->_setptr) {
 					psg->_setptr->call(p_object,arg,2,ce);
@@ -660,13 +665,16 @@ bool ObjectTypeDB::set_property(Object* p_object,const StringName& p_property, c
 
 			} else {
 				const Variant* arg[1]={&p_value};
-				Variant::CallError ce;
 				if (psg->_setptr) {
 					psg->_setptr->call(p_object,arg,1,ce);
 				} else {
 					p_object->call(psg->setter,arg,1,ce);
 				}
 			}
+
+			if (r_valid)
+				*r_valid=ce.error==Variant::CallError::CALL_OK;
+
 			return true;
 		}
 
@@ -718,6 +726,29 @@ bool ObjectTypeDB::get_property(Object* p_object,const StringName& p_property, V
 	return false;
 }
 
+Variant::Type ObjectTypeDB::get_property_type(const StringName& p_type, const StringName& p_property,bool *r_is_valid) {
+
+	// Look at p_type first, then at each ancestor reached through inherits_ptr,
+	// and return the recorded type of the first property named p_property.
+	for (TypeInfo *info=types.getptr(p_type); info; info=info->inherits_ptr) {
+
+		const PropertySetGet *entry = info->property_setget.getptr(p_property);
+		if (!entry)
+			continue;
+
+		if (r_is_valid)
+			*r_is_valid=true;
+		return entry->type;
+	}
+
+	// Nothing in the chain has the property (or getptr(p_type) returned NULL).
+	if (r_is_valid)
+		*r_is_valid=false;
+
+	return Variant::NIL;
+
+}
+
 
 void ObjectTypeDB::set_method_flags(StringName p_type,StringName p_method,int p_flags) {
 
diff --git a/core/object_type_db.h b/core/object_type_db.h
index bfa0f921e5..319e3ec02c 100644
--- a/core/object_type_db.h
+++ b/core/object_type_db.h
@@ -117,6 +117,7 @@ class ObjectTypeDB {
 		StringName getter;
 		MethodBind *_setptr;
 		MethodBind *_getptr;
+		Variant::Type type;
 	};
 
 	struct TypeInfo {
@@ -456,8 +457,9 @@ public:
 
 	static void add_property(StringName p_type,const PropertyInfo& p_pinfo, const StringName& p_setter, const StringName& p_getter, int p_index=-1);
 	static void get_property_list(StringName p_type,List<PropertyInfo> *p_list,bool p_no_inheritance=false);
-	static bool set_property(Object* p_object,const StringName& p_property, const Variant& p_value);
+	static bool set_property(Object* p_object, const StringName& p_property, const Variant& p_value, bool *r_valid=NULL);
 	static bool get_property(Object* p_object,const StringName& p_property, Variant& r_value);
+	static Variant::Type get_property_type(const StringName& p_type, const StringName& p_property,bool *r_is_valid=NULL);
 
 
 
diff --git a/core/os/SCsub b/core/os/SCsub
index c6ba1fa537..7b4a6acbc0 100644
--- a/core/os/SCsub
+++ b/core/os/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.core_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/core/os/os.cpp b/core/os/os.cpp
index ee9f12b79d..8caf95e4d1 100644
--- a/core/os/os.cpp
+++ b/core/os/os.cpp
@@ -61,9 +61,16 @@ void OS::debug_break() {
 
 void OS::print_error(const char* p_function,const char* p_file,int p_line,const char *p_code,const char*p_rationale,ErrorType p_type) {
 
+	// Label that prefixes both output lines; initialised so it is never read uninitialised if p_type matches no case.
+	const char* err_type="**ERROR**";
+	switch(p_type) {
+		case ERR_ERROR: err_type="**ERROR**"; break;
+		case ERR_WARNING: err_type="**WARNING**"; break;
+		case ERR_SCRIPT: err_type="**SCRIPT ERROR**"; break;
+	}
 	if (p_rationale && *p_rationale)
-		print("**ERROR**: %s\n ",p_rationale);
-	print("**ERROR**: At: %s:%i:%s() - %s\n",p_file,p_line,p_function,p_code);
+		print("%s: %s\n ",err_type,p_rationale);
+	print("%s: At: %s:%i:%s() - %s\n",err_type,p_file,p_line,p_function,p_code);
 }
 
 void OS::print(const char* p_format, ...) {
diff --git a/core/os/os.h b/core/os/os.h
index e5338b4a02..e908177df7 100644
--- a/core/os/os.h
+++ b/core/os/os.h
@@ -184,6 +184,7 @@ public:
 	virtual void set_low_processor_usage_mode(bool p_enabled);
 	virtual bool is_in_low_processor_usage_mode() const;
 
+	virtual String get_installed_templates_path() const { return ""; };
 	virtual String get_executable_path() const;
 	virtual Error execute(const String& p_path, const List<String>& p_arguments,bool p_blocking,ProcessID *r_child_id=NULL,String* r_pipe=NULL,int *r_exitcode=NULL)=0;
 	virtual Error kill(const ProcessID& p_pid)=0;
diff --git a/core/ring_buffer.h b/core/ring_buffer.h
index 3cf9cf9064..5cbd261ec8 100644
--- a/core/ring_buffer.h
+++ b/core/ring_buffer.h
@@ -155,6 +155,12 @@ public:
 	inline int size() {
 		return data.size();
 	};
+
+	inline void clear() {
+		// Reset both cursors to the start; the storage itself is left as-is.
+		read_pos = 0;
+		write_pos = 0;
+	}
 	
 	void resize(int p_power) {
 		int old_size = size();
diff --git a/core/script_language.cpp b/core/script_language.cpp
index 35c50b1022..b7a0f579f4 100644
--- a/core/script_language.cpp
+++ b/core/script_language.cpp
@@ -267,6 +267,20 @@ void PlaceHolderScriptInstance::get_property_list(List<PropertyInfo> *p_properti
 	}
 }
 
+Variant::Type PlaceHolderScriptInstance::get_property_type(const StringName& p_name,bool *r_is_valid) const {
+
+	// Report the type of the value currently stored under p_name, if any.
+	const bool known = values.has(p_name);
+	if (r_is_valid)
+		*r_is_valid=known;
+
+	if (!known)
+		return Variant::NIL;
+
+	return values[p_name].get_type();
+}
+
+
 void PlaceHolderScriptInstance::update(const List<PropertyInfo> &p_properties,const Map<StringName,Variant>& p_values) {
 
 
diff --git a/core/script_language.h b/core/script_language.h
index 5a0f673b94..9660f141c7 100644
--- a/core/script_language.h
+++ b/core/script_language.h
@@ -82,6 +82,7 @@ public:
 	virtual StringName get_instance_base_type() const=0; // this may not work in all scripts, will return empty if so
 	virtual ScriptInstance* instance_create(Object *p_this)=0;
 	virtual bool instance_has(const Object *p_this) const=0;
+
 	
 	virtual bool has_source_code() const=0;
 	virtual String get_source_code() const=0;
@@ -109,6 +110,7 @@ public:
 	virtual bool set(const StringName& p_name, const Variant& p_value)=0;
 	virtual bool get(const StringName& p_name, Variant &r_ret) const=0;
 	virtual void get_property_list(List<PropertyInfo> *p_properties) const=0;
+	virtual Variant::Type get_property_type(const StringName& p_name,bool *r_is_valid=NULL) const=0;
 
 	virtual void get_method_list(List<MethodInfo> *p_list) const=0;
 	virtual bool has_method(const StringName& p_method) const=0;
@@ -208,6 +210,7 @@ public:
 	virtual bool set(const StringName& p_name, const Variant& p_value);
 	virtual bool get(const StringName& p_name, Variant &r_ret) const;
 	virtual void get_property_list(List<PropertyInfo> *p_properties) const;
+	virtual Variant::Type get_property_type(const StringName& p_name,bool *r_is_valid=NULL) const;
 
 	virtual void get_method_list(List<MethodInfo> *p_list) const {}
 	virtual bool has_method(const StringName& p_method) const { return false; }
diff --git a/core/ustring.cpp b/core/ustring.cpp
index 7582376fe0..bf2494e9b5 100644
--- a/core/ustring.cpp
+++ b/core/ustring.cpp
@@ -44,6 +44,11 @@
 #include <stdlib.h>
 #include <stdio.h>
 #endif
+
+#if defined(MINGW_ENABLED) || defined(_MSC_VER)
+#define snprintf _snprintf
+#endif
+
 /** STRING **/
 
 const char *CharString::get_data() const {
@@ -892,17 +897,8 @@ String String::num(double p_num,int p_decimals) {
 	}
 	char buf[256];
 
-#if defined(__GNUC__)
-#ifdef MINGW_ENABLED
-	//snprintf is inexplicably broken in mingw
-	//sprintf(buf,fmt,p_num);
-	_snprintf(buf,256,fmt,p_num);
-#else
+#if defined(__GNUC__) || defined(_MSC_VER)
 	snprintf(buf,256,fmt,p_num);
-#endif
-
-#elif defined(_MSC_VER)
-	_snprintf(buf,256,fmt,p_num);
 #else
 	sprintf(buf,fmt,p_num);
 #endif
@@ -1173,10 +1169,7 @@ String String::num_scientific(double p_num) {
 
 	char buf[256];
 
-#if defined(_MSC_VER) || defined(MINGW_ENABLED)
-
-	_snprintf(buf,256,"%lg",p_num);
-#elif defined(__GNUC__)
+#if defined(__GNUC__) || defined(_MSC_VER)
 	snprintf(buf,256,"%lg",p_num);
 #else
 	sprintf(buf,"%.16lg",p_num);
@@ -3079,6 +3072,52 @@ String String::world_wrap(int p_chars_per_line) const {
 	return ret;
 }
 
+// Percent-encodes the UTF-8 form of this string so it can be embedded in a URL.
+// Only the RFC 3986 "unreserved" characters (A-Z a-z 0-9 - . _ ~) are copied as-is.
+String String::http_escape() const {
+	static const char hex_digits[] = "0123456789ABCDEF";
+	const CharString temp = utf8();
+	String res;
+	// Iterate over the encoded bytes, not the characters: the UTF-8 form is
+	// longer than length() as soon as the string holds a non-ASCII character.
+	for (int i = 0; i < temp.length(); ++i) {
+		// Unsigned, so bytes >= 0x80 are not sign-extended into a wide value.
+		const unsigned char ord = temp[i];
+		if (ord == '.' || ord == '-' || ord == '_' || ord == '~' ||
+		   (ord >= 'a' && ord <= 'z') ||
+		   (ord >= 'A' && ord <= 'Z') ||
+		   (ord >= '0' && ord <= '9')) {
+			res += (CharType)ord;
+		} else {
+			const char h_Val[4] = { '%', hex_digits[ord >> 4], hex_digits[ord & 0xF], 0 };
+			res += h_Val;
+		}
+	}
+	return res;
+}
+
+// Decodes %XX escapes, then reinterprets the resulting bytes as UTF-8.
+// A '%' that is not followed by two hex digits is copied through unchanged.
+String String::http_unescape() const {
+	String res;
+	for (int i = 0; i < length(); ++i) {
+		CharType c = ord_at(i);
+		if (c == '%' && i+2 < length()) {
+			const CharType ord1 = ord_at(i+1), ord2 = ord_at(i+2);
+			const bool hex1 = (ord1 >= '0' && ord1 <= '9') || (ord1 >= 'A' && ord1 <= 'F') || (ord1 >= 'a' && ord1 <= 'f');
+			const bool hex2 = (ord2 >= '0' && ord2 <= '9') || (ord2 >= 'A' && ord2 <= 'F') || (ord2 >= 'a' && ord2 <= 'f');
+			if (hex1 && hex2) {
+				// strtol() needs a NUL-terminated buffer, hence three elements.
+				const char bytes[3] = { (char)ord1, (char)ord2, 0 };
+				c = (CharType)(unsigned char)strtol(bytes, NULL, 16);
+				i+=2;
+			}
+		}
+		res += c;
+	}
+	return String::utf8(res.ascii());
+}
+
 String String::c_unescape() const {
 
 	String escaped=*this;
diff --git a/core/ustring.h b/core/ustring.h
index fa25a07eb0..2f3c4bff4d 100644
--- a/core/ustring.h
+++ b/core/ustring.h
@@ -207,6 +207,8 @@ public:
 
 	String xml_escape(bool p_escape_quotes=false) const;
 	String xml_unescape() const;
+    String http_escape() const;
+    String http_unescape() const;
 	String c_escape() const;
 	String c_unescape() const;
 	String world_wrap(int p_chars_per_line) const;
diff --git a/core/variant.h b/core/variant.h
index 8fd9662c36..e75a2b1c92 100644
--- a/core/variant.h
+++ b/core/variant.h
@@ -390,7 +390,7 @@ public:
 
 	Variant call(const StringName& p_method,const Variant** p_args,int p_argcount,CallError &r_error);
 	Variant call(const StringName& p_method,const Variant& p_arg1=Variant(),const Variant& p_arg2=Variant(),const Variant& p_arg3=Variant(),const Variant& p_arg4=Variant(),const Variant& p_arg5=Variant());
-	static Variant construct(const Variant::Type,const Variant** p_args,int p_argcount,CallError &r_error);
+	static Variant construct(const Variant::Type,const Variant** p_args,int p_argcount,CallError &r_error,bool p_strict=true);
 
 	void get_method_list(List<MethodInfo> *p_list) const;
 	bool has_method(const StringName& p_method) const;
diff --git a/core/variant_call.cpp b/core/variant_call.cpp
index 222618ffa0..2d10cf4d44 100644
--- a/core/variant_call.cpp
+++ b/core/variant_call.cpp
@@ -333,7 +333,7 @@ static void _call_##m_type##_##m_method(Variant& r_ret,Variant& p_self,const Var
 	VCALL_LOCALMEM1R(Vector2,dot);
 	VCALL_LOCALMEM1R(Vector2,slide);
 	VCALL_LOCALMEM1R(Vector2,reflect);
-	VCALL_LOCALMEM0R(Vector2,atan2);
+	VCALL_LOCALMEM0R(Vector2,angle);
 //	VCALL_LOCALMEM1R(Vector2,cross);
 
 	VCALL_LOCALMEM0R(Rect2,get_area);
@@ -409,6 +409,7 @@ static void _call_##m_type##_##m_method(Variant& r_ret,Variant& p_self,const Var
 	VCALL_LOCALMEM0R(Quat,normalized);
 	VCALL_LOCALMEM0R(Quat,inverse);
 	VCALL_LOCALMEM1R(Quat,dot);
+	VCALL_LOCALMEM1R(Quat,xform);
 	VCALL_LOCALMEM2R(Quat,slerp);
 	VCALL_LOCALMEM2R(Quat,slerpni);
 	VCALL_LOCALMEM4R(Quat,cubic_slerp);
@@ -857,6 +858,11 @@ static void _call_##m_type##_##m_method(Variant& r_ret,Variant& p_self,const Var
 		r_ret=Transform(p_args[0]->operator Matrix3(),p_args[1]->operator Vector3());
 	}
 
+	static void Image_init1(Variant& r_ret, const Variant** p_args) {
+		// Builds an Image from four arguments: the first three are forwarded as-is, the fourth is read as int and cast to Image::Format.
+		r_ret=Image(*p_args[0],*p_args[1],*p_args[2],Image::Format(p_args[3]->operator int()));
+	}
+
 	static void add_constructor(VariantConstructFunc p_func,const Variant::Type p_type,
 			const String& p_name1="", const Variant::Type p_type1=Variant::NIL,
 			const String& p_name2="", const Variant::Type p_type2=Variant::NIL,
@@ -958,7 +964,7 @@ Variant Variant::call(const StringName& p_method,const Variant** p_args,int p_ar
 #define VCALL(m_type,m_method) _VariantCall::_call_##m_type##_##m_method
 
 
-Variant Variant::construct(const Variant::Type p_type,const Variant** p_args,int p_argcount,CallError &r_error) {
+Variant Variant::construct(const Variant::Type p_type, const Variant** p_args, int p_argcount, CallError &r_error, bool p_strict) {
 
 	r_error.error=Variant::CallError::CALL_ERROR_INVALID_METHOD;
 	ERR_FAIL_INDEX_V(p_type,VARIANT_MAX,Variant());
@@ -1034,7 +1040,7 @@ Variant Variant::construct(const Variant::Type p_type,const Variant** p_args,int
 
 	} else if (p_argcount==1 && p_args[0]->type==p_type) {
 		return *p_args[0]; //copy construct
-	} else if (p_argcount==1 && Variant::can_convert(p_args[0]->type,p_type)) {
+	} else if (p_argcount==1 && (!p_strict || Variant::can_convert(p_args[0]->type,p_type))) {
 		//near match construct
 
 		switch(p_type) {
@@ -1290,13 +1296,13 @@ _VariantCall::addfunc(Variant::m_vtype,Variant::m_ret,_SCS(#m_method),VCALL(m_cl
 	ADDFUNC1(STRING,STRING,String,pad_decimals,INT,"digits",varray());
 	ADDFUNC1(STRING,STRING,String,pad_zeros,INT,"digits",varray());
 
-	ADDFUNC0(STRING,STRING,String,to_ascii,varray());
-	ADDFUNC0(STRING,STRING,String,to_utf8,varray());
+	ADDFUNC0(STRING,RAW_ARRAY,String,to_ascii,varray());
+	ADDFUNC0(STRING,RAW_ARRAY,String,to_utf8,varray());
 
 
 	ADDFUNC0(VECTOR2,VECTOR2,Vector2,normalized,varray());
 	ADDFUNC0(VECTOR2,REAL,Vector2,length,varray());
-	ADDFUNC0(VECTOR2,REAL,Vector2,atan2,varray());
+	ADDFUNC0(VECTOR2,REAL,Vector2,angle,varray());
 	ADDFUNC0(VECTOR2,REAL,Vector2,length_squared,varray());
 	ADDFUNC1(VECTOR2,REAL,Vector2,distance_to,VECTOR2,"to",varray());
 	ADDFUNC1(VECTOR2,REAL,Vector2,distance_squared_to,VECTOR2,"to",varray());
@@ -1361,6 +1367,7 @@ _VariantCall::addfunc(Variant::m_vtype,Variant::m_ret,_SCS(#m_method),VCALL(m_cl
 	ADDFUNC0(QUAT,QUAT,Quat,normalized,varray());
 	ADDFUNC0(QUAT,QUAT,Quat,inverse,varray());
 	ADDFUNC1(QUAT,REAL,Quat,dot,QUAT,"b",varray());
+	ADDFUNC1(QUAT,VECTOR3,Quat,xform,VECTOR3,"v",varray());
 	ADDFUNC2(QUAT,QUAT,Quat,slerp,QUAT,"b",REAL,"t",varray());
 	ADDFUNC2(QUAT,QUAT,Quat,slerpni,QUAT,"b",REAL,"t",varray());
 	ADDFUNC4(QUAT,QUAT,Quat,cubic_slerp,QUAT,"b",QUAT,"pre_a",QUAT,"post_b",REAL,"t",varray());
@@ -1581,6 +1588,8 @@ _VariantCall::addfunc(Variant::m_vtype,Variant::m_ret,_SCS(#m_method),VCALL(m_cl
 	_VariantCall::add_constructor(_VariantCall::Transform_init1,Variant::TRANSFORM,"x_axis",Variant::VECTOR3,"y_axis",Variant::VECTOR3,"z_axis",Variant::VECTOR3,"origin",Variant::VECTOR3);
 	_VariantCall::add_constructor(_VariantCall::Transform_init2,Variant::TRANSFORM,"basis",Variant::MATRIX3,"origin",Variant::VECTOR3);
 
+	_VariantCall::add_constructor(_VariantCall::Image_init1,Variant::IMAGE,"width",Variant::INT,"height",Variant::INT,"mipmaps",Variant::BOOL,"format",Variant::INT);
+
 	/* REGISTER CONSTANTS */
 
 	_VariantCall::constant_data[Variant::VECTOR3].value["AXIS_X"]=Vector3::AXIS_X;
diff --git a/core/variant_op.cpp b/core/variant_op.cpp
index 1cdf6d7319..1bcfa7d2ae 100644
--- a/core/variant_op.cpp
+++ b/core/variant_op.cpp
@@ -586,7 +586,21 @@ void Variant::evaluate(const Operator& p_op, const Variant& p_a, const Variant&
 				} break;
 				DEFAULT_OP_LOCALMEM_NUM(*,VECTOR3,Vector3);
 				DEFAULT_OP_FAIL(PLANE);
-				DEFAULT_OP_FAIL(QUAT);
+				case QUAT: {
+
+					switch(p_b.type) {
+						case VECTOR3: {
+
+							_RETURN( reinterpret_cast<const Quat*>(p_a._data._mem)->xform( *(const Vector3*)p_b._data._mem) );
+						} break;
+						case QUAT: {
+
+							_RETURN( *reinterpret_cast<const Quat*>(p_a._data._mem) * *reinterpret_cast<const Quat*>(p_b._data._mem) );
+						} break;
+					};
+					r_valid=false;
+					return;
+				} break;
 				DEFAULT_OP_FAIL(_AABB);
 				case MATRIX3: {
 
@@ -2573,7 +2587,7 @@ bool Variant::in(const Variant& p_index, bool *r_valid) const {
 				String idx=p_index;
 				const String *str=reinterpret_cast<const String*>(_data._mem);
 
-				return str->find("idx")!=-1;
+				return str->find(idx)!=-1;
 			}
 
 		} break;
@@ -2804,9 +2818,9 @@ void Variant::get_property_list(List<PropertyInfo> *p_list) const {
 		} break;
 		case MATRIX32: {
 
-			p_list->push_back( PropertyInfo(Variant::REAL,"x"));
-			p_list->push_back( PropertyInfo(Variant::REAL,"y"));
-			p_list->push_back( PropertyInfo(Variant::REAL,"o"));
+			p_list->push_back( PropertyInfo(Variant::VECTOR2,"x"));
+			p_list->push_back( PropertyInfo(Variant::VECTOR2,"y"));
+			p_list->push_back( PropertyInfo(Variant::VECTOR2,"o"));
 
 		} break;
 		case PLANE: {
diff --git a/core/variant_parser.cpp b/core/variant_parser.cpp
new file mode 100644
index 0000000000..fed8c28740
--- /dev/null
+++ b/core/variant_parser.cpp
@@ -0,0 +1,1495 @@
+#include "variant_parser.h"
+#include "io/resource_loader.h"
+#include "os/keyboard.h"
+
+
+
+CharType VariantParser::StreamFile::get_char() {
+	// Returns the next value read from the underlying file via f->get_8(), converted to CharType.
+	return f->get_8();
+}
+
+bool VariantParser::StreamFile::is_utf8() const {
+	// Always true for file streams; get_token() checks this to decide whether string tokens are re-decoded with parse_utf8().
+	return true;
+}
+bool VariantParser::StreamFile::is_eof() const {
+	// Forwards the end-of-file state reported by the underlying file (f->eof_reached()).
+	return f->eof_reached();
+}
+
+
+
+
+/////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+const char * VariantParser::tk_name[TK_MAX] = { // printable token names for error messages, indexed by token type
+	"'{'", // NOTE(review): entry order presumably mirrors the token type enum declared in the header - verify when adding tokens
+	"'}'",
+	"'['",
+	"']'",
+	"'('",
+	"')'",
+	"identifier",
+	"string",
+	"number",
+	"':'",
+	"','",
+	"'='",
+	"EOF",
+	"ERROR"
+};
+
+
+
+Error VariantParser::get_token(Stream *p_stream, Token& r_token, int &line, String &r_err_str) {
+	// Lexer: reads the next token from p_stream into r_token, skipping whitespace and counting newlines in 'line'.
+	// Returns OK for every recognised token (including TK_EOF); on a lexical error returns ERR_PARSE_ERROR
+	// with r_token.type=TK_ERROR and a description in r_err_str.
+	while (true) {
+
+		CharType cchar;
+		if (p_stream->saved) { // one character of lookahead pushed back by the number/identifier scanners below
+			cchar=p_stream->saved;
+			p_stream->saved=0;
+		} else {
+			cchar=p_stream->get_char();
+			if (p_stream->is_eof()) {
+				r_token.type=TK_EOF;
+				return OK;
+			}
+		}
+
+		switch(cchar) {
+
+			case '\n': {
+
+				line++;
+				break;
+			};
+			case 0: {
+				r_token.type=TK_EOF;
+				return OK;
+			} break;
+			case '{': {
+
+				r_token.type=TK_CURLY_BRACKET_OPEN;
+				return OK;
+			};
+			case '}': {
+
+				r_token.type=TK_CURLY_BRACKET_CLOSE;
+				return OK;
+			};
+			case '[': {
+
+				r_token.type=TK_BRACKET_OPEN;
+				return OK;
+			};
+			case ']': {
+
+				r_token.type=TK_BRACKET_CLOSE;
+				return OK;
+			};
+			case '(': {
+
+				r_token.type=TK_PARENTHESIS_OPEN;
+				return OK;
+			};
+			case ')': {
+
+				r_token.type=TK_PARENTHESIS_CLOSE;
+				return OK;
+			};
+			case ':': {
+
+				r_token.type=TK_COLON;
+				return OK;
+			};
+			case ',': {
+
+				r_token.type=TK_COMMA;
+				return OK;
+			};
+			case '=': {
+
+				r_token.type=TK_EQUAL;
+				return OK;
+			};
+			case '"': {
+				String str;
+				while(true) {
+
+					CharType ch=p_stream->get_char();
+
+					if (ch==0) {
+						r_err_str="Unterminated String";
+						r_token.type=TK_ERROR;
+						return ERR_PARSE_ERROR;
+					} else if (ch=='"') {
+						break;
+					} else if (ch=='\\') {
+						//escaped characters...
+						CharType next = p_stream->get_char();
+						if (next==0) {
+							r_err_str="Unterminated String";
+							r_token.type=TK_ERROR;
+							return  ERR_PARSE_ERROR;
+						}
+						CharType res=0;
+
+						switch(next) {
+
+							case 'b': res=8; break;
+							case 't': res=9; break;
+							case 'n': res=10; break;
+							case 'f': res=12; break;
+							case 'r': res=13; break;
+							case 'u': {
+								//hexnumbarh - oct is deprecated
+
+
+								for(int j=0;j<4;j++) {
+									CharType c = p_stream->get_char();
+									if (c==0) {
+										r_err_str="Unterminated String";
+										r_token.type=TK_ERROR;
+										return ERR_PARSE_ERROR;
+									}
+									if (!((c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'))) {
+
+										r_err_str="Malformed hex constant in string";
+										r_token.type=TK_ERROR;
+										return ERR_PARSE_ERROR;
+									}
+									CharType v;
+									if (c>='0' && c<='9') {
+										v=c-'0';
+									} else if (c>='a' && c<='f') {
+										v=c-'a';
+										v+=10;
+									} else if (c>='A' && c<='F') {
+										v=c-'A';
+										v+=10;
+									} else {
+										ERR_PRINT("BUG");
+										v=0;
+									}
+
+									res<<=4;
+									res|=v;
+
+
+								}
+
+
+
+							} break;
+							//case '\"': res='\"'; break;
+							//case '\\': res='\\'; break;
+							//case '/': res='/'; break;
+							default: {
+								res = next;
+								//r_err_str="Invalid escape sequence";
+								//return ERR_PARSE_ERROR;
+							} break;
+						}
+
+						str+=res;
+
+					} else {
+						if (ch=='\n')
+							line++;
+						str+=ch;
+					}
+				}
+
+				if (p_stream->is_utf8()) {
+					str.parse_utf8( str.ascii(true).get_data() );
+				}
+				r_token.type=TK_STRING;
+				r_token.value=str;
+				return OK;
+
+			} break;
+			default: {
+
+				if (cchar<=32) {
+					break;
+				}
+
+				if (cchar=='-' || (cchar>='0' && cchar<='9')) {
+					//a number
+
+
+					String num;
+#define READING_SIGN 0
+#define READING_INT 1
+#define READING_DEC 2
+#define READING_EXP 3
+#define READING_DONE 4
+					int reading=READING_INT;
+
+					if (cchar=='-') {
+						num+='-';
+						cchar=p_stream->get_char();
+
+					}
+
+
+
+					CharType c = cchar;
+					bool exp_sign=false;
+					bool exp_beg=false;
+					bool is_float=false;
+
+					while(true) {
+
+						switch(reading) {
+							case READING_INT: {
+
+								if (c>='0' && c<='9') {
+									//pass
+								} else if (c=='.') {
+									reading=READING_DEC;
+									is_float=true;
+								} else if (c=='e') {
+									reading=READING_EXP;
+									is_float=true; // an exponent makes it a real ("1e5") even without a decimal point
+								} else {
+									reading=READING_DONE;
+								}
+							 } break;
+							case READING_DEC: {
+
+								if (c>='0' && c<='9') {
+
+								} else if (c=='e') {
+									reading=READING_EXP;
+
+								} else {
+									reading=READING_DONE;
+								}
+
+							 } break;
+							case READING_EXP: {
+
+								if (c>='0' && c<='9') {
+									exp_beg=true;
+
+								} else if ((c=='-' || c=='+') && !exp_sign && !exp_beg) {
+									exp_sign=true;
+
+								} else {
+									reading=READING_DONE;
+								}
+							 } break;
+						}
+
+						if (reading==READING_DONE)
+							break;
+						num+=String::chr(c);
+						c = p_stream->get_char();
+
+
+					}
+
+					p_stream->saved=c;
+
+
+					r_token.type=TK_NUMBER;
+					if (is_float)
+						r_token.value=num.to_double();
+					else
+						r_token.value=num.to_int();
+					return OK;
+
+				} else if ((cchar>='A' && cchar<='Z') || (cchar>='a' && cchar<='z') || cchar=='_') {
+
+					String id;
+					bool first=true;
+
+					while((cchar>='A' && cchar<='Z') || (cchar>='a' && cchar<='z') || cchar=='_' || (!first && cchar>='0' && cchar<='9')) {
+
+						id+=String::chr(cchar);
+						cchar=p_stream->get_char();
+						first=false;
+					}
+
+					p_stream->saved=cchar;
+
+					r_token.type=TK_IDENTIFIER;
+					r_token.value=id;
+					return OK;
+				} else {
+					r_err_str="Unexpected character.";
+					r_token.type=TK_ERROR;
+					return ERR_PARSE_ERROR;
+				}
+			}
+		}
+	}
+
+	r_token.type=TK_ERROR;
+	return ERR_PARSE_ERROR;
+}
+
+template<class T>
+Error VariantParser::_parse_construct(Stream *p_stream,Vector<T>& r_construct,int &line,String &r_err_str) {
+	// Parses "( n, n, ... )": a parenthesised, comma separated list of number tokens, appending each one to r_construct.
+	// An empty list "()" is accepted. Returns OK, or an error code with a description in r_err_str.
+	Token token;
+	Error err = get_token(p_stream,token,line,r_err_str);
+	if (err)
+		return err; // keep the lexer's own message instead of overwriting it below
+	if (token.type!=TK_PARENTHESIS_OPEN) {
+		r_err_str="Expected '(' in constructor";
+		return ERR_PARSE_ERROR;
+	}
+	bool first=true;
+	while(true) {
+		if (!first) {
+			err = get_token(p_stream,token,line,r_err_str);
+			if (err)
+				return err;
+			if (token.type==TK_COMMA) {
+				//do none
+			} else if (token.type==TK_PARENTHESIS_CLOSE) {
+				break;
+			} else {
+				r_err_str="Expected ',' or ')' in constructor";
+				return ERR_PARSE_ERROR;
+			}
+		}
+		err = get_token(p_stream,token,line,r_err_str);
+		if (err)
+			return err;
+		if (first && token.type==TK_PARENTHESIS_CLOSE) {
+			break; // "()" - no arguments at all
+		} else if (token.type!=TK_NUMBER) {
+			r_err_str="Expected float in constructor";
+			return ERR_PARSE_ERROR;
+		}
+		r_construct.push_back(token.value);
+		first=false;
+	}
+	return OK;
+}
+
+Error VariantParser::parse_value(Token& token,Variant &value,Stream *p_stream,int &line,String &r_err_str,ResourceParser *p_res_parser) {
+	// Builds 'value' from 'token' (already read by the caller) plus whatever further tokens the construct needs from p_stream.
+	// Handles dictionaries, arrays, numbers, strings and identifier forms: true/false/null, math types, Image, NodePath, RID, resources, InputEvent, typed arrays.
+	// Returns OK on success, otherwise an error code with a description in r_err_str. p_res_parser may supply callbacks for Resource/SubResource/ExtResource.
+/*	{
+		Error err = get_token(p_stream,token,line,r_err_str);
+		if (err)
+			return err;
+	}*/
+
+
+	if (token.type==TK_CURLY_BRACKET_OPEN) {
+
+		Dictionary d;
+		Error err = _parse_dictionary(d,p_stream,line,r_err_str,p_res_parser);
+		if (err)
+			return err;
+		value=d;
+		return OK;
+	} else if (token.type==TK_BRACKET_OPEN) {
+
+		Array a;
+		Error err = _parse_array(a,p_stream,line,r_err_str,p_res_parser);
+		if (err)
+			return err;
+		value=a;
+		return OK;
+
+	} else if (token.type==TK_IDENTIFIER) {
+/*
+		VECTOR2,		// 5
+		RECT2,
+		VECTOR3,
+		MATRIX32,
+		PLANE,
+		QUAT,			// 10
+		_AABB, //sorry naming convention fail :( not like it's used often
+		MATRIX3,
+		TRANSFORM,
+
+		// misc types
+		COLOR,
+		IMAGE,			// 15
+		NODE_PATH,
+		_RID,
+		OBJECT,
+		INPUT_EVENT,
+		DICTIONARY,		// 20
+		ARRAY,
+
+		// arrays
+		RAW_ARRAY,
+		INT_ARRAY,
+		REAL_ARRAY,
+		STRING_ARRAY,	// 25
+		VECTOR2_ARRAY,
+		VECTOR3_ARRAY,
+		COLOR_ARRAY,
+
+		VARIANT_MAX
+
+*/
+		String id = token.value;
+		if (id=="true")
+			value=true;
+		else if (id=="false")
+			value=false;
+		else if (id=="null")
+			value=Variant();
+		else if (id=="Vector2"){
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=2) {
+				r_err_str="Expected 2 arguments for constructor";
+				return ERR_PARSE_ERROR; // wrong arity: bail out before indexing args out of range
+			}
+			value=Vector2(args[0],args[1]);
+			return OK;
+		} else if (id=="Vector3"){
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=3) {
+				r_err_str="Expected 3 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=Vector3(args[0],args[1],args[2]);
+			return OK;
+		} else if (id=="Matrix32"){
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=6) {
+				r_err_str="Expected 6 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			Matrix32 m;
+			m[0]=Vector2(args[0],args[1]);
+			m[1]=Vector2(args[2],args[3]);
+			m[2]=Vector2(args[4],args[5]);
+			value=m;
+			return OK;
+		} else if (id=="Plane") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=4) {
+				r_err_str="Expected 4 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=Plane(args[0],args[1],args[2],args[3]);
+			return OK;
+		} else if (id=="Quat") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=4) {
+				r_err_str="Expected 4 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=Quat(args[0],args[1],args[2],args[3]);
+			return OK;
+
+		} else if (id=="AABB"){
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=6) {
+				r_err_str="Expected 6 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=AABB(Vector3(args[0],args[1],args[2]),Vector3(args[3],args[4],args[5]));
+			return OK;
+
+		} else if (id=="Matrix3"){
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=9) {
+				r_err_str="Expected 9 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=Matrix3(args[0],args[1],args[2],args[3],args[4],args[5],args[6],args[7],args[8]);
+			return OK;
+		} else if (id=="Transform"){
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=12) {
+				r_err_str="Expected 12 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=Transform(Matrix3(args[0],args[1],args[2],args[3],args[4],args[5],args[6],args[7],args[8]),Vector3(args[9],args[10],args[11]));
+			return OK;
+
+		} else if (id=="Color") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			if (args.size()!=4) {
+				r_err_str="Expected 4 arguments for constructor";
+				return ERR_PARSE_ERROR;
+			}
+			value=Color(args[0],args[1],args[2],args[3]);
+			return OK;
+
+		} else if (id=="Image") {
+
+			//:|
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_OPEN) {
+				r_err_str="Expected '('";
+				return ERR_PARSE_ERROR;
+			}
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type==TK_PARENTHESIS_CLOSE) {
+				value=Image(); // just an Image()
+				return OK;
+			} else if (token.type!=TK_NUMBER) {
+				r_err_str="Expected number (width)";
+				return ERR_PARSE_ERROR;
+			}
+
+			int width=token.value; // taken from the number token before the next get_token() reuses 'token'
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_COMMA) {
+				r_err_str="Expected ','";
+				return ERR_PARSE_ERROR;
+			}
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_NUMBER) {
+				r_err_str="Expected number (height)";
+				return ERR_PARSE_ERROR;
+			}
+
+			int height=token.value;
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_COMMA) {
+				r_err_str="Expected ','";
+				return ERR_PARSE_ERROR;
+			}
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_NUMBER) {
+				r_err_str="Expected number (mipmaps)";
+				return ERR_PARSE_ERROR;
+			}
+
+			int mipmaps=token.value;
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_COMMA) {
+				r_err_str="Expected ','";
+				return ERR_PARSE_ERROR;
+			}
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_IDENTIFIER) {
+				r_err_str="Expected identifier (format)";
+				return ERR_PARSE_ERROR;
+			}
+
+
+			String sformat=token.value;
+
+			Image::Format format;
+
+			if (sformat=="GRAYSCALE") format=Image::FORMAT_GRAYSCALE;
+			else if (sformat=="INTENSITY") format=Image::FORMAT_INTENSITY;
+			else if (sformat=="GRAYSCALE_ALPHA") format=Image::FORMAT_GRAYSCALE_ALPHA;
+			else if (sformat=="RGB") format=Image::FORMAT_RGB;
+			else if (sformat=="RGBA") format=Image::FORMAT_RGBA;
+			else if (sformat=="INDEXED") format=Image::FORMAT_INDEXED;
+			else if (sformat=="INDEXED_ALPHA") format=Image::FORMAT_INDEXED_ALPHA;
+			else if (sformat=="BC1") format=Image::FORMAT_BC1;
+			else if (sformat=="BC2") format=Image::FORMAT_BC2;
+			else if (sformat=="BC3") format=Image::FORMAT_BC3;
+			else if (sformat=="BC4") format=Image::FORMAT_BC4;
+			else if (sformat=="BC5") format=Image::FORMAT_BC5;
+			else if (sformat=="PVRTC2") format=Image::FORMAT_PVRTC2;
+			else if (sformat=="PVRTC2_ALPHA") format=Image::FORMAT_PVRTC2_ALPHA;
+			else if (sformat=="PVRTC4") format=Image::FORMAT_PVRTC4;
+			else if (sformat=="PVRTC4_ALPHA") format=Image::FORMAT_PVRTC4_ALPHA;
+			else if (sformat=="ATC") format=Image::FORMAT_ATC;
+			else if (sformat=="ATC_ALPHA_EXPLICIT") format=Image::FORMAT_ATC_ALPHA_EXPLICIT;
+			else if (sformat=="ATC_ALPHA_INTERPOLATED") format=Image::FORMAT_ATC_ALPHA_INTERPOLATED;
+			else if (sformat=="CUSTOM") format=Image::FORMAT_CUSTOM;
+			else {
+				r_err_str="Invalid image format: '"+sformat+"'";
+				return ERR_PARSE_ERROR;
+			};
+
+			int len = Image::get_image_data_size(width,height,format,mipmaps);
+
+			DVector<uint8_t> buffer;
+			buffer.resize(len);
+
+			if (buffer.size()!=len) {
+				r_err_str="Couldn't allocate image buffer of size: "+itos(len);
+				return ERR_PARSE_ERROR; // do not write 'len' bytes into a buffer that was not resized
+			}
+			{
+				DVector<uint8_t>::Write w=buffer.write();
+
+				for(int i=0;i<len;i++) {
+					get_token(p_stream,token,line,r_err_str);
+					if (token.type!=TK_COMMA) {
+						r_err_str="Expected ','";
+						return ERR_PARSE_ERROR;
+					}
+
+					get_token(p_stream,token,line,r_err_str);
+					if (token.type!=TK_NUMBER) {
+						r_err_str="Expected number";
+						return ERR_PARSE_ERROR;
+					}
+
+					w[i]=int(token.value);
+
+				}
+			}
+			// NOTE(review): unlike the other constructors, no closing ')' is consumed after the pixel data - confirm against the serialized form.
+
+			Image img(width,height,mipmaps,format,buffer);
+
+			value=img;
+
+			return OK;
+
+
+		} else if (id=="NodePath") {
+
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_OPEN) {
+				r_err_str="Expected '('";
+				return ERR_PARSE_ERROR;
+			}
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_STRING) {
+				r_err_str="Expected string as argument for NodePath()";
+				return ERR_PARSE_ERROR;
+			}
+
+			value=NodePath(String(token.value));
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_CLOSE) {
+				r_err_str="Expected ')'";
+				return ERR_PARSE_ERROR;
+			}
+
+		} else if (id=="RID") {
+
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_OPEN) {
+				r_err_str="Expected '('";
+				return ERR_PARSE_ERROR;
+			}
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_NUMBER) {
+				r_err_str="Expected number as argument";
+				return ERR_PARSE_ERROR;
+			}
+
+			value=token.value;
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_CLOSE) {
+				r_err_str="Expected ')'";
+				return ERR_PARSE_ERROR;
+			}
+
+
+			return OK;
+
+		} else if (id=="Resource" || id=="SubResource" || id=="ExtResource") {
+
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_OPEN) {
+				r_err_str="Expected '('";
+				return ERR_PARSE_ERROR;
+			}
+
+
+			if (p_res_parser && id=="Resource" && p_res_parser->func){
+
+				RES res;
+				Error err = p_res_parser->func(p_res_parser->userdata,p_stream,res,line,r_err_str);
+				if (err)
+					return err;
+
+				value=res;
+
+				return OK;
+			} else if (p_res_parser && id=="ExtResource" && p_res_parser->ext_func){
+
+				RES res;
+				Error err = p_res_parser->ext_func(p_res_parser->userdata,p_stream,res,line,r_err_str);
+				if (err)
+					return err;
+
+				value=res;
+
+				return OK;
+			} else if (p_res_parser && id=="SubResource" && p_res_parser->sub_func){
+
+				RES res;
+				Error err = p_res_parser->sub_func(p_res_parser->userdata,p_stream,res,line,r_err_str);
+				if (err)
+					return err;
+
+				value=res;
+
+				return OK;
+			} else {
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type==TK_STRING) {
+					String path=token.value;
+					RES res = ResourceLoader::load(path);
+					if (res.is_null()) {
+						r_err_str="Can't load resource at path: '"+path+"'.";
+						return ERR_PARSE_ERROR;
+					}
+
+					get_token(p_stream,token,line,r_err_str);
+					if (token.type!=TK_PARENTHESIS_CLOSE) {
+						r_err_str="Expected ')'";
+						return ERR_PARSE_ERROR;
+					}
+
+					value=res;
+					return OK;
+
+				} else {
+					r_err_str="Expected string as argument for Resource().";
+					return ERR_PARSE_ERROR;
+				}
+			}
+
+			return OK;
+
+
+		} else if (id=="InputEvent") {
+
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_OPEN) {
+				r_err_str="Expected '('";
+				return ERR_PARSE_ERROR;
+			}
+
+			get_token(p_stream,token,line,r_err_str);
+
+			if (token.type!=TK_IDENTIFIER) {
+				r_err_str="Expected identifier";
+				return ERR_PARSE_ERROR;
+			}
+
+
+			String id = token.value;
+
+			InputEvent ie;
+
+			if (id=="KEY") {
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_COMMA) {
+					r_err_str="Expected ','";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.type=InputEvent::KEY;
+
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type==TK_IDENTIFIER) {
+					String name=token.value;
+					ie.key.scancode=find_keycode(name);
+				} else if (token.type==TK_NUMBER) {
+
+					ie.key.scancode=token.value;
+				} else {
+
+					r_err_str="Expected string or integer for keycode";
+					return ERR_PARSE_ERROR;
+				}
+
+				get_token(p_stream,token,line,r_err_str);
+
+				if (token.type==TK_COMMA) {
+
+					get_token(p_stream,token,line,r_err_str);
+
+					if (token.type!=TK_IDENTIFIER) {
+						r_err_str="Expected identifier with modifier flas";
+						return ERR_PARSE_ERROR;
+					}
+
+					String mods=token.value;
+
+					if (mods.findn("C")!=-1)
+						ie.key.mod.control=true;
+					if (mods.findn("A")!=-1)
+						ie.key.mod.alt=true;
+					if (mods.findn("S")!=-1)
+						ie.key.mod.shift=true;
+					if (mods.findn("M")!=-1)
+						ie.key.mod.meta=true;
+
+					get_token(p_stream,token,line,r_err_str);
+					if (token.type!=TK_PARENTHESIS_CLOSE) {
+						r_err_str="Expected ')'";
+						return ERR_PARSE_ERROR;
+					}
+
+				} else if (token.type!=TK_PARENTHESIS_CLOSE) {
+
+					r_err_str="Expected ')' or modifier flags.";
+					return ERR_PARSE_ERROR;
+				}
+
+
+			} else if (id=="MBUTTON") {
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_COMMA) {
+					r_err_str="Expected ','";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.type=InputEvent::MOUSE_BUTTON;
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_NUMBER) {
+					r_err_str="Expected button index";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.mouse_button.button_index = token.value;
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_PARENTHESIS_CLOSE) {
+					r_err_str="Expected ')'";
+					return ERR_PARSE_ERROR;
+				}
+
+			} else if (id=="JBUTTON") {
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_COMMA) {
+					r_err_str="Expected ','";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.type=InputEvent::JOYSTICK_BUTTON;
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_NUMBER) {
+					r_err_str="Expected button index";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.joy_button.button_index = token.value;
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_PARENTHESIS_CLOSE) {
+					r_err_str="Expected ')'";
+					return ERR_PARSE_ERROR;
+				}
+
+			} else if (id=="JAXIS") {
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_COMMA) {
+					r_err_str="Expected ','";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.type=InputEvent::JOYSTICK_MOTION;
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_NUMBER) {
+					r_err_str="Expected axis index";
+					return ERR_PARSE_ERROR;
+				}
+
+				ie.joy_motion.axis = token.value;
+
+				get_token(p_stream,token,line,r_err_str);
+				if (token.type!=TK_PARENTHESIS_CLOSE) {
+					r_err_str="Expected ')'";
+					return ERR_PARSE_ERROR;
+				}
+
+			} else {
+
+				r_err_str="Invalid input event type.";
+				return ERR_PARSE_ERROR;
+			}
+
+			value=ie;
+
+			return OK;
+
+		} else if (id=="ByteArray") {
+
+			Vector<uint8_t> args;
+			Error err = _parse_construct<uint8_t>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			DVector<uint8_t> arr;
+			{
+				int len=args.size();
+				arr.resize(len);
+				DVector<uint8_t>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=args[i];
+				}
+			}
+
+			value=arr;
+
+			return OK;
+
+		} else if (id=="IntArray") {
+
+			Vector<int32_t> args;
+			Error err = _parse_construct<int32_t>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			DVector<int32_t> arr;
+			{
+				int len=args.size();
+				arr.resize(len);
+				DVector<int32_t>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=int(args[i]);
+				}
+			}
+
+			value=arr;
+
+			return OK;
+
+		} else if (id=="FloatArray") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			DVector<float> arr;
+			{
+				int len=args.size();
+				arr.resize(len);
+				DVector<float>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=args[i];
+				}
+			}
+
+			value=arr;
+
+			return OK;
+		} else if (id=="StringArray") {
+
+
+			get_token(p_stream,token,line,r_err_str);
+			if (token.type!=TK_PARENTHESIS_OPEN) {
+				r_err_str="Expected '('";
+				return ERR_PARSE_ERROR;
+			}
+
+			Vector<String> cs;
+
+			bool first=true;
+			while(true) {
+				if (!first) {
+					get_token(p_stream,token,line,r_err_str);
+					if (token.type==TK_COMMA) {
+						//do none
+					} else if (token.type==TK_PARENTHESIS_CLOSE) {
+						break;
+					} else {
+						r_err_str="Expected ',' or ')'";
+						return ERR_PARSE_ERROR;
+					}
+				}
+				get_token(p_stream,token,line,r_err_str);
+				if (first && token.type==TK_PARENTHESIS_CLOSE) {
+					break; // "StringArray()" - empty list
+				} else if (token.type!=TK_STRING) {
+					r_err_str="Expected string";
+					return ERR_PARSE_ERROR;
+				}
+				first=false; // from now on every element must be preceded by ','
+				cs.push_back(token.value);
+			}
+
+
+			DVector<String> arr;
+			{
+				int len=cs.size();
+				arr.resize(len);
+				DVector<String>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=cs[i];
+				}
+			}
+
+			value=arr;
+
+			return OK;
+
+
+		} else if (id=="Vector2Array") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			DVector<Vector2> arr;
+			{
+				int len=args.size()/2;
+				arr.resize(len);
+				DVector<Vector2>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=Vector2(args[i*2+0],args[i*2+1]);
+				}
+			}
+
+			value=arr;
+
+			return OK;
+
+		} else if (id=="Vector3Array") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			DVector<Vector3> arr;
+			{
+				int len=args.size()/3;
+				arr.resize(len);
+				DVector<Vector3>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=Vector3(args[i*3+0],args[i*3+1],args[i*3+2]);
+				}
+			}
+
+			value=arr;
+
+			return OK;
+
+		} else if (id=="ColorArray") {
+
+			Vector<float> args;
+			Error err = _parse_construct<float>(p_stream,args,line,r_err_str);
+			if (err)
+				return err;
+
+			DVector<Color> arr;
+			{
+				int len=args.size()/4;
+				arr.resize(len);
+				DVector<Color>::Write w = arr.write();
+				for(int i=0;i<len;i++) {
+					w[i]=Color(args[i*4+0],args[i*4+1],args[i*4+2],args[i*4+3]); // four floats per colour
+				}
+			}
+
+			value=arr;
+
+			return OK;
+
+		} else {
+			r_err_str="Unexpected identifier: '"+id+"'.";
+			return ERR_PARSE_ERROR;
+		}
+
+
+		/*
+				VECTOR2,		// 5
+				RECT2,
+				VECTOR3,
+				MATRIX32,
+				PLANE,
+				QUAT,			// 10
+				_AABB, //sorry naming convention fail :( not like it's used often
+				MATRIX3,
+				TRANSFORM,
+
+				// misc types
+				COLOR,
+				IMAGE,			// 15
+				NODE_PATH,
+				_RID,
+				OBJECT,
+				INPUT_EVENT,
+				DICTIONARY,		// 20
+				ARRAY,
+
+				// arrays
+				RAW_ARRAY,
+				INT_ARRAY,
+				REAL_ARRAY,
+				STRING_ARRAY,	// 25
+				VECTOR2_ARRAY,
+				VECTOR3_ARRAY,
+				COLOR_ARRAY,
+
+				VARIANT_MAX
+
+		*/
+
+		return OK;
+
+	} else if (token.type==TK_NUMBER) {
+
+		value=token.value;
+		return OK;
+	} else if (token.type==TK_STRING) {
+
+		value=token.value;
+		return OK;
+	} else {
+		r_err_str="Expected value, got "+String(tk_name[token.type])+".";
+		return ERR_PARSE_ERROR;
+	}
+
+	return ERR_PARSE_ERROR;
+}
+
+
+Error VariantParser::_parse_array(Array &array, Stream *p_stream, int &line, String &r_err_str, ResourceParser *p_res_parser) {
+	// Parses array elements up to the closing ']' (the opening '[' was already consumed by the caller)
+	// and appends each parsed value to 'array'.
+	// Returns OK at ']', ERR_FILE_CORRUPT on premature end of stream, otherwise the first error hit.
+	Token tk;
+	bool expect_separator=false; // true right after an element: the next token must be ',' or ']'
+
+	for(;;) {
+
+		if (p_stream->is_eof()) {
+			r_err_str="Unexpected End of File while parsing array";
+			return ERR_FILE_CORRUPT;
+		}
+
+		const Error tk_err = get_token(p_stream,tk,line,r_err_str);
+		if (tk_err!=OK) {
+			return tk_err;
+		}
+
+		if (tk.type==TK_BRACKET_CLOSE)
+			return OK;
+
+		if (expect_separator) {
+
+			if (tk.type==TK_COMMA) {
+				// separator consumed; go fetch the next element (or a closing bracket)
+				expect_separator=false;
+				continue;
+			}
+
+			r_err_str="Expected ','";
+			return ERR_PARSE_ERROR;
+		}
+
+		Variant element;
+		const Error val_err = parse_value(tk,element,p_stream,line,r_err_str,p_res_parser);
+		if (val_err) {
+			return val_err;
+		}
+
+		array.push_back(element);
+		expect_separator=true;
+	}
+
+	// not reached: every exit from the loop above is a return
+	return OK;
+}
+
+Error VariantParser::_parse_dictionary(Dictionary &object, Stream *p_stream, int &line, String &r_err_str, ResourceParser *p_res_parser) {
+	// Parses "key : value" pairs up to the closing '}' (the opening '{' was already consumed by the caller)
+	// and stores them in 'object'. Pairs are separated by ','.
+	// Returns OK at '}', ERR_FILE_CORRUPT on premature end of stream, otherwise the first error hit.
+
+	Token tk;
+	Variant pending_key;         // key parsed in the previous pass, waiting for its value
+	bool reading_key=true;       // the loop alternates: one key pass, then one value pass
+	bool expect_separator=false; // true right after a complete pair
+
+
+	for(;;) {
+
+		if (p_stream->is_eof()) {
+			r_err_str="Unexpected End of File while parsing dictionary";
+			return ERR_FILE_CORRUPT;
+		}
+
+		if (!reading_key) {
+			// value pass: read one value and store it under the pending key
+
+			Error err = get_token(p_stream,tk,line,r_err_str);
+			if (err!=OK)
+				return err;
+
+			Variant entry;
+			err = parse_value(tk,entry,p_stream,line,r_err_str,p_res_parser);
+			if (err)
+				return err;
+
+			object[pending_key]=entry;
+			expect_separator=true;
+			reading_key=true;
+			continue;
+		}
+
+		// key pass
+
+		Error err = get_token(p_stream,tk,line,r_err_str);
+		if (err!=OK)
+			return err;
+
+		if (tk.type==TK_CURLY_BRACKET_CLOSE)
+			return OK;
+
+		if (expect_separator) {
+
+			if (tk.type==TK_COMMA) {
+				// separator consumed; loop again for the next key (or a closing brace)
+				expect_separator=false;
+				continue;
+			}
+
+			r_err_str="Expected '}' or ','";
+			return ERR_PARSE_ERROR;
+		}
+
+		err = parse_value(tk,pending_key,p_stream,line,r_err_str,p_res_parser);
+		if (err)
+			return err;
+
+		err = get_token(p_stream,tk,line,r_err_str);
+		if (err!=OK)
+			return err;
+
+		if (tk.type!=TK_COLON) {
+			r_err_str="Expected ':'";
+			return ERR_PARSE_ERROR;
+		}
+
+		reading_key=false;
+	}
+
+	// not reached: every exit from the loop above is a return
+	return OK;
+}
+
+
+// Parses a tag "[name id=value ...]" into r_tag. 'token' must hold the '['
+// on entry and is reused for every following token. Errors returned by
+// get_token are propagated instead of being ignored.
+Error VariantParser::_parse_tag(Token& token, Stream *p_stream, int &line, String &r_err_str, Tag& r_tag, ResourceParser *p_res_parser) {
+	r_tag.fields.clear();
+
+	if (token.type!=TK_BRACKET_OPEN) {
+		r_err_str="Expected '['";
+		return ERR_PARSE_ERROR;
+	}
+
+	Error err = get_token(p_stream,token,line,r_err_str);
+	if (err!=OK)
+		return err;
+	if (token.type!=TK_IDENTIFIER) {
+		r_err_str="Expected identifier (tag name)";
+		return ERR_PARSE_ERROR;
+	}
+
+	r_tag.name=token.value;
+
+	while(true) {
+		if (p_stream->is_eof()) {
+			r_err_str="Unexpected End of File while parsing tag: "+r_tag.name;
+			return ERR_FILE_CORRUPT;
+		}
+
+		err = get_token(p_stream,token,line,r_err_str);
+		if (err!=OK)
+			return err;
+		if (token.type==TK_BRACKET_CLOSE)
+			break; // end of tag
+		if (token.type!=TK_IDENTIFIER) {
+			r_err_str="Expected Identifier";
+			return ERR_PARSE_ERROR;
+		}
+
+		String id=token.value; // copy now: 'token' is overwritten below
+
+		err = get_token(p_stream,token,line,r_err_str);
+		if (err!=OK)
+			return err;
+		if (token.type!=TK_EQUAL) {
+			r_err_str="Expected '='";
+			return ERR_PARSE_ERROR;
+		}
+
+		err = get_token(p_stream,token,line,r_err_str);
+		if (err!=OK)
+			return err;
+		Variant value;
+		err = parse_value(token,value,p_stream,line,r_err_str,p_res_parser);
+		if (err!=OK)
+			return err;
+
+		r_tag.fields[id]=value;
+	}
+	return OK;
+}
+
+// Reads one token from p_stream and, if it is '[', parses the tag it opens
+// into r_tag. Returns ERR_FILE_EOF when that token is TK_EOF.
+Error VariantParser::parse_tag(Stream *p_stream, int &line, String &r_err_str, Tag& r_tag, ResourceParser *p_res_parser) {
+	Token token;
+	Error err = get_token(p_stream,token,line,r_err_str);
+	if (err!=OK)
+		return err; // propagate the tokenizer error instead of ignoring it
+
+	if (token.type==TK_EOF)
+		return ERR_FILE_EOF;
+
+	if (token.type!=TK_BRACKET_OPEN) {
+		r_err_str="Expected '['";
+		return ERR_PARSE_ERROR;
+	}
+	return _parse_tag(token,p_stream,line,r_err_str,r_tag,p_res_parser);
+}
+
+// Reads either a tag ("[...]") or an assignment ("name=value") from p_stream.
+// - If '[' is met before any name character, the tag is parsed into r_tag
+//   and r_assign is left empty.
+// - Otherwise characters above 32 are collected up to '=' into r_assign and
+//   the value that follows is parsed into r_value.
+// Returns ERR_FILE_EOF when the stream ends before either was completed.
+Error VariantParser::parse_tag_assign_eof(Stream *p_stream, int &line, String &r_err_str, Tag& r_tag, String &r_assign, Variant &r_value, ResourceParser *p_res_parser) {
+
+	r_assign=String(); // so callers can tell a tag from an assignment
+	String what; // name being collected (blanks are skipped, not stored)
+
+	while(true) {
+
+		CharType c;
+		if (p_stream->saved) {
+			// a character was pushed back earlier: consume it first
+			c=p_stream->saved;
+			p_stream->saved=0;
+		} else {
+			c=p_stream->get_char();
+		}
+
+		if (p_stream->is_eof())
+			return ERR_FILE_EOF;
+
+		if (c=='[' && what.length()==0) {
+			//it's a tag!
+			p_stream->saved='['; //go back one
+			return parse_tag(p_stream,line,r_err_str,r_tag,p_res_parser);
+		}
+
+		if (c>32) {
+			if (c!='=') {
+				what+=String::chr(c);
+				continue;
+			}
+
+			r_assign=what;
+			Token token;
+			Error err = get_token(p_stream,token,line,r_err_str);
+			if (err!=OK)
+				return err; // do not parse a token the tokenizer rejected
+			return parse_value(token,r_value,p_stream,line,r_err_str,p_res_parser);
+		} else if (c=='\n') {
+			line++;
+		}
+	}
+
+	return OK;
+}
+
+// Parses a single value from p_stream into r_ret. Returns ERR_FILE_EOF if
+// the first token is TK_EOF, otherwise the result of get_token/parse_value.
+Error VariantParser::parse(Stream *p_stream, Variant& r_ret, String &r_err_str, int &r_err_line, ResourceParser *p_res_parser) {
+
+	Token first;
+	Error status = get_token(p_stream,first,r_err_line,r_err_str);
+
+	if (status)
+		return status;
+	if (first.type==TK_EOF)
+		return ERR_FILE_EOF;
+
+	status = parse_value(first,r_ret,p_stream,r_err_line,r_err_str,p_res_parser);
+	return status;
+}
+
+
diff --git a/core/variant_parser.h b/core/variant_parser.h
new file mode 100644
index 0000000000..e1d25f7512
--- /dev/null
+++ b/core/variant_parser.h
@@ -0,0 +1,103 @@
+#ifndef VARIANT_PARSER_H
+#define VARIANT_PARSER_H
+
+#include "variant.h"
+#include "os/file_access.h"
+#include "resource.h"
+
+class VariantParser {
+public:
+	// Static-only tokenizer and parser that builds Variant values and "[tag ...]" headers from a character stream.
+	struct Stream {
+		// Abstract character source. 'saved' is a one-character push-back slot (0 = empty).
+		virtual CharType get_char()=0;
+		virtual bool is_utf8() const=0;
+		virtual bool is_eof() const=0;
+
+		CharType saved;
+
+		Stream() { saved=0; }
+		virtual ~Stream() {}
+	};
+
+	struct StreamFile : public Stream {
+		// Stream that holds a FileAccess pointer (NULL until assigned). NOTE(review): ownership of 'f' is not visible here.
+		FileAccess *f;
+
+		virtual CharType get_char();
+		virtual bool is_utf8() const;
+		virtual bool is_eof() const;
+
+		StreamFile() { f=NULL; }
+
+	};
+	// Callback type stored in ResourceParser; p_self presumably receives ResourceParser::userdata (TODO confirm).
+	typedef Error (*ParseResourceFunc)(void* p_self, Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str);
+
+	struct ResourceParser {
+		// No constructor: every member must be set by the code that creates the struct.
+		void *userdata;
+		ParseResourceFunc func;
+		ParseResourceFunc ext_func;
+		ParseResourceFunc sub_func;
+
+	};
+	// Token kinds; TK_MAX is the number of kinds and sizes tk_name[].
+	enum TokenType {
+		TK_CURLY_BRACKET_OPEN,
+		TK_CURLY_BRACKET_CLOSE,
+		TK_BRACKET_OPEN,
+		TK_BRACKET_CLOSE,
+		TK_PARENTHESIS_OPEN,
+		TK_PARENTHESIS_CLOSE,
+		TK_IDENTIFIER,
+		TK_STRING,
+		TK_NUMBER,
+		TK_COLON,
+		TK_COMMA,
+		TK_EQUAL,
+		TK_EOF,
+		TK_ERROR,
+		TK_MAX
+	};
+	// NOTE(review): no use of this enum was found in the reviewed parsing functions - confirm whether it is still needed.
+	enum Expecting {
+
+		EXPECT_OBJECT,
+		EXPECT_OBJECT_KEY,
+		EXPECT_COLON,
+		EXPECT_OBJECT_VALUE,
+	};
+
+	struct Token {
+		// 'value' carries the payload of identifier, string and number tokens.
+		TokenType type;
+		Variant value;
+	};
+
+	struct Tag {
+		// Result of parse_tag(): the tag name plus its "id=value" fields, keyed by id.
+		String name;
+		Map<String,Variant> fields;
+	};
+
+private:
+	static const char * tk_name[TK_MAX]; // printable token names indexed by TokenType, used in error messages
+
+	template<class T>
+	static Error _parse_construct(Stream *p_stream, Vector<T>& r_construct, int &line, String &r_err_str);
+	static Error _parse_dictionary(Dictionary &object, Stream *p_stream, int &line, String &r_err_str,ResourceParser *p_res_parser=NULL);
+	static Error _parse_array(Array &array, Stream *p_stream, int &line, String &r_err_str,ResourceParser *p_res_parser=NULL);
+	static Error _parse_tag(Token& token,Stream *p_stream, int &line, String &r_err_str,Tag& r_tag,ResourceParser *p_res_parser=NULL);
+
+public:
+	// All entry points are static and return an Error code; r_err_str receives the message for parse errors.
+	static Error parse_tag(Stream *p_stream, int &line, String &r_err_str,Tag& r_tag,ResourceParser *p_res_parser=NULL);
+	static Error parse_tag_assign_eof(Stream *p_stream, int &line, String &r_err_str, Tag& r_tag, String &r_assign, Variant &r_value,ResourceParser *p_res_parser=NULL);
+
+	static Error parse_value(Token& token,Variant &value, Stream *p_stream, int &line, String &r_err_str,ResourceParser *p_res_parser=NULL);
+	static Error get_token(Stream *p_stream,Token& r_token,int &line,String &r_err_str);
+	static Error parse(Stream *p_stream, Variant &r_ret, String &r_err_str, int &r_err_line,ResourceParser *p_res_parser=NULL);
+};
+
+#endif // VARIANT_PARSER_H
diff --git a/core/vector.h b/core/vector.h
index d103400622..78dff5eadb 100644
--- a/core/vector.h
+++ b/core/vector.h
@@ -42,7 +42,7 @@
 template<class T>
 class Vector {
 
-	mutable void* _ptr;
+	mutable T* _ptr;
  
  	// internal helpers
  
@@ -51,21 +51,21 @@ class Vector {
 		if (!_ptr)
  			return NULL;
  			
-		return reinterpret_cast<SafeRefCount*>(_ptr);
+		return reinterpret_cast<SafeRefCount*>((uint8_t*)_ptr-sizeof(int)-sizeof(SafeRefCount));
  	}
  	
 	_FORCE_INLINE_ int* _get_size() const  {
  	
 		if (!_ptr)
  			return NULL;
-		return reinterpret_cast<int*>(((uint8_t*)(_ptr))+sizeof(SafeRefCount));
+		return reinterpret_cast<int*>((uint8_t*)_ptr-sizeof(int));
  		
  	}
 	_FORCE_INLINE_ T* _get_data() const {
  	
 		if (!_ptr)
  			return NULL;
-		return reinterpret_cast<T*>(((uint8_t*)(_ptr))+sizeof(SafeRefCount)+sizeof(int));
+		return reinterpret_cast<T*>(_ptr);
  		
  	}
  	
@@ -88,11 +88,11 @@ public:
 	_FORCE_INLINE_ void clear() { resize(0); }
 	
 	_FORCE_INLINE_ int size() const {
-		
-		if (!_ptr)
-			return 0;
+		int* size = _get_size();
+		if (size)
+			return *size;
 		else		
-			return *reinterpret_cast<int*>(((uint8_t*)(_ptr))+sizeof(SafeRefCount));
+			return 0;
 	}
 	_FORCE_INLINE_ bool empty() const { return _ptr == 0; }
 	Error resize(int p_size);
@@ -174,7 +174,7 @@ void Vector<T>::_unref(void *p_data) {
 	if (!p_data)
 		return;
 		
-	SafeRefCount *src = reinterpret_cast<SafeRefCount*>(p_data);
+	SafeRefCount *src = reinterpret_cast<SafeRefCount*>((uint8_t*)p_data-sizeof(int)-sizeof(SafeRefCount));
 	
 	if (!src->unref())
 		return; // still in use
@@ -189,7 +189,7 @@ void Vector<T>::_unref(void *p_data) {
 	}
 	
 	// free mem
-	memfree(p_data);
+	memfree((uint8_t*)p_data-sizeof(int)-sizeof(SafeRefCount));
 
 }
 
@@ -201,7 +201,8 @@ void Vector<T>::_copy_on_write() {
 	
 	if (_get_refcount()->get() > 1 ) {
 		/* in use by more than me */
-		SafeRefCount *src_new=(SafeRefCount *)memalloc(_get_alloc_size(*_get_size()));
+		void* mem_new = memalloc(_get_alloc_size(*_get_size()));
+		SafeRefCount *src_new=(SafeRefCount *)mem_new;
 		src_new->init();
 		int * _size = (int*)(src_new+1);
 		*_size=*_get_size();
@@ -215,7 +216,7 @@ void Vector<T>::_copy_on_write() {
 		}
 		
 		_unref(_ptr);
-		_ptr=src_new;
+		_ptr=_data;
 	}
 
 }
@@ -260,16 +261,17 @@ Error Vector<T>::resize(int p_size) {
 
 		if (size()==0) {
 			// alloc from scratch
-			_ptr = (T*)memalloc(_get_alloc_size(p_size));
-			ERR_FAIL_COND_V( !_ptr ,ERR_OUT_OF_MEMORY);
+			void* ptr=memalloc(_get_alloc_size(p_size));
+			ERR_FAIL_COND_V( !ptr ,ERR_OUT_OF_MEMORY);
+			_ptr=(T*)((uint8_t*)ptr+sizeof(int)+sizeof(SafeRefCount));
 			_get_refcount()->init(); // init refcount
 			*_get_size()=0; // init size (currently, none)
 
 		} else {
 			
-			void *_ptrnew = (T*)memrealloc(_ptr,_get_alloc_size(p_size));
+			void *_ptrnew = (T*)memrealloc((uint8_t*)_ptr-sizeof(int)-sizeof(SafeRefCount),_get_alloc_size(p_size));
 			ERR_FAIL_COND_V( !_ptrnew ,ERR_OUT_OF_MEMORY);
-			_ptr=_ptrnew;
+			_ptr=(T*)((uint8_t*)_ptrnew+sizeof(int)+sizeof(SafeRefCount));
 		}
 
 		// construct the newly created elements
@@ -291,10 +293,10 @@ Error Vector<T>::resize(int p_size) {
 			t->~T();
 		}
 
-		void *_ptrnew = (T*)memrealloc(_ptr,_get_alloc_size(p_size));
+		void *_ptrnew = (T*)memrealloc((uint8_t*)_ptr-sizeof(int)-sizeof(SafeRefCount),_get_alloc_size(p_size));
 		ERR_FAIL_COND_V( !_ptrnew ,ERR_OUT_OF_MEMORY);
 		
-		_ptr=_ptrnew;
+		_ptr=(T*)((uint8_t*)_ptrnew+sizeof(int)+sizeof(SafeRefCount));
 		
 		*_get_size()=p_size;
 				
diff --git a/demos/2d/area_input/engine.cfg b/demos/2d/area_input/engine.cfg
index 3227e9278f..8fa2e15beb 100644
--- a/demos/2d/area_input/engine.cfg
+++ b/demos/2d/area_input/engine.cfg
@@ -2,3 +2,4 @@
 
 name="Area 2D Input Events"
 main_scene="res://input.scn"
+icon="res://icon.png"
diff --git a/demos/2d/area_input/icon.png b/demos/2d/area_input/icon.png
new file mode 100644
index 0000000000..2f412ecf68
--- /dev/null
+++ b/demos/2d/area_input/icon.png
diff --git a/demos/2d/dynamic_collision_shapes/engine.cfg b/demos/2d/dynamic_collision_shapes/engine.cfg
index 536b75f2f2..76a074f346 100644
--- a/demos/2d/dynamic_collision_shapes/engine.cfg
+++ b/demos/2d/dynamic_collision_shapes/engine.cfg
@@ -2,3 +2,4 @@
 
 name="Run-Time CollisionShape"
 main_scene="res://dynamic_colobjs.scn"
+icon="res://icon.png"
diff --git a/demos/2d/dynamic_collision_shapes/icon.png b/demos/2d/dynamic_collision_shapes/icon.png
new file mode 100644
index 0000000000..b47506d7c8
--- /dev/null
+++ b/demos/2d/dynamic_collision_shapes/icon.png
diff --git a/demos/2d/fog_of_war/engine.cfg b/demos/2d/fog_of_war/engine.cfg
index 5c4307b5bc..1f56851c58 100644
--- a/demos/2d/fog_of_war/engine.cfg
+++ b/demos/2d/fog_of_war/engine.cfg
@@ -2,7 +2,7 @@
 
 name="Fog of War"
 main_scene="res://fog.scn"
-icon="icon.png"
+icon="res://icon.png"
 
 [input]
 
diff --git a/demos/2d/fog_of_war/icon.png.flags b/demos/2d/fog_of_war/icon.png.flags
deleted file mode 100644
index dbef2209e8..0000000000
--- a/demos/2d/fog_of_war/icon.png.flags
+++ /dev/null
@@ -1 +0,0 @@
-gen_mipmaps=true
diff --git a/demos/2d/hdr/engine.cfg b/demos/2d/hdr/engine.cfg
index 3d8b4222d5..ab53a022f0 100644
--- a/demos/2d/hdr/engine.cfg
+++ b/demos/2d/hdr/engine.cfg
@@ -2,6 +2,7 @@
 
 name="HDR for 2D"
 main_scene="res://beach_cave.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/hdr/icon.png b/demos/2d/hdr/icon.png
new file mode 100644
index 0000000000..461cd4638a
--- /dev/null
+++ b/demos/2d/hdr/icon.png
diff --git a/demos/2d/isometric_light/engine.cfg b/demos/2d/isometric_light/engine.cfg
index 08393f1724..a5b053aa95 100644
--- a/demos/2d/isometric_light/engine.cfg
+++ b/demos/2d/isometric_light/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Isometric 2D + Lighting"
 main_scene="res://map.scn"
+icon="res://icon.png"
 
 [input]
 
diff --git a/demos/2d/isometric_light/icon.png b/demos/2d/isometric_light/icon.png
new file mode 100644
index 0000000000..0801f78ea5
--- /dev/null
+++ b/demos/2d/isometric_light/icon.png
diff --git a/demos/2d/light_mask/engine.cfg b/demos/2d/light_mask/engine.cfg
index 8b0ae6f61d..39608669ab 100644
--- a/demos/2d/light_mask/engine.cfg
+++ b/demos/2d/light_mask/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Using Lights As Mask"
 main_scene="res://lightmask.scn"
+icon="res://icon.png"
 
 [rasterizer]
 
diff --git a/demos/2d/light_mask/icon.png b/demos/2d/light_mask/icon.png
new file mode 100644
index 0000000000..34a6b709f6
--- /dev/null
+++ b/demos/2d/light_mask/icon.png
diff --git a/demos/2d/lights_shadows/engine.cfg b/demos/2d/lights_shadows/engine.cfg
index 771288c209..80142633d3 100644
--- a/demos/2d/lights_shadows/engine.cfg
+++ b/demos/2d/lights_shadows/engine.cfg
@@ -2,6 +2,7 @@
 
 name="2D Lighting"
 main_scene="res://light_shadows.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/lights_shadows/icon.png b/demos/2d/lights_shadows/icon.png
new file mode 100644
index 0000000000..554f01bb46
--- /dev/null
+++ b/demos/2d/lights_shadows/icon.png
diff --git a/demos/2d/lookat/engine.cfg b/demos/2d/lookat/engine.cfg
index 56917a39ec..81df107f0e 100644
--- a/demos/2d/lookat/engine.cfg
+++ b/demos/2d/lookat/engine.cfg
@@ -2,3 +2,4 @@
 
 name="Look At Pointer"
 main_scene="res://lookat.scn"
+icon="res://icon.png"
diff --git a/demos/2d/lookat/icon.png b/demos/2d/lookat/icon.png
new file mode 100644
index 0000000000..442cc1799f
--- /dev/null
+++ b/demos/2d/lookat/icon.png
diff --git a/demos/2d/motion/engine.cfg b/demos/2d/motion/engine.cfg
index 261111904c..6e660572d6 100644
--- a/demos/2d/motion/engine.cfg
+++ b/demos/2d/motion/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Motion Test"
 main_scene="res://motion.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/motion/icon.png b/demos/2d/motion/icon.png
new file mode 100644
index 0000000000..9e64961d3c
--- /dev/null
+++ b/demos/2d/motion/icon.png
diff --git a/demos/2d/navpoly/engine.cfg b/demos/2d/navpoly/engine.cfg
index 40515dd3d2..b750419915 100644
--- a/demos/2d/navpoly/engine.cfg
+++ b/demos/2d/navpoly/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Navigation Polygon (2D)"
 main_scene="res://navigation.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/navpoly/icon.png b/demos/2d/navpoly/icon.png
new file mode 100644
index 0000000000..7a28a367c6
--- /dev/null
+++ b/demos/2d/navpoly/icon.png
diff --git a/demos/2d/normalmaps/engine.cfg b/demos/2d/normalmaps/engine.cfg
index f0002dc2b8..4f9f4f67f0 100644
--- a/demos/2d/normalmaps/engine.cfg
+++ b/demos/2d/normalmaps/engine.cfg
@@ -2,6 +2,7 @@
 
 name="2D Normal Mapping"
 main_scene="res://normalmap.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/normalmaps/icon.png b/demos/2d/normalmaps/icon.png
new file mode 100644
index 0000000000..11ff5de829
--- /dev/null
+++ b/demos/2d/normalmaps/icon.png
diff --git a/demos/2d/polygon_path_finder/engine.cfg b/demos/2d/polygon_path_finder/engine.cfg
new file mode 100644
index 0000000000..47450408af
--- /dev/null
+++ b/demos/2d/polygon_path_finder/engine.cfg
@@ -0,0 +1,5 @@
+[application]
+
+name="Polygon Pathfinder"
+main_scene="res://poly_with_holes.scn"
+icon="res://icon.png"
diff --git a/demos/2d/polygon_path_finder/icon.png b/demos/2d/polygon_path_finder/icon.png
new file mode 100644
index 0000000000..643f5595ee
--- /dev/null
+++ b/demos/2d/polygon_path_finder/icon.png
diff --git a/demos/2d/polygon_path_finder/poly_with_holes.scn b/demos/2d/polygon_path_finder/poly_with_holes.scn
new file mode 100644
index 0000000000..6b340377b7
--- /dev/null
+++ b/demos/2d/polygon_path_finder/poly_with_holes.scn
diff --git a/demos/2d/polygon_path_finder_demo/polygonpathfinder.gd b/demos/2d/polygon_path_finder/polygonpathfinder.gd
index a0e71dd127..a0e71dd127 100644
--- a/demos/2d/polygon_path_finder_demo/polygonpathfinder.gd
+++ b/demos/2d/polygon_path_finder/polygonpathfinder.gd
diff --git a/demos/2d/polygon_path_finder_demo/engine.cfg b/demos/2d/polygon_path_finder_demo/engine.cfg
deleted file mode 100644
index 41c4adf701..0000000000
--- a/demos/2d/polygon_path_finder_demo/engine.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-[application]
-
-name="polygon_path_finder_demo"
-main_scene="res://new_scene_poly_with_holes.scn"
-icon="icon.png"
diff --git a/demos/2d/polygon_path_finder_demo/icon.png b/demos/2d/polygon_path_finder_demo/icon.png
deleted file mode 100644
index 0c422e37b0..0000000000
--- a/demos/2d/polygon_path_finder_demo/icon.png
+++ /dev/null
diff --git a/demos/2d/polygon_path_finder_demo/icon.png.flags b/demos/2d/polygon_path_finder_demo/icon.png.flags
deleted file mode 100644
index dbef2209e8..0000000000
--- a/demos/2d/polygon_path_finder_demo/icon.png.flags
+++ /dev/null
@@ -1 +0,0 @@
-gen_mipmaps=true
diff --git a/demos/2d/polygon_path_finder_demo/new_scene_poly_with_holes.scn b/demos/2d/polygon_path_finder_demo/new_scene_poly_with_holes.scn
deleted file mode 100644
index 07838be41e..0000000000
--- a/demos/2d/polygon_path_finder_demo/new_scene_poly_with_holes.scn
+++ /dev/null
diff --git a/demos/2d/screen_space_shaders/engine.cfg b/demos/2d/screen_space_shaders/engine.cfg
index 527e2f8f0a..383ca7bf11 100644
--- a/demos/2d/screen_space_shaders/engine.cfg
+++ b/demos/2d/screen_space_shaders/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Screen-Space Shaders"
 main_scene="res://screen_shaders.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/screen_space_shaders/icon.png b/demos/2d/screen_space_shaders/icon.png
new file mode 100644
index 0000000000..e3cc049081
--- /dev/null
+++ b/demos/2d/screen_space_shaders/icon.png
diff --git a/demos/2d/sdf_font/engine.cfg b/demos/2d/sdf_font/engine.cfg
index bdf26ce741..bf983041fa 100644
--- a/demos/2d/sdf_font/engine.cfg
+++ b/demos/2d/sdf_font/engine.cfg
@@ -2,3 +2,4 @@
 
 name="Signed Distance Field Font"
 main_scene="res://sdf.scn"
+icon="res://icon.png"
diff --git a/demos/2d/sdf_font/icon.png b/demos/2d/sdf_font/icon.png
new file mode 100644
index 0000000000..0c700ad77c
--- /dev/null
+++ b/demos/2d/sdf_font/icon.png
diff --git a/demos/2d/splash/engine.cfg b/demos/2d/splash/engine.cfg
index cb50c7b1be..e461426305 100644
--- a/demos/2d/splash/engine.cfg
+++ b/demos/2d/splash/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Splash Screen"
 main_scene="res://splash.xml"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/splash/icon.png b/demos/2d/splash/icon.png
new file mode 100644
index 0000000000..b8e24f209e
--- /dev/null
+++ b/demos/2d/splash/icon.png
diff --git a/demos/2d/sprite_shaders/engine.cfg b/demos/2d/sprite_shaders/engine.cfg
index 09f9a59566..17bdada188 100644
--- a/demos/2d/sprite_shaders/engine.cfg
+++ b/demos/2d/sprite_shaders/engine.cfg
@@ -2,3 +2,4 @@
 
 name="2D Shaders for Sprites"
 main_scene="res://sprite_shaders.scn"
+icon="res://icon.png"
diff --git a/demos/2d/sprite_shaders/icon.png b/demos/2d/sprite_shaders/icon.png
new file mode 100644
index 0000000000..8b13ef6bb4
--- /dev/null
+++ b/demos/2d/sprite_shaders/icon.png
diff --git a/demos/2d/texscreen/engine.cfg b/demos/2d/texscreen/engine.cfg
index fb683dfc1d..92d0e98d5b 100644
--- a/demos/2d/texscreen/engine.cfg
+++ b/demos/2d/texscreen/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Glass Bubbles (Texscreen)"
 main_scene="res://bubbles.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/2d/texscreen/icon.png b/demos/2d/texscreen/icon.png
new file mode 100644
index 0000000000..d74d025ced
--- /dev/null
+++ b/demos/2d/texscreen/icon.png
diff --git a/demos/3d/navmesh/icon.png b/demos/3d/navmesh/icon.png
new file mode 100644
index 0000000000..51fef7b2df
--- /dev/null
+++ b/demos/3d/navmesh/icon.png
diff --git a/demos/3d/sat_test/engine.cfg b/demos/3d/sat_test/engine.cfg
index cc215c83e8..82c688635d 100644
--- a/demos/3d/sat_test/engine.cfg
+++ b/demos/3d/sat_test/engine.cfg
@@ -2,3 +2,4 @@
 
 name="SAT Collision Test"
 main_scene="res://sat_test.xml"
+icon="res://icon.png"
diff --git a/demos/3d/sat_test/icon.png b/demos/3d/sat_test/icon.png
new file mode 100644
index 0000000000..194456e10f
--- /dev/null
+++ b/demos/3d/sat_test/icon.png
diff --git a/demos/3d/truck_town/engine.cfg b/demos/3d/truck_town/engine.cfg
index 3c340e6dcd..b2a463e1e2 100644
--- a/demos/3d/truck_town/engine.cfg
+++ b/demos/3d/truck_town/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Truck Town"
 main_scene="res://car_select.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/3d/truck_town/icon.png b/demos/3d/truck_town/icon.png
new file mode 100644
index 0000000000..7d7bd42116
--- /dev/null
+++ b/demos/3d/truck_town/icon.png
diff --git a/demos/gui/drag_and_drop/engine.cfg b/demos/gui/drag_and_drop/engine.cfg
index 448939c61d..49b9b93512 100644
--- a/demos/gui/drag_and_drop/engine.cfg
+++ b/demos/gui/drag_and_drop/engine.cfg
@@ -2,3 +2,4 @@
 
 name="Drag &amp; Drop (GUI)"
 main_scene="res://drag_and_drop.scn"
+icon="res://icon.png"
diff --git a/demos/gui/drag_and_drop/icon.png b/demos/gui/drag_and_drop/icon.png
new file mode 100644
index 0000000000..f900d8d4a3
--- /dev/null
+++ b/demos/gui/drag_and_drop/icon.png
diff --git a/demos/gui/input_mapping/controls.scn b/demos/gui/input_mapping/controls.scn
index 276712ba22..03567fb691 100644
--- a/demos/gui/input_mapping/controls.scn
+++ b/demos/gui/input_mapping/controls.scn
diff --git a/demos/gui/input_mapping/engine.cfg b/demos/gui/input_mapping/engine.cfg
index 959c0ac7d5..811635ce25 100644
--- a/demos/gui/input_mapping/engine.cfg
+++ b/demos/gui/input_mapping/engine.cfg
@@ -2,7 +2,7 @@
 
 name="Input Mapping GUI"
 main_scene="res://controls.scn"
-icon="icon.png"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/gui/input_mapping/icon.png b/demos/gui/input_mapping/icon.png
new file mode 100644
index 0000000000..5a1abf4f58
--- /dev/null
+++ b/demos/gui/input_mapping/icon.png
diff --git a/demos/gui/rich_text_bbcode/engine.cfg b/demos/gui/rich_text_bbcode/engine.cfg
index e0ea296f6d..5f68b6a0e6 100644
--- a/demos/gui/rich_text_bbcode/engine.cfg
+++ b/demos/gui/rich_text_bbcode/engine.cfg
@@ -2,3 +2,4 @@
 
 name="Rich Text Label (BBCode)"
 main_scene="res://rich_text_bbcode.scn"
+icon="res://icon.png"
diff --git a/demos/gui/rich_text_bbcode/icon.png b/demos/gui/rich_text_bbcode/icon.png
new file mode 100644
index 0000000000..6db48a3a9b
--- /dev/null
+++ b/demos/gui/rich_text_bbcode/icon.png
diff --git a/demos/gui/rich_text_bbcode/rich_text_bbcode.scn b/demos/gui/rich_text_bbcode/rich_text_bbcode.scn
index ca02044bb8..081338fd91 100644
--- a/demos/gui/rich_text_bbcode/rich_text_bbcode.scn
+++ b/demos/gui/rich_text_bbcode/rich_text_bbcode.scn
diff --git a/demos/gui/translation/engine.cfg b/demos/gui/translation/engine.cfg
index 169b65e154..dcd3d1983d 100644
--- a/demos/gui/translation/engine.cfg
+++ b/demos/gui/translation/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Translation Demo"
 main_scene="res://main.scn"
+icon="res://icon.png"
 
 [locale]
 
diff --git a/demos/gui/translation/icon.png b/demos/gui/translation/icon.png
new file mode 100644
index 0000000000..4be5ac1127
--- /dev/null
+++ b/demos/gui/translation/icon.png
diff --git a/demos/misc/instancing/engine.cfg b/demos/misc/instancing/engine.cfg
index 52a28a3fce..76b0c97721 100644
--- a/demos/misc/instancing/engine.cfg
+++ b/demos/misc/instancing/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Scene Instancing Demo"
 main_scene="res://container.scn"
+icon="res://icon.png"
 
 [physics_2d]
 
diff --git a/demos/misc/instancing/icon.png b/demos/misc/instancing/icon.png
new file mode 100644
index 0000000000..79a4283de7
--- /dev/null
+++ b/demos/misc/instancing/icon.png
diff --git a/demos/misc/regex/engine.cfg b/demos/misc/regex/engine.cfg
index 0a6f4f869c..ef5483e096 100644
--- a/demos/misc/regex/engine.cfg
+++ b/demos/misc/regex/engine.cfg
@@ -2,3 +2,4 @@
 
 name="RegEx"
 main_scene="res://regex.scn"
+icon="res://icon.png"
diff --git a/demos/misc/regex/icon.png b/demos/misc/regex/icon.png
new file mode 100644
index 0000000000..7a5232ec4b
--- /dev/null
+++ b/demos/misc/regex/icon.png
diff --git a/demos/misc/regex/regex.gd b/demos/misc/regex/regex.gd
index e648c18093..409b4cab05 100644
--- a/demos/misc/regex/regex.gd
+++ b/demos/misc/regex/regex.gd
@@ -2,21 +2,23 @@ extends VBoxContainer
 
 var regex = RegEx.new()
 
-func update_expression():
-	regex.compile(get_node("Expression").get_text())
+func update_expression(text):
+	regex.compile(text)
 	update_text()
 
 func update_text():
 	var text = get_node("Text").get_text()
-	regex.find(text)
 	var list = get_node("List")
 	for child in list.get_children():
 		child.queue_free()
-	for res in regex.get_captures():
-		var label = Label.new()
-		label.set_text(res)
-		list.add_child(label)
+	if regex.is_valid():
+		regex.find(text)
+		for res in regex.get_captures():
+			var label = Label.new()
+			label.set_text(res)
+			list.add_child(label)
 
 func _ready():
 	get_node("Text").set_text("They asked me \"What's going on \\\"in the manor\\\"?\"")
-	update_expression()
+	update_expression(get_node("Expression").get_text())
+
diff --git a/demos/misc/regex/regex.scn b/demos/misc/regex/regex.scn
index 2b62d6b82a..1f46521d0d 100644
--- a/demos/misc/regex/regex.scn
+++ b/demos/misc/regex/regex.scn
diff --git a/demos/misc/tween/engine.cfg b/demos/misc/tween/engine.cfg
index f97e540dbd..3d3d639964 100644
--- a/demos/misc/tween/engine.cfg
+++ b/demos/misc/tween/engine.cfg
@@ -2,7 +2,7 @@
 
 name="Tween Demo"
 main_scene="res://main.xml"
-icon="icon.png"
+icon="res://icon.png"
 target_fps=60
 
 [display]
diff --git a/demos/misc/tween/icon.png b/demos/misc/tween/icon.png
index 3e991fcc29..ed55c24140 100644
--- a/demos/misc/tween/icon.png
+++ b/demos/misc/tween/icon.png
diff --git a/demos/misc/window_management/engine.cfg b/demos/misc/window_management/engine.cfg
index 0a34231673..911d3fd4a1 100644
--- a/demos/misc/window_management/engine.cfg
+++ b/demos/misc/window_management/engine.cfg
@@ -2,7 +2,7 @@
 
 name="Window Management"
 main_scene="res://window_management.scn"
-icon="icon.png"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/misc/window_management/icon.png b/demos/misc/window_management/icon.png
index 0c422e37b0..ec5c7891f9 100644
--- a/demos/misc/window_management/icon.png
+++ b/demos/misc/window_management/icon.png
diff --git a/demos/misc/window_management/icon.png.flags b/demos/misc/window_management/icon.png.flags
deleted file mode 100644
index 5130fd1aab..0000000000
--- a/demos/misc/window_management/icon.png.flags
+++ /dev/null
@@ -1 +0,0 @@
-gen_mipmaps=false
diff --git a/demos/viewport/3d_in_2d/engine.cfg b/demos/viewport/3d_in_2d/engine.cfg
index cc893361b5..6d456d7bd4 100644
--- a/demos/viewport/3d_in_2d/engine.cfg
+++ b/demos/viewport/3d_in_2d/engine.cfg
@@ -2,3 +2,4 @@
 
 name="3D in 2D"
 main_scene="res://main.scn"
+icon="res://icon.png"
diff --git a/demos/viewport/3d_in_2d/icon.png b/demos/viewport/3d_in_2d/icon.png
new file mode 100644
index 0000000000..d8a332c18f
--- /dev/null
+++ b/demos/viewport/3d_in_2d/icon.png
diff --git a/demos/viewport/gui_in_3d/engine.cfg b/demos/viewport/gui_in_3d/engine.cfg
index 25a6636132..252e53ca33 100644
--- a/demos/viewport/gui_in_3d/engine.cfg
+++ b/demos/viewport/gui_in_3d/engine.cfg
@@ -2,3 +2,4 @@
 
 name="GUI in 3D"
 main_scene="res://gui_3d.scn"
+icon="res://icon.png"
diff --git a/demos/viewport/gui_in_3d/icon.png b/demos/viewport/gui_in_3d/icon.png
new file mode 100644
index 0000000000..22bdd791bb
--- /dev/null
+++ b/demos/viewport/gui_in_3d/icon.png
diff --git a/demos/viewport/screen_capture/engine.cfg b/demos/viewport/screen_capture/engine.cfg
index b25ed8258e..a843242720 100644
--- a/demos/viewport/screen_capture/engine.cfg
+++ b/demos/viewport/screen_capture/engine.cfg
@@ -2,6 +2,7 @@
 
 name="Screen Capturing"
 main_scene="res://screen_capture.scn"
+icon="res://icon.png"
 
 [display]
 
diff --git a/demos/viewport/screen_capture/icon.png b/demos/viewport/screen_capture/icon.png
new file mode 100644
index 0000000000..a696824775
--- /dev/null
+++ b/demos/viewport/screen_capture/icon.png
diff --git a/doc/base/classes.xml b/doc/base/classes.xml
index bd6ed64fdf..39b3791e84 100644
--- a/doc/base/classes.xml
+++ b/doc/base/classes.xml
@@ -1603,7 +1603,7 @@
 	Axis-Aligned Bounding Box.
 	</brief_description>
 	<description>
-	AABB provides an 3D Axis-Aligned Bounding Box. It consists of a position and a size, and several utility functions. It is typically used for simple (fast) overlap tests.
+	AABB provides a 3D Axis-Aligned Bounding Box. It consists of a position, a size, and several utility functions. It is typically used for simple (fast) overlap tests.
 	</description>
 	<methods>
 		<method name="encloses">
@@ -1628,7 +1628,7 @@
 			<return type="float">
 			</return>
 			<description>
-			Get the area inside the [AABB].
+			Get the area of the [AABB].
 			</description>
 		</method>
 		<method name="get_endpoint">
@@ -1697,7 +1697,7 @@
 			<argument index="0" name="by" type="float">
 			</argument>
 			<description>
-			Return a copy of the AABB grown a given a mount of units towards all the sides.
+			Return a copy of the [AABB] grown a given amount of units towards all the sides.
 			</description>
 		</method>
 		<method name="has_no_area">
@@ -1747,7 +1747,7 @@
 			<argument index="0" name="plane" type="Plane">
 			</argument>
 			<description>
-			Return true if the AABB is at both sides of a plane.
+			Return true if the [AABB] is at both sides of a plane.
 			</description>
 		</method>
 		<method name="intersects_segment">
@@ -1766,7 +1766,7 @@
 			<argument index="0" name="with" type="AABB">
 			</argument>
 			<description>
-			Combine this [AABB] with another one, a larger one is returned that contains both.
+			Combine this [AABB] with another; a larger one that contains both is returned.
 			</description>
 		</method>
 		<method name="AABB">
@@ -3197,6 +3197,24 @@
 			<description>
 			</description>
 		</method>
+		<method name="set_animation_process_mode">
+			<argument index="0" name="mode" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="get_animation_process_mode" qualifiers="const">
+			<return type="int">
+			</return>
+			<description>
+			</description>
+		</method>
+		<method name="advance">
+			<argument index="0" name="delta" type="float">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="reset">
 			<description>
 			</description>
@@ -4353,7 +4371,7 @@
 			Stop a given voice.
 			</description>
 		</method>
-		<method name="free">
+		<method name="free_rid">
 			<argument index="0" name="rid" type="RID">
 			</argument>
 			<description>
@@ -5477,7 +5495,7 @@
 		</constant>
 	</constants>
 </class>
-<class name="ButtonGroup" inherits="Control" category="Core">
+<class name="ButtonGroup" inherits="BoxContainer" category="Core">
 	<brief_description>
 	Group of Buttons.
 	</brief_description>
@@ -5523,6 +5541,10 @@
 	</methods>
 	<constants>
 	</constants>
+	<theme_items>
+		<theme_item name="panel" type="StyleBox">
+		</theme_item>
+	</theme_items>
 </class>
 <class name="Camera" inherits="Spatial" category="Core">
 	<brief_description>
@@ -6628,7 +6650,7 @@
 			<argument index="0" name="radius" type="float">
 			</argument>
 			<description>
-			Radius of the [CapsuleShape2D].
+			Set the radius of the [CapsuleShape2D].
 			</description>
 		</method>
 		<method name="get_radius" qualifiers="const">
@@ -6642,7 +6664,7 @@
 			<argument index="0" name="height" type="float">
 			</argument>
 			<description>
-			Height of the [CapsuleShape2D].
+			Set the height of the [CapsuleShape2D].
 			</description>
 		</method>
 		<method name="get_height" qualifiers="const">
@@ -6947,7 +6969,7 @@
 	Base node for 2D collisionables.
 	</brief_description>
 	<description>
-	CollisionObject2D is the base class for 2D physics collisionables. They can hold any number of 2D collision shapes. Usually, they are edited by placing CollisionBody2D and CollisionPolygon2D nodes as children. Such nodes are for reference ant not present outside the editor, so code should use the regular shape API.
+	CollisionObject2D is the base class for 2D physics collisionables. They can hold any number of 2D collision shapes. Usually, they are edited by placing [CollisionShape2D] and [CollisionPolygon2D] nodes as children. Such nodes are for reference and not present outside the editor, so code should use the regular shape API.
 	</description>
 	<methods>
 		<method name="_input_event" qualifiers="virtual">
@@ -7143,58 +7165,69 @@
 </class>
 <class name="CollisionPolygon2D" inherits="Node2D" category="Core">
 	<brief_description>
-	Editor-Only class.
+	Editor-only class for easy editing of collision polygons.
 	</brief_description>
 	<description>
-	Editor-Only class. This is not present when running the game. It's used in the editor to properly edit and position collision shapes in [CollisionObject2D]. This is not accessible from regular code. This class is for editing custom shape polygons.
+	Editor-only class. This is not present when running the game. It's used in the editor to properly edit and position collision shapes in [CollisionObject2D]. This is not accessible from regular code. This class is for editing custom shape polygons.
 	</description>
 	<methods>
 		<method name="set_polygon">
 			<argument index="0" name="polygon" type="Vector2Array">
 			</argument>
 			<description>
+			Set the array of points forming the polygon.
+			When editing the point list via the editor, depending on [method get_build_mode], it has to be a list of points (for [code]build_mode[/code]=0), or a list of lines (for [code]build_mode[/code]=1). In the second case, the even elements of the array define the start point of the line, and the odd elements the end point.
 			</description>
 		</method>
 		<method name="get_polygon" qualifiers="const">
 			<return type="Vector2Array">
 			</return>
 			<description>
+			Return the list of points that define the polygon.
 			</description>
 		</method>
 		<method name="set_build_mode">
 			<argument index="0" name="arg0" type="int">
 			</argument>
 			<description>
+			Set whether the polygon is to be a [ConvexPolygonShape2D] ([code]build_mode[/code]=0), or a [ConcavePolygonShape2D] ([code]build_mode[/code]=1).
 			</description>
 		</method>
 		<method name="get_build_mode" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Return whether the polygon is a [ConvexPolygonShape2D] ([code]build_mode[/code]=0), or a [ConcavePolygonShape2D] ([code]build_mode[/code]=1).
 			</description>
 		</method>
 		<method name="set_trigger">
 			<argument index="0" name="arg0" type="bool">
 			</argument>
 			<description>
+			Set whether this polygon is a trigger. A trigger polygon detects collisions, but is otherwise unaffected by physics (i.e. colliding objects will not get blocked).
 			</description>
 		</method>
 		<method name="is_trigger" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			Return whether this polygon is a trigger.
 			</description>
 		</method>
 		<method name="get_collision_object_first_shape" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Return the index of the first shape generated by the editor.
+			When [code]build_mode[/code] is set to generate convex polygons, the shape shown in the editor may be decomposed into many convex polygons. In that case, a range of indexes is needed to directly access the [Shape2D]s.
+			When [code]build_mode[/code] is set to generate concave polygons, there is only one [Shape2D] generated, so the start index and the end index are the same.
 			</description>
 		</method>
 		<method name="get_collision_object_last_shape" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Return the index of the last shape generated by the editor.
 			</description>
 		</method>
 	</methods>
@@ -7253,40 +7286,45 @@
 </class>
 <class name="CollisionShape2D" inherits="Node2D" category="Core">
 	<brief_description>
-	Editor-Only class.
+	Editor-only class for easy editing of shapes.
 	</brief_description>
 	<description>
-	Editor-Only class. This is not present when running the game. It's used in the editor to properly edit and position collision shapes in [CollisionObject2D]. This is not accessible from regular code.
+	Editor-only class. This is not present when running the game. It's used in the editor to properly edit and position collision shapes in [CollisionObject2D]. This is not accessible from regular code.
 	</description>
 	<methods>
 		<method name="set_shape">
 			<argument index="0" name="shape" type="Object">
 			</argument>
 			<description>
+			Set this shape's [Shape2D]. This will not appear as a node, but can be directly edited as a property.
 			</description>
 		</method>
 		<method name="get_shape" qualifiers="const">
 			<return type="Object">
 			</return>
 			<description>
+			Return this shape's [Shape2D].
 			</description>
 		</method>
 		<method name="set_trigger">
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
+			Set whether this shape is a trigger. A trigger shape detects collisions, but is otherwise unaffected by physics (i.e. will not block movement of colliding objects).
 			</description>
 		</method>
 		<method name="is_trigger" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			Return whether this shape is a trigger.
 			</description>
 		</method>
 		<method name="get_collision_object_shape_index" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Return the index of this shape inside its container [CollisionObject2D]. This can be used to directly access the underlying [Shape2D].
 			</description>
 		</method>
 	</methods>
@@ -7298,7 +7336,7 @@
 	Color in RGBA format.
 	</brief_description>
 	<description>
-	A color is represented as red, green and blue (r,g,b) components. Additionally, "a" represents the alpha component, often used for transparency. Values are in floating point and usually range from 0 to 1.  Some methods (such as set_modulate() ) may accept values > 1.
+	A color is represented as red, green and blue (r,g,b) components. Additionally, "a" represents the alpha component, often used for transparency. Values are in floating point and usually range from 0 to 1.  Some methods (such as set_modulate() ) may accept values &gt; 1.
 	</description>
 	<methods>
 		<method name="blend">
@@ -7666,7 +7704,7 @@
 	Concave polygon shape.
 	</brief_description>
 	<description>
-	Concave polygon shape resource, which can be set into a [PhysicsBody] or area.  This shape is created by feeding a list of triangles.
+	Concave polygon shape resource, which can be set into a [PhysicsBody] or area. This shape is created by feeding a list of triangles.
 	</description>
 	<methods>
 		<method name="set_faces">
@@ -7693,6 +7731,7 @@
 	</brief_description>
 	<description>
 	Concave polygon 2D shape resource for physics. It is made out of segments and is very optimal for complex polygonal concave collisions. It is really not advised to use for RigidBody nodes. A CollisionPolygon2D in convex decomposition mode (solids) or several convex objects are advised for that instead. Otherwise, a concave polygon 2D shape is better for static collisions.
+	The main difference between a [ConvexPolygonShape2D] and a [ConcavePolygonShape2D] is that a concave polygon assumes it is concave and uses a more complex method of collision detection, and a convex one forces itself to be convex in order to speed up collision detection.
 	</description>
 	<methods>
 		<method name="set_segments">
@@ -8583,14 +8622,15 @@
 	Convex Polygon Shape for 2D physics.
 	</brief_description>
 	<description>
-	Convex Polygon Shape for 2D physics.
+	Convex Polygon Shape for 2D physics. A convex polygon, whatever its shape, is internally decomposed into as many convex polygons as needed to ensure all collision checks against it are always done on convex polygons (which are faster to check).
+	The main difference between a [ConvexPolygonShape2D] and a [ConcavePolygonShape2D] is that a concave polygon assumes it is concave and uses a more complex method of collision detection, and a convex one forces itself to be convex in order to speed up collision detection.
 	</description>
 	<methods>
 		<method name="set_point_cloud">
 			<argument index="0" name="point_cloud" type="Vector2Array">
 			</argument>
 			<description>
-			Create the point set from a point cloud. The resulting convex hull will be set as the shape.
+			Currently, this method does nothing.
 			</description>
 		</method>
 		<method name="set_points">
@@ -8719,14 +8759,18 @@
 </class>
 <class name="Curve2D" inherits="Resource" category="Core">
 	<brief_description>
+	Describes a Bezier curve in 2D space.
 	</brief_description>
 	<description>
+	This class describes a Bezier curve in 2D space. It is mainly used to give a shape to a [Path2D], but can be manually sampled for other purposes.
+It keeps a cache of precalculated points along the curve, to speed further calculations up.
 	</description>
 	<methods>
 		<method name="get_point_count" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Returns the number of points describing the curve.
 			</description>
 		</method>
 		<method name="add_point">
@@ -8739,6 +8783,8 @@
 			<argument index="3" name="atpos" type="int" default="-1">
 			</argument>
 			<description>
+			Adds a point to a curve, at position "pos", with control points "in" and "out".
+If "atpos" is given, the point is inserted before the point number "atpos", moving that point (and every point after) after the inserted point. If "atpos" is not given, or is an illegal value (atpos &lt;0 or atpos &gt;= [method get_point_count]), the point will be appended at the end of the point list.
 			</description>
 		</method>
 		<method name="set_point_pos">
@@ -8747,6 +8793,7 @@
 			<argument index="1" name="pos" type="Vector2">
 			</argument>
 			<description>
+			Sets the position for the vertex "idx". If the index is out of bounds, the function sends an error to the console.
 			</description>
 		</method>
 		<method name="get_point_pos" qualifiers="const">
@@ -8755,6 +8802,7 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the position of the vertex "idx". If the index is out of bounds, the function sends an error to the console, and returns (0, 0).
 			</description>
 		</method>
 		<method name="set_point_in">
@@ -8763,6 +8811,7 @@
 			<argument index="1" name="pos" type="Vector2">
 			</argument>
 			<description>
+			Sets the position of the control point leading to the vertex "idx". If the index is out of bounds, the function sends an error to the console.
 			</description>
 		</method>
 		<method name="get_point_in" qualifiers="const">
@@ -8771,6 +8820,7 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the position of the control point leading to the vertex "idx". If the index is out of bounds, the function sends an error to the console, and returns (0, 0).
 			</description>
 		</method>
 		<method name="set_point_out">
@@ -8779,6 +8829,7 @@
 			<argument index="1" name="pos" type="Vector2">
 			</argument>
 			<description>
+			Sets the position of the control point leading out of the vertex "idx". If the index is out of bounds, the function sends an error to the console.
 			</description>
 		</method>
 		<method name="get_point_out" qualifiers="const">
@@ -8787,12 +8838,14 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the position of the control point leading out of the vertex "idx". If the index is out of bounds, the function sends an error to the console, and returns (0, 0).
 			</description>
 		</method>
 		<method name="remove_point">
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Deletes the point "idx" from the curve. Sends an error to the console if "idx" is out of bounds.
 			</description>
 		</method>
 		<method name="interpolate" qualifiers="const">
@@ -8803,6 +8856,8 @@
 			<argument index="1" name="t" type="float">
 			</argument>
 			<description>
+			Returns the position between the vertex "idx" and the vertex "idx"+1, where "t" controls if the point is the first vertex (t = 0.0), the last vertex (t = 1.0), or in between. Values of "t" outside the range (0.0 &lt;= t &lt;= 1.0) give strange, but predictable results.
+If "idx" is out of bounds it is truncated to the first or last vertex, and "t" is ignored. If the curve has no points, the function sends an error to the console, and returns (0, 0).
 			</description>
 		</method>
 		<method name="interpolatef" qualifiers="const">
@@ -8811,24 +8866,28 @@
 			<argument index="0" name="fofs" type="float">
 			</argument>
 			<description>
+			Returns the position at the vertex "fofs". It calls [method interpolate] using the integer part of fofs as "idx", and its fractional part as "t".
 			</description>
 		</method>
 		<method name="set_bake_interval">
 			<argument index="0" name="distance" type="float">
 			</argument>
 			<description>
+			Sets the distance in pixels between two adjacent cached points. Changing it forces the cache to be recomputed the next time a xxx_baked_xxx function is called. The smaller the distance, the more points the cache will have, and the more memory it will consume, so use with care.
 			</description>
 		</method>
 		<method name="get_bake_interval" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the distance between two adjacent cached points.
 			</description>
 		</method>
 		<method name="get_baked_length" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the total length of the curve, based on the cached points. Given enough density (see [method set_bake_interval]), it should be approximate enough.
 			</description>
 		</method>
 		<method name="interpolate_baked" qualifiers="const">
@@ -8839,12 +8898,16 @@
 			<argument index="1" name="cubic" type="bool" default="false">
 			</argument>
 			<description>
+			Returns a point within the curve at position "offset", where "offset" is measured as a pixel distance along the curve.
+To do that, it finds the two cached points between which "offset" lies, then interpolates the values. This interpolation is cubic if "cubic" is set to true, or linear if set to false.
+Cubic interpolation tends to follow the curves better, but linear is faster (and often, precise enough).
 			</description>
 		</method>
 		<method name="get_baked_points" qualifiers="const">
 			<return type="Vector2Array">
 			</return>
 			<description>
+			Returns the cache of points as a [Vector2Array].
 			</description>
 		</method>
 		<method name="tesselate" qualifiers="const">
@@ -8855,6 +8918,10 @@
 			<argument index="1" name="tolerance_degrees" type="float" default="4">
 			</argument>
 			<description>
+			Returns a list of points along the curve, with a curvature controlled point density. That is, the curvier parts will have more points than the straighter parts.
+This approximation makes straight segments between each point, then subdivides those segments until the resulting shape is similar enough.
+"max_stages" controls how many subdivisions a curve segment may face before it is considered approximate enough. Each subdivision splits the segment in half, so the default 5 stages may mean up to 32 subdivisions per curve segment. Increase with care!
+"tolerance_degrees" controls how many degrees the midpoint of a segment may deviate from the real curve, before the segment has to be subdivided.
 			</description>
 		</method>
 	</methods>
@@ -8863,14 +8930,18 @@
 </class>
 <class name="Curve3D" inherits="Resource" category="Core">
 	<brief_description>
+	Describes a Bezier curve in 3D space.
 	</brief_description>
 	<description>
+	This class describes a Bezier curve in 3D space. It is mainly used to give a shape to a [Path], but can be manually sampled for other purposes.
+It keeps a cache of precalculated points along the curve, to speed further calculations up.
 	</description>
 	<methods>
 		<method name="get_point_count" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Returns the number of points describing the curve.
 			</description>
 		</method>
 		<method name="add_point">
@@ -8883,6 +8954,8 @@
 			<argument index="3" name="atpos" type="int" default="-1">
 			</argument>
 			<description>
+			Adds a point to a curve, at position "pos", with control points "in" and "out".
+If "atpos" is given, the point is inserted before the point number "atpos", moving that point (and every point after) after the inserted point. If "atpos" is not given, or is an illegal value (atpos &lt;0 or atpos &gt;= [method get_point_count]), the point will be appended at the end of the point list.
 			</description>
 		</method>
 		<method name="set_point_pos">
@@ -8891,6 +8964,7 @@
 			<argument index="1" name="pos" type="Vector3">
 			</argument>
 			<description>
+			Sets the position for the vertex "idx". If the index is out of bounds, the function sends an error to the console.
 			</description>
 		</method>
 		<method name="get_point_pos" qualifiers="const">
@@ -8899,6 +8973,7 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the position of the vertex "idx". If the index is out of bounds, the function sends an error to the console, and returns (0, 0, 0).
 			</description>
 		</method>
 		<method name="set_point_tilt">
@@ -8907,6 +8982,8 @@
 			<argument index="1" name="tilt" type="float">
 			</argument>
 			<description>
+			Sets the tilt angle in radians for the point "idx". If the index is out of bounds, the function sends an error to the console.
+The tilt controls the rotation along the look-at axis an object travelling the path would have. In the case of a curve controlling a [PathFollow], this tilt is an offset over the natural tilt the PathFollow calculates.
 			</description>
 		</method>
 		<method name="get_point_tilt" qualifiers="const">
@@ -8915,6 +8992,7 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the tilt angle in radians for the point "idx". If the index is out of bounds, the function sends an error to the console, and returns 0.
 			</description>
 		</method>
 		<method name="set_point_in">
@@ -8923,6 +9001,7 @@
 			<argument index="1" name="pos" type="Vector3">
 			</argument>
 			<description>
+			Sets the position of the control point leading to the vertex "idx". If the index is out of bounds, the function sends an error to the console.
 			</description>
 		</method>
 		<method name="get_point_in" qualifiers="const">
@@ -8931,6 +9010,7 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the position of the control point leading to the vertex "idx". If the index is out of bounds, the function sends an error to the console, and returns (0, 0, 0).
 			</description>
 		</method>
 		<method name="set_point_out">
@@ -8939,6 +9019,7 @@
 			<argument index="1" name="pos" type="Vector3">
 			</argument>
 			<description>
+			Sets the position of the control point leading out of the vertex "idx". If the index is out of bounds, the function sends an error to the console.
 			</description>
 		</method>
 		<method name="get_point_out" qualifiers="const">
@@ -8947,12 +9028,14 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the position of the control point leading out of the vertex "idx". If the index is out of bounds, the function sends an error to the console, and returns (0, 0, 0).
 			</description>
 		</method>
 		<method name="remove_point">
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Deletes the point "idx" from the curve. Sends an error to the console if "idx" is out of bounds.
 			</description>
 		</method>
 		<method name="interpolate" qualifiers="const">
@@ -8963,6 +9046,8 @@
 			<argument index="1" name="t" type="float">
 			</argument>
 			<description>
+			Returns the position between the vertex "idx" and the vertex "idx"+1, where "t" controls if the point is the first vertex (t = 0.0), the last vertex (t = 1.0), or in between. Values of "t" outside the range (0.0 &lt;= t &lt;= 1.0) give strange, but predictable results.
+If "idx" is out of bounds it is truncated to the first or last vertex, and "t" is ignored. If the curve has no points, the function sends an error to the console, and returns (0, 0, 0).
 			</description>
 		</method>
 		<method name="interpolatef" qualifiers="const">
@@ -8971,24 +9056,28 @@
 			<argument index="0" name="fofs" type="float">
 			</argument>
 			<description>
+			Returns the position at the vertex "fofs". It calls [method interpolate] using the integer part of fofs as "idx", and its fractional part as "t".
 			</description>
 		</method>
 		<method name="set_bake_interval">
 			<argument index="0" name="distance" type="float">
 			</argument>
 			<description>
+			Sets the distance in 3D units between two adjacent cached points. Changing it forces the cache to be recomputed the next time a xxx_baked_xxx function is called. The smaller the distance, the more points the cache will have, and the more memory it will consume, so use with care.
 			</description>
 		</method>
 		<method name="get_bake_interval" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the distance between two adjacent cached points.
 			</description>
 		</method>
 		<method name="get_baked_length" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the total length of the curve, based on the cached points. Given enough density (see [method set_bake_interval]), it should be approximate enough.
 			</description>
 		</method>
 		<method name="interpolate_baked" qualifiers="const">
@@ -8999,18 +9088,23 @@
 			<argument index="1" name="cubic" type="bool" default="false">
 			</argument>
 			<description>
+			Returns a point within the curve at position "offset", where "offset" is measured as a distance in 3D units along the curve.
+To do that, it finds the two cached points between which "offset" lies, then interpolates the values. This interpolation is cubic if "cubic" is set to true, or linear if set to false.
+Cubic interpolation tends to follow the curves better, but linear is faster (and often, precise enough).
 			</description>
 		</method>
 		<method name="get_baked_points" qualifiers="const">
 			<return type="Vector3Array">
 			</return>
 			<description>
+			Returns the cache of points as a [Vector3Array].
 			</description>
 		</method>
 		<method name="get_baked_tilts" qualifiers="const">
 			<return type="RealArray">
 			</return>
 			<description>
+			Returns the cache of tilts as a [RealArray].
 			</description>
 		</method>
 		<method name="tesselate" qualifiers="const">
@@ -9021,6 +9115,10 @@
 			<argument index="1" name="tolerance_degrees" type="float" default="4">
 			</argument>
 			<description>
+			Returns a list of points along the curve, with a curvature controlled point density. That is, the curvier parts will have more points than the straighter parts.
+This approximation makes straight segments between each point, then subdivides those segments until the resulting shape is similar enough.
+"max_stages" controls how many subdivisions a curve segment may face before it is considered approximate enough. Each subdivision splits the segment in half, so the default 5 stages may mean up to 32 subdivisions per curve segment. Increase with care!
+"tolerance_degrees" controls how many degrees the midpoint of a segment may deviate from the real curve, before the segment has to be subdivided.
 			</description>
 		</method>
 	</methods>
@@ -10683,6 +10781,14 @@
 		</constant>
 		<constant name="FLAG_DISCARD_ALPHA" value="3">
 		</constant>
+		<constant name="LIGHT_SHADER_LAMBERT" value="0">
+		</constant>
+		<constant name="LIGHT_SHADER_WRAP" value="1">
+		</constant>
+		<constant name="LIGHT_SHADER_VELVET" value="2">
+		</constant>
+		<constant name="LIGHT_SHADER_TOON" value="3">
+		</constant>
 	</constants>
 </class>
 <class name="Font" inherits="Resource" category="Core">
@@ -12440,9 +12546,13 @@
 			</argument>
 			<argument index="2" name="use_ssl" type="bool" default="false">
 			</argument>
-			<argument index="3" name="arg3" type="bool" default="true">
+			<argument index="3" name="verify_host" type="bool" default="true">
 			</argument>
 			<description>
+			Connect to a host. This needs to be done before any requests are sent.
+The host should not have http:// prepended; if a protocol identifier is provided, it will be stripped.
+
+verify_host will check the SSL identity of the host if set to true.
 			</description>
 		</method>
 		<method name="set_connection">
@@ -12463,6 +12573,19 @@
 			<argument index="3" name="body" type="String" default="&quot;&quot;">
 			</argument>
 			<description>
+			Sends a request to the connected host. The url is what normally follows the hostname, i.e:
+http://somehost.com/index.php
+url would be "index.php"
+
+Headers are HTTP request headers
+
+To create a POST request with query strings to push to the server, do:
+var fields = {"username" : "user",
+                       "password" : "pass"}
+var queryString = httpClient.query_string_from_dict(fields)
+var headers = ["Content-Type: application/x-www-form-urlencoded",
+		   	             "Content-Length: " + str(queryString.length())]
+var result = httpClient.request(httpClient.METHOD_POST, "index.php", headers, queryString)
 			</description>
 		</method>
 		<method name="send_body_text">
@@ -12471,6 +12594,7 @@
 			<argument index="0" name="body" type="String">
 			</argument>
 			<description>
+			Stub function
 			</description>
 		</method>
 		<method name="send_body_data">
@@ -12479,6 +12603,7 @@
 			<argument index="0" name="body" type="RawArray">
 			</argument>
 			<description>
+			Stub function
 			</description>
 		</method>
 		<method name="close">
@@ -12531,12 +12656,14 @@
 			<argument index="0" name="bytes" type="int">
 			</argument>
 			<description>
+			Sets the size of the buffer used and maximum bytes to read per iteration
 			</description>
 		</method>
 		<method name="set_blocking_mode">
 			<argument index="0" name="enabled" type="bool">
 			</argument>
 			<description>
+			If set to true, execute will wait until all data is read from the response.
 			</description>
 		</method>
 		<method name="is_blocking_mode_enabled" qualifiers="const">
@@ -12549,12 +12676,28 @@
 			<return type="int">
 			</return>
 			<description>
+			Returns a status constant like STATUS_REQUESTING. Need to call [method poll] in order to get status updates.
 			</description>
 		</method>
 		<method name="poll">
 			<return type="Error">
 			</return>
 			<description>
+			This needs to be called in order to have any request processed. Check results with [method get_status]
+			</description>
+		</method>
+		<method name="query_string_from_dict">
+			<return type="String">
+			</return>
+			<argument index="0" name="fields" type="Dictionary">
+			</argument>
+			<description>
+			Generates a GET/POST application/x-www-form-urlencoded style query string from a provided dictionary.
+
+var fields = {"username": "user", "password": "pass"}
+var queryString = httpClient.query_string_from_dict(fields)
+
+returns: "username=user&amp;password=pass"
 			</description>
 		</method>
 	</methods>
@@ -13004,6 +13147,21 @@
 			<description>
 			</description>
 		</method>
+		<method name="Image">
+			<return type="Image">
+			</return>
+			<argument index="0" name="width" type="int">
+			</argument>
+			<argument index="1" name="height" type="int">
+			</argument>
+			<argument index="2" name="mipmaps" type="bool">
+			</argument>
+			<argument index="3" name="format" type="int">
+			</argument>
+			<description>
+			Create an empty image of a specific size and format.
+			</description>
+		</method>
 	</methods>
 	<constants>
 		<constant name="COMPRESS_BC" value="0">
@@ -14337,6 +14495,12 @@
 			<description>
 			</description>
 		</method>
+		<method name="get_instance_path" qualifiers="const">
+			<return type="String">
+			</return>
+			<description>
+			</description>
+		</method>
 	</methods>
 	<constants>
 	</constants>
@@ -14889,14 +15053,13 @@
 			<description>
 			</description>
 		</method>
-		<method name="can_move_to">
+		<method name="can_teleport_to">
 			<return type="bool">
 			</return>
 			<argument index="0" name="position" type="Vector3">
 			</argument>
-			<argument index="1" name="arg1" type="bool">
-			</argument>
 			<description>
+			Returns whether the KinematicBody can be teleported to the destination given as an argument, checking all collision shapes of the body against potential colliders at the destination.
 			</description>
 		</method>
 		<method name="is_colliding" qualifiers="const">
@@ -15786,6 +15949,18 @@
 	LineEdit provides a single line string editor, used for text fields.
 	</description>
 	<methods>
+		<method name="set_align">
+			<argument index="0" name="align" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="get_align" qualifiers="const">
+			<return type="int">
+			</return>
+			<description>
+			</description>
+		</method>
 		<method name="clear">
 			<description>
 			Clear the [LineEdit] text.
@@ -15899,6 +16074,14 @@
 		</signal>
 	</signals>
 	<constants>
+		<constant name="ALIGN_LEFT" value="0">
+		</constant>
+		<constant name="ALIGN_CENTER" value="1">
+		</constant>
+		<constant name="ALIGN_RIGHT" value="2">
+		</constant>
+		<constant name="ALIGN_FILL" value="3">
+		</constant>
 	</constants>
 	<theme_items>
 		<theme_item name="minimum_spaces" type="int">
@@ -16617,6 +16800,8 @@
 		</theme_item>
 		<theme_item name="pressed" type="StyleBox">
 		</theme_item>
+		<theme_item name="focus" type="StyleBox">
+		</theme_item>
 		<theme_item name="disabled" type="StyleBox">
 		</theme_item>
 		<theme_item name="normal" type="StyleBox">
@@ -18468,6 +18653,18 @@
 			Replace a node in a scene by a given one. Subscriptions that pass through this node will be lost.
 			</description>
 		</method>
+		<method name="set_scene_instance_load_placeholder">
+			<argument index="0" name="load_placeholder" type="bool">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="get_scene_instance_load_placeholder" qualifiers="const">
+			<return type="bool">
+			</return>
+			<description>
+			</description>
+		</method>
 		<method name="get_viewport" qualifiers="const">
 			<return type="Object">
 			</return>
@@ -21294,20 +21491,24 @@
 </class>
 <class name="Path" inherits="Spatial" category="Core">
 	<brief_description>
+	Container for a [Curve3D].
 	</brief_description>
 	<description>
+	This class is a container/Node-ification of a [Curve3D], so it can have [Spatial] properties and [Node] info.
 	</description>
 	<methods>
 		<method name="set_curve">
 			<argument index="0" name="curve" type="Curve3D">
 			</argument>
 			<description>
+			Sets the [Curve3D].
 			</description>
 		</method>
 		<method name="get_curve" qualifiers="const">
 			<return type="Curve3D">
 			</return>
 			<description>
+			Returns the [Curve3D] contained.
 			</description>
 		</method>
 	</methods>
@@ -21316,20 +21517,24 @@
 </class>
 <class name="Path2D" inherits="Node2D" category="Core">
 	<brief_description>
+	Container for a [Curve2D].
 	</brief_description>
 	<description>
+	This class is a container/Node-ification of a [Curve2D], so it can have [Node2D] properties and [Node] info.
 	</description>
 	<methods>
 		<method name="set_curve">
 			<argument index="0" name="curve" type="Curve2D">
 			</argument>
 			<description>
+			Sets the [Curve2D].
 			</description>
 		</method>
 		<method name="get_curve" qualifiers="const">
 			<return type="Curve2D">
 			</return>
 			<description>
+			Returns the [Curve2D] contained.
 			</description>
 		</method>
 	</methods>
@@ -21338,194 +21543,238 @@
 </class>
 <class name="PathFollow" inherits="Spatial" category="Core">
 	<brief_description>
+	Point sampler for a [Path].
 	</brief_description>
 	<description>
+	This node takes its parent [Path], and returns the coordinates of a point within it, given a distance from the first vertex.
+It is useful for making other nodes follow a path, without coding the movement pattern. For that, the nodes must be descendants of this node. Then, when setting an offset in this node, the descendant nodes will move accordingly.
 	</description>
 	<methods>
 		<method name="set_offset">
 			<argument index="0" name="offset" type="float">
 			</argument>
 			<description>
+			Sets the distance from the first vertex, measured in 3D units along the path. This sets this node's position to a point within the path.
 			</description>
 		</method>
 		<method name="get_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the distance along the path in 3D units.
 			</description>
 		</method>
 		<method name="set_h_offset">
 			<argument index="0" name="h_offset" type="float">
 			</argument>
 			<description>
+			Moves this node in the X axis. As this node's position will be set every time its offset is set, this allows many PathFollow to share the same curve (and thus the same movement pattern), yet not return the same position for a given path offset.
+A similar effect may be achieved moving this node's descendants.
 			</description>
 		</method>
 		<method name="get_h_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the X displacement this node has from its parent [Path].
 			</description>
 		</method>
 		<method name="set_v_offset">
 			<argument index="0" name="v_offset" type="float">
 			</argument>
 			<description>
+			Moves this node in the Y axis, for the same reasons as [method set_h_offset].
 			</description>
 		</method>
 		<method name="get_v_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the Y displacement this node has from its parent [Path].
 			</description>
 		</method>
 		<method name="set_unit_offset">
 			<argument index="0" name="unit_offset" type="float">
 			</argument>
 			<description>
+			Sets the distance from the first vertex, considering 0.0 as the first vertex and 1.0 as the last. This is just another way of expressing the offset within the path, as the offset supplied is multiplied internally by the path's length.
 			</description>
 		</method>
 		<method name="get_unit_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the distance along the path as a number in the range 0.0 (for the first vertex) to 1.0 (for the last).
 			</description>
 		</method>
 		<method name="set_rotation_mode">
 			<argument index="0" name="rotation_mode" type="int">
 			</argument>
 			<description>
+			Allows or forbids rotation on one or more axes, per the constants below.
 			</description>
 		</method>
 		<method name="get_rotation_mode" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Returns the rotation mode. The constants below list which axes are allowed to rotate for each mode.
 			</description>
 		</method>
 		<method name="set_cubic_interpolation">
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
+			The points along the [Curve3D] of the [Path] are precomputed before use, for faster calculations. The point at the requested offset is then calculated interpolating between two adjacent cached points. This may present a problem if the curve makes sharp turns, as the cached points may not follow the curve closely enough.
+There are two answers to this problem: Either increase the number of cached points and increase memory consumption, or make a cubic interpolation between two points at the cost of (slightly) slower calculations.
+This method controls whether the position between two cached points is interpolated linearly, or cubically.
 			</description>
 		</method>
 		<method name="get_cubic_interpolation" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			This method returns whether the position between two cached points (see [method set_cubic_interpolation]) is interpolated linearly, or cubically.
 			</description>
 		</method>
 		<method name="set_loop">
 			<argument index="0" name="loop" type="bool">
 			</argument>
 			<description>
+			If set, any offset outside the path's length (whether set by [method set_offset] or [method set_unit_offset]) will wrap around, instead of stopping at the ends. Set it for cyclic paths.
 			</description>
 		</method>
 		<method name="has_loop" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			Returns whether this node wraps its offsets around, or truncates them to the path ends.
 			</description>
 		</method>
 	</methods>
 	<constants>
 		<constant name="ROTATION_NONE" value="0">
+			Forbids the PathFollow to rotate.
 		</constant>
 		<constant name="ROTATION_Y" value="1">
+			Allows the PathFollow to rotate in the Y axis only.
 		</constant>
 		<constant name="ROTATION_XY" value="2">
+			Allows the PathFollow to rotate in both the X and Y axes.
 		</constant>
 		<constant name="ROTATION_XYZ" value="3">
+			Allows the PathFollow to rotate in any axis.
 		</constant>
 	</constants>
 </class>
 <class name="PathFollow2D" inherits="Node2D" category="Core">
 	<brief_description>
+	Point sampler for a [Path2D].
 	</brief_description>
 	<description>
+	This node takes its parent [Path2D], and returns the coordinates of a point within it, given a distance from the first vertex.
+It is useful for making other nodes follow a path, without coding the movement pattern. For that, the nodes must be descendants of this node. Then, when setting an offset in this node, the descendant nodes will move accordingly.
 	</description>
 	<methods>
 		<method name="set_offset">
 			<argument index="0" name="offset" type="float">
 			</argument>
 			<description>
+			Sets the distance from the first vertex, measured in pixels along the path. This sets this node's position to a point within the path.
 			</description>
 		</method>
 		<method name="get_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the distance along the path in pixels.
 			</description>
 		</method>
 		<method name="set_h_offset">
 			<argument index="0" name="h_offset" type="float">
 			</argument>
 			<description>
+			Moves this node horizontally. As this node's position will be set every time its offset is set, this allows many PathFollow2D to share the same curve (and thus the same movement pattern), yet not return the same position for a given path offset.
+A similar effect may be achieved moving this node's descendants.
 			</description>
 		</method>
 		<method name="get_h_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the horizontal displacement this node has from its parent [Path2D].
 			</description>
 		</method>
 		<method name="set_v_offset">
 			<argument index="0" name="v_offset" type="float">
 			</argument>
 			<description>
+			Moves the PathFollow2D vertically, for the same reasons as [method set_h_offset].
 			</description>
 		</method>
 		<method name="get_v_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the vertical displacement this node has from its parent [Path2D].
 			</description>
 		</method>
 		<method name="set_unit_offset">
 			<argument index="0" name="unit_offset" type="float">
 			</argument>
 			<description>
+			Sets the distance from the first vertex, considering 0.0 as the first vertex and 1.0 as the last. This is just another way of expressing the offset within the path, as the offset supplied is multiplied internally by the path's length.
 			</description>
 		</method>
 		<method name="get_unit_offset" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Returns the distance along the path as a number in the range 0.0 (for the first vertex) to 1.0 (for the last).
 			</description>
 		</method>
 		<method name="set_rotate">
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
+			If set, this node rotates to follow the path, making its descendants rotate.
 			</description>
 		</method>
 		<method name="is_rotating" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			Returns whether this node rotates to follow the path.
 			</description>
 		</method>
 		<method name="set_cubic_interpolation">
 			<argument index="0" name="enable" type="bool">
 			</argument>
 			<description>
+			The points along the [Curve2D] of the [Path2D] are precomputed before use, for faster calculations. The point at the requested offset is then calculated interpolating between two adjacent cached points. This may present a problem if the curve makes sharp turns, as the cached points may not follow the curve closely enough.
+There are two answers to this problem: Either increase the number of cached points and increase memory consumption, or make a cubic interpolation between two points at the cost of (slightly) slower calculations.
+This method controls whether the position between two cached points is interpolated linearly, or cubically.
 			</description>
 		</method>
 		<method name="get_cubic_interpolation" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			This method returns whether the position between two cached points (see [method set_cubic_interpolation]) is interpolated linearly, or cubically.
 			</description>
 		</method>
 		<method name="set_loop">
 			<argument index="0" name="loop" type="bool">
 			</argument>
 			<description>
+			If set, any offset outside the path's length (whether set by [method set_offset] or [method set_unit_offset]) will wrap around, instead of stopping at the ends. Set it for cyclic paths.
 			</description>
 		</method>
 		<method name="has_loop" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			Returns whether this node wraps its offsets around, or truncates them to the path ends.
 			</description>
 		</method>
 	</methods>
@@ -25885,6 +26134,14 @@
 			<description>
 			</description>
 		</method>
+		<method name="xform">
+			<return type="Vector3">
+			</return>
+			<argument index="0" name="v" type="Vector3">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="Quat">
 			<return type="Quat">
 			</return>
@@ -26129,12 +26386,14 @@
 			<return type="String">
 			</return>
 			<description>
+			Returns a copy of the array's contents formatted as String. Fast alternative to get_string_from_utf8(), assuming the content is ASCII-only (unlike the UTF-8 function, this function maps every byte to a character in the string, so any multibyte sequence will be torn apart).
 			</description>
 		</method>
 		<method name="get_string_from_utf8">
 			<return type="String">
 			</return>
 			<description>
+			Returns a copy of the array's contents formatted as String, assuming the array is formatted as UTF-8. Slower than get_string_from_ascii(), but works for UTF-8. Usually you should prefer this function over get_string_from_ascii() to support international input.
 			</description>
 		</method>
 		<method name="push_back">
@@ -26412,19 +26671,21 @@
 	Ray 2D shape resource for physics.
 	</brief_description>
 	<description>
-	Ray 2D shape resource for physics. A ray is not really a collision body, isntead it tries to separate itself from whatever is touching its far endpoint. It's often useful for ccharacters.
+	Ray 2D shape resource for physics. A ray is not really a collision body, instead it tries to separate itself from whatever is touching its far endpoint. It's often useful for characters.
 	</description>
 	<methods>
 		<method name="set_length">
 			<argument index="0" name="length" type="float">
 			</argument>
 			<description>
+			Set the length of the ray.
 			</description>
 		</method>
 		<method name="get_length" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
+			Return the length of the ray.
 			</description>
 		</method>
 	</methods>
@@ -26487,8 +26748,10 @@
 </class>
 <class name="Rect2" category="Built-In Types">
 	<brief_description>
+	2D Axis-aligned bounding box.
 	</brief_description>
 	<description>
+	Rect2 provides a 2D Axis-Aligned Bounding Box. It consists of a position, a size, and several utility functions. It is typically used for fast overlap tests.
 	</description>
 	<methods>
 		<method name="clip">
@@ -26497,6 +26760,7 @@
 			<argument index="0" name="b" type="Rect2">
 			</argument>
 			<description>
+			Returns the intersection of this [Rect2] and b.
 			</description>
 		</method>
 		<method name="encloses">
@@ -26505,6 +26769,7 @@
 			<argument index="0" name="b" type="Rect2">
 			</argument>
 			<description>
+			Returns true if this [Rect2] completely encloses another one.
 			</description>
 		</method>
 		<method name="expand">
@@ -26513,12 +26778,14 @@
 			<argument index="0" name="to" type="Vector2">
 			</argument>
 			<description>
+			Return this [Rect2] expanded to include a given point.
 			</description>
 		</method>
 		<method name="get_area">
 			<return type="float">
 			</return>
 			<description>
+			Get the area of the [Rect2].
 			</description>
 		</method>
 		<method name="grow">
@@ -26527,12 +26794,14 @@
 			<argument index="0" name="by" type="float">
 			</argument>
 			<description>
+			Return a copy of the [Rect2] grown a given amount of units towards all the sides.
 			</description>
 		</method>
 		<method name="has_no_area">
 			<return type="bool">
 			</return>
 			<description>
+			Return true if the [Rect2] is flat or empty.
 			</description>
 		</method>
 		<method name="has_point">
@@ -26541,6 +26810,7 @@
 			<argument index="0" name="point" type="Vector2">
 			</argument>
 			<description>
+			Return true if the [Rect2] contains a point.
 			</description>
 		</method>
 		<method name="intersects">
@@ -26549,6 +26819,7 @@
 			<argument index="0" name="b" type="Rect2">
 			</argument>
 			<description>
+			Return true if the [Rect2] overlaps with another.
 			</description>
 		</method>
 		<method name="merge">
@@ -26557,6 +26828,7 @@
 			<argument index="0" name="b" type="Rect2">
 			</argument>
 			<description>
+			Combine this [Rect2] with another; a larger one that contains both is returned.
 			</description>
 		</method>
 		<method name="Rect2">
@@ -26567,6 +26839,7 @@
 			<argument index="1" name="size" type="Vector2">
 			</argument>
 			<description>
+			Construct a [Rect2] by position and size.
 			</description>
 		</method>
 		<method name="Rect2">
@@ -26581,6 +26854,7 @@
 			<argument index="3" name="height" type="float">
 			</argument>
 			<description>
+			Construct a [Rect2] by x, y, width and height.
 			</description>
 		</method>
 	</methods>
@@ -26656,7 +26930,7 @@
 	Reference frame for GUI.
 	</brief_description>
 	<description>
-	Reference frame for GUI. It's just like an empty control, except a red box is displayed while editing around it's size at all times.
+	Reference frame for GUI. It's just like an empty control, except a red box is displayed while editing around its size at all times.
 	</description>
 	<methods>
 	</methods>
@@ -26669,8 +26943,27 @@
 </class>
 <class name="RegEx" inherits="Reference" category="Core">
 	<brief_description>
-	</brief_description>
-	<description>
+	Simple regular expression matcher.
+	</brief_description>
+	<description>
+	Class for finding text patterns in a string using regular expressions. Regular expressions are a way to define patterns of text to be searched.
+	This class only finds patterns in a string. It can not perform replacements.
+	Usage of regular expressions is too long to be explained here, but the Internet is full of tutorials and detailed explanations.
+	Currently supported features:
+		Capturing [code]()[/code] and non-capturing [code](?:)[/code] groups
+		Any character [code].[/code]
+		Shorthand character classes [code]\w \W \s \S \d \D[/code]
+		User-defined character classes such as [code][A-Za-z][/code]
+		Simple quantifiers [code]?[/code], [code]*[/code] and [code]+[/code]
+		Range quantifiers [code]{x,y}[/code]
+		Lazy (non-greedy) quantifiers [code]*?[/code]
+		Beginning [code]^[/code] and end [code]$[/code] anchors
+		Alternation [code]|[/code]
+		Backreferences [code]\1[/code] and [code]\g{1}[/code]
+		POSIX character classes [code][[:alnum:]][/code]
+		Lookahead [code](?=)[/code], [code](?!)[/code] and lookbehind [code](?&lt;=)[/code], [code](?&lt;!)[/code]
+		ASCII [code]\xFF[/code] and Unicode [code]\uFFFF[/code] code points (in a style similar to Python)
+		Word boundaries [code]\b[/code], [code]\B[/code]
 	</description>
 	<methods>
 		<method name="compile">
@@ -26678,7 +26971,10 @@
 			</return>
 			<argument index="0" name="pattern" type="String">
 			</argument>
+			<argument index="1" name="capture" type="int" default="9">
+			</argument>
 			<description>
+			Compiles and assigns the regular expression pattern to use. The limit on the number of capturing groups can be specified or made unlimited if negative.
 			</description>
 		</method>
 		<method name="find" qualifiers="const">
@@ -26691,22 +26987,26 @@
 			<argument index="2" name="end" type="int" default="-1">
 			</argument>
 			<description>
+			This method tries to find the pattern within the string, and returns the position where it was found. It also stores any capturing group (see [method get_capture]) for further retrieval.
 			</description>
 		</method>
 		<method name="clear">
 			<description>
+			This method resets the state of the object, as if it was freshly created. Namely, it unassigns the regular expression of this object, and forgets all captures made by the last [method find].
 			</description>
 		</method>
 		<method name="is_valid" qualifiers="const">
 			<return type="bool">
 			</return>
 			<description>
+			Returns whether this object has a valid regular expression assigned.
 			</description>
 		</method>
 		<method name="get_capture_count" qualifiers="const">
 			<return type="int">
 			</return>
 			<description>
+			Returns the number of capturing groups. A captured group is the part of a string that matches a part of the pattern delimited by parentheses (unless they are non-capturing parentheses [i](?:)[/i]).
 			</description>
 		</method>
 		<method name="get_capture" qualifiers="const">
@@ -26715,12 +27015,14 @@
 			<argument index="0" name="capture" type="int">
 			</argument>
 			<description>
+			Returns a captured group. A captured group is the part of a string that matches a part of the pattern delimited by parentheses (unless they are non-capturing parentheses [i](?:)[/i]).
 			</description>
 		</method>
 		<method name="get_captures" qualifiers="const">
 			<return type="StringArray">
 			</return>
 			<description>
+			Return a list of all the captures made by the regular expression.
 			</description>
 		</method>
 	</methods>
@@ -27251,6 +27553,12 @@
 			<description>
 			</description>
 		</method>
+		<method name="scroll_to_line">
+			<argument index="0" name="arg0" type="int">
+			</argument>
+			<description>
+			</description>
+		</method>
 		<method name="set_tab_size">
 			<argument index="0" name="spaces" type="int">
 			</argument>
@@ -29337,24 +29645,28 @@
 			<argument index="0" name="a" type="Vector2">
 			</argument>
 			<description>
+			Set the first point's position.
 			</description>
 		</method>
 		<method name="get_a" qualifiers="const">
 			<return type="Vector2">
 			</return>
 			<description>
+			Return the first point's position.
 			</description>
 		</method>
 		<method name="set_b">
 			<argument index="0" name="b" type="Vector2">
 			</argument>
 			<description>
+			Set the second point's position.
 			</description>
 		</method>
 		<method name="get_b" qualifiers="const">
 			<return type="Vector2">
 			</return>
 			<description>
+			Return the second point's position.
 			</description>
 		</method>
 	</methods>
@@ -30402,13 +30714,14 @@
 			</argument>
 			<description>
 			Use a custom solver bias. No need to change this unless you really know what you are doing.
+			The solver bias is a factor controlling how much two objects "rebound" off each other, when colliding, to avoid them getting into each other because of numerical imprecision.
 			</description>
 		</method>
 		<method name="get_custom_solver_bias" qualifiers="const">
 			<return type="float">
 			</return>
 			<description>
-			Return the custom solver bias. No need to change this unless you really know what you are doing.
+			Return the custom solver bias.
 			</description>
 		</method>
 		<method name="collide">
@@ -30421,6 +30734,8 @@
 			<argument index="2" name="shape_xform" type="Matrix32">
 			</argument>
 			<description>
+			Return whether this shape is colliding with another.
+			This method needs the transformation matrix for this shape ([code]local_xform[/code]), the shape to check collisions with ([code]with_shape[/code]), and the transformation matrix of that shape ([code]shape_xform[/code]).
 			</description>
 		</method>
 		<method name="collide_with_motion">
@@ -30437,6 +30752,8 @@
 			<argument index="4" name="shape_motion" type="Vector2">
 			</argument>
 			<description>
+			Return whether this shape would collide with another, if a given movement was applied.
+			This method needs the transformation matrix for this shape ([code]local_xform[/code]), the movement to test on this shape ([code]local_motion[/code]), the shape to check collisions with ([code]with_shape[/code]), the transformation matrix of that shape ([code]shape_xform[/code]), and the movement to test on the other object ([code]shape_motion[/code]).
 			</description>
 		</method>
 		<method name="collide_and_get_contacts">
@@ -30447,6 +30764,8 @@
 			<argument index="2" name="shape_xform" type="Matrix32">
 			</argument>
 			<description>
+			Return a list of the points where this shape touches another. If there are no collisions, the list is empty.
+			This method needs the transformation matrix for this shape ([code]local_xform[/code]), the shape to check collisions with ([code]with_shape[/code]), and the transformation matrix of that shape ([code]shape_xform[/code]).
 			</description>
 		</method>
 		<method name="collide_with_motion_and_get_contacts">
@@ -30461,6 +30780,8 @@
 			<argument index="4" name="shape_motion" type="Vector2">
 			</argument>
 			<description>
+			Return a list of the points where this shape would touch another, if a given movement was applied. If there are no collisions, the list is empty.
+			This method needs the transformation matrix for this shape ([code]local_xform[/code]), the movement to test on this shape ([code]local_motion[/code]), the shape to check collisions with ([code]with_shape[/code]), the transformation matrix of that shape ([code]shape_xform[/code]), and the movement to test on the other object ([code]shape_motion[/code]).
 			</description>
 		</method>
 	</methods>
@@ -32849,9 +33170,10 @@
 			</description>
 		</method>
 		<method name="to_ascii">
-			<return type="String">
+			<return type="RawArray">
 			</return>
 			<description>
+			Convert the String (which is a character array) to RawArray (which is an array of bytes). The conversion is speeded up in comparison to to_utf8() with the assumption that all the characters the String contains are only ASCII characters.
 			</description>
 		</method>
 		<method name="to_float">
@@ -32883,9 +33205,10 @@
 			</description>
 		</method>
 		<method name="to_utf8">
-			<return type="String">
+			<return type="RawArray">
 			</return>
 			<description>
+			Convert the String (which is an array of characters) to RawArray (which is an array of bytes). The conversion is a bit slower than to_ascii(), but supports all UTF-8 characters. Therefore, you should prefer this function over to_ascii().
 			</description>
 		</method>
 		<method name="xml_escape">
@@ -33782,15 +34105,17 @@
 		<method name="cursor_set_column">
 			<argument index="0" name="column" type="int">
 			</argument>
+			<argument index="1" name="arg1" type="bool">
+			</argument>
 			<description>
-			Set the current column of the text editor.
 			</description>
 		</method>
 		<method name="cursor_set_line">
 			<argument index="0" name="line" type="int">
 			</argument>
+			<argument index="1" name="arg1" type="bool">
+			</argument>
 			<description>
-			Set the current line of the text editor.
 			</description>
 		</method>
 		<method name="cursor_get_column" qualifiers="const">
@@ -36046,6 +36371,18 @@
 			<description>
 			</description>
 		</method>
+		<method name="set_hide_folding">
+			<argument index="0" name="hide" type="bool">
+			</argument>
+			<description>
+			</description>
+		</method>
+		<method name="is_folding_hidden" qualifiers="const">
+			<return type="bool">
+			</return>
+			<description>
+			</description>
+		</method>
 	</methods>
 	<signals>
 		<signal name="item_activated">
@@ -37198,6 +37535,14 @@
 	<description>
 	</description>
 	<methods>
+		<method name="angle">
+			<return type="float">
+			</return>
+			<description>
+			Returns the result of atan2 when called with the Vector's x and y as parameters (Math::atan2(x,y)).
+			Be aware that it therefore returns an angle oriented clockwise with regard to the (0, 1) unit vector, and not an angle oriented counter-clockwise with regard to the (1, 0) unit vector (which would be the typical trigonometric representation of the angle when calling Math::atan2(y,x)).
+			</description>
+		</method>
 		<method name="angle_to">
 			<return type="float">
 			</return>
@@ -37216,13 +37561,6 @@
 			Returns the angle in radians between the line connecting the two points and the x coordinate.
 			</description>
 		</method>
-		<method name="atan2">
-			<return type="float">
-			</return>
-			<description>
-			Returns the result of atan2 when called with the Vector's x and y as parameters.
-			</description>
-		</method>
 		<method name="cubic_interpolate">
 			<return type="Vector2">
 			</return>
@@ -37323,7 +37661,7 @@
 			<argument index="0" name="vec" type="Vector2">
 			</argument>
 			<description>
-			Reflects/mirrors the vector around another vector.
+			Like "slide", but reflects the Vector instead of continuing along the wall.
 			</description>
 		</method>
 		<method name="rotated">
@@ -37387,10 +37725,10 @@
 </class>
 <class name="Vector2Array" category="Built-In Types">
 	<brief_description>
-	An Array of Vector2's.
+	An Array of Vector2.
 	</brief_description>
 	<description>
-	An Array specifically designed to hold Vector2's.
+	An Array specifically designed to hold Vector2.
 	</description>
 	<methods>
 		<method name="get">
@@ -37399,21 +37737,21 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
-			Get the Vector2 at the given index.
+			Returns the Vector2 at the given index.
 			</description>
 		</method>
 		<method name="push_back">
 			<argument index="0" name="vector2" type="Vector2">
 			</argument>
 			<description>
-			Insert a new Vector2.
+			Inserts a Vector2 at the end.
 			</description>
 		</method>
 		<method name="resize">
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
-			Set the size of the Vector2Array. If larger than the current size it will reserve some space beforehand, and if it is smaller it will cut off the array.
+			Sets the size of the Vector2Array. If larger than the current size it will reserve some space beforehand, and if it is smaller it will cut off the array.
 			</description>
 		</method>
 		<method name="set">
@@ -37422,7 +37760,7 @@
 			<argument index="1" name="vector2" type="Vector2">
 			</argument>
 			<description>
-			Set the Vector2 at the given index.
+			Changes the Vector2 at the given index.
 			</description>
 		</method>
 		<method name="size">
@@ -37450,19 +37788,21 @@
 	Vector class, which performs basic 3D vector math operations.
 	</brief_description>
 	<description>
-	Vector3 is one of the core classes of the engine, and includes several built-in helper functions to perform basic vecor math operations.
+	Vector3 is one of the core classes of the engine, and includes several built-in helper functions to perform basic vector math operations.
 	</description>
 	<methods>
 		<method name="abs">
 			<return type="Vector3">
 			</return>
 			<description>
+			Returns a new vector with all components in absolute values (i.e. positive).
 			</description>
 		</method>
 		<method name="ceil">
 			<return type="Vector3">
 			</return>
 			<description>
+			Returns a new vector with all components rounded up.
 			</description>
 		</method>
 		<method name="cross">
@@ -37486,7 +37826,7 @@
 			<argument index="3" name="t" type="float">
 			</argument>
 			<description>
-			Perform a cubic interpolation between vectors a,b,c,d (b is current), by the given amount (i).
+			Perform a cubic interpolation between vectors pre_a, a, b, post_b (a is current), by the given amount (t).
 			</description>
 		</method>
 		<method name="distance_squared_to">
@@ -37495,7 +37835,7 @@
 			<argument index="0" name="b" type="Vector3">
 			</argument>
 			<description>
-			Return the squared distance (distance minus the last square root) to b.
+			Return the squared distance (distance minus the last square root) to b. Prefer this function over distance_to if you need to sort vectors or need the squared distance for some formula.
 			</description>
 		</method>
 		<method name="distance_to">
@@ -37520,13 +37860,14 @@
 			<return type="Vector3">
 			</return>
 			<description>
+			Returns a new vector with all components rounded down.
 			</description>
 		</method>
 		<method name="inverse">
 			<return type="Vector3">
 			</return>
 			<description>
-			Returns the inverse of the vector. this is the same as Vector3( 1.0 / v.x, 1.0 / v.y, 1.0 / v.z )
+			Returns the inverse of the vector. This is the same as Vector3( 1.0 / v.x, 1.0 / v.y, 1.0 / v.z )
 			</description>
 		</method>
 		<method name="length">
@@ -37540,7 +37881,7 @@
 			<return type="float">
 			</return>
 			<description>
-			Return the length of the vector, squared.
+			Return the length of the vector, squared. Prefer this function over "length" if you need to sort vectors or need the squared length for some formula.
 			</description>
 		</method>
 		<method name="linear_interpolate">
@@ -37551,26 +37892,28 @@
 			<argument index="1" name="t" type="float">
 			</argument>
 			<description>
-			Linearly interpolates the vector to a given one (b), by the given amount (i).
+			Linearly interpolates the vector to a given one (b), by the given amount (t).
 			</description>
 		</method>
 		<method name="max_axis">
 			<return type="int">
 			</return>
 			<description>
+			Returns AXIS_X, AXIS_Y or AXIS_Z depending on which axis is the largest.
 			</description>
 		</method>
 		<method name="min_axis">
 			<return type="int">
 			</return>
 			<description>
+			Returns AXIS_X, AXIS_Y or AXIS_Z depending on which axis is the smallest.
 			</description>
 		</method>
 		<method name="normalized">
 			<return type="Vector3">
 			</return>
 			<description>
-			Return a copy of the normalized vector to unit length. This is the same as v / v.length()
+			Return a copy of the normalized vector to unit length. This is the same as v / v.length().
 			</description>
 		</method>
 		<method name="reflect">
@@ -37579,6 +37922,7 @@
 			<argument index="0" name="by" type="Vector3">
 			</argument>
 			<description>
+			Like "slide", but reflects the Vector instead of continuing along the wall.
 			</description>
 		</method>
 		<method name="rotated">
@@ -37589,6 +37933,7 @@
 			<argument index="1" name="phi" type="float">
 			</argument>
 			<description>
+			Rotates the vector around the given axis by phi radians.
 			</description>
 		</method>
 		<method name="slide">
@@ -37597,6 +37942,7 @@
 			<argument index="0" name="by" type="Vector3">
 			</argument>
 			<description>
+			Slides the vector along a wall.
 			</description>
 		</method>
 		<method name="snapped">
@@ -37618,6 +37964,7 @@
 			<argument index="2" name="z" type="float">
 			</argument>
 			<description>
+			Returns a Vector3 with the given components.
 			</description>
 		</method>
 	</methods>
@@ -37631,17 +37978,22 @@
 	</members>
 	<constants>
 		<constant name="AXIS_X" value="0">
+			Enumerated value for the X axis. Returned by functions like max_axis or min_axis.
 		</constant>
 		<constant name="AXIS_Y" value="1">
+			Enumerated value for the Y axis.
 		</constant>
 		<constant name="AXIS_Z" value="2">
+			Enumerated value for the Z axis.
 		</constant>
 	</constants>
 </class>
 <class name="Vector3Array" category="Built-In Types">
 	<brief_description>
+	An Array of Vector3.
 	</brief_description>
 	<description>
+	An Array specifically designed to hold Vector3.
 	</description>
 	<methods>
 		<method name="get">
@@ -37650,18 +38002,21 @@
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Returns the Vector3 at the given index.
 			</description>
 		</method>
 		<method name="push_back">
 			<argument index="0" name="vector3" type="Vector3">
 			</argument>
 			<description>
+			Inserts a Vector3 at the end.
 			</description>
 		</method>
 		<method name="resize">
 			<argument index="0" name="idx" type="int">
 			</argument>
 			<description>
+			Sets the size of the Vector3Array. If the new size is larger than the current size, space is reserved for the additional elements; if it is smaller, the array is truncated.
 			</description>
 		</method>
 		<method name="set">
@@ -37670,12 +38025,14 @@
 			<argument index="1" name="vector3" type="Vector3">
 			</argument>
 			<description>
+			Changes the Vector3 at the given index.
 			</description>
 		</method>
 		<method name="size">
 			<return type="int">
 			</return>
 			<description>
+			Returns the size of the array.
 			</description>
 		</method>
 		<method name="Vector3Array">
@@ -37684,6 +38041,7 @@
 			<argument index="0" name="from" type="Array">
 			</argument>
 			<description>
+			Constructs a new Vector3Array. Optionally, you can pass in an Array that will be converted.
 			</description>
 		</method>
 	</methods>
diff --git a/doc/core_classes.xml b/doc/core_classes.xml
index 02b46ac4b9..c37b50f122 100644
--- a/doc/core_classes.xml
+++ b/doc/core_classes.xml
@@ -575,8 +575,8 @@
 	3x3 Matrix.
 	</brief_description>
 	<description>
-	</description>
 	Matrix represent a 3x3 (3 rows by 3 columns) transformation matrix. it is used mainly to represent and accumulate transformations such as rotation or scale when used as an OCS (oriented coordinate system).
+	</description>
 	<methods>
 	<method name="invert">
 		<description>
@@ -937,8 +937,8 @@
 	Vector used for 2D Math.
 	</brief_description>
 	<description>
-	</description>
 	Vector class, which performs basic 2D vector math operations.
+	</description>
 	<methods>
 	<method name="operator+">
 		<argument index="0" name="b" type="Vector2">
diff --git a/doc/engine_classes.xml b/doc/engine_classes.xml
index af153a16ef..43602e26e9 100644
--- a/doc/engine_classes.xml
+++ b/doc/engine_classes.xml
@@ -1502,7 +1502,7 @@
 			<description>
 			</description>
 		</method>
-		<method name="free">
+		<method name="free_rid">
 			<argument index="0" name="rid" type="RID">
 			</argument>
 			<description>
diff --git a/drivers/SCsub b/drivers/SCsub
index bc46bf2cec..8e241830f8 100644
--- a/drivers/SCsub
+++ b/drivers/SCsub
@@ -31,7 +31,7 @@ SConscript("rtaudio/SCsub");
 SConscript("nedmalloc/SCsub");
 SConscript("nrex/SCsub");
 SConscript("chibi/SCsub");
-if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theora"]=="yes" or env["opus"]=="yes"):
+if (env["vorbis"]=="yes" or env["speex"]=="yes" or env["theoralib"]=="yes" or env["opus"]=="yes"):
         SConscript("ogg/SCsub");
 if (env["vorbis"]=="yes"):
         SConscript("vorbis/SCsub");
@@ -40,9 +40,7 @@ if (env["opus"]=="yes"):
 if (env["tools"]=="yes"):
 	SConscript("convex_decomp/SCsub");
 
-#if env["theora"]=="yes":
-#	SConscript("theoraplayer/SCsub")
-if (env["theora"]=="yes"):
+if (env["theoralib"]=="yes"):
 	SConscript("theora/SCsub");
 if (env['speex']=='yes'):
 	SConscript("speex/SCsub");
@@ -69,21 +67,27 @@ for f in env.drivers_sources:
 		fname = env.File(f).path
 	else:
 		fname = env.File(f)[0].path
-	#base = string.join(fname.split("/")[:-1], "/")
 	fname = fname.replace("\\", "/")
 	base = string.join(fname.split("/")[:2], "/")
 	if base != cur_base and len(list) > max_src:
-		lib = env.Library("drivers"+str(num), list)
-		lib_list.append(lib)
-		list = []
+		if num > 0:
+			lib = env.Library("drivers"+str(num), list)
+			lib_list.append(lib)
+			list = []
 		num = num+1
 	cur_base = base
 	list.append(f)
 
-if len(list) > 0:
-	lib = env.Library("drivers"+str(num), list)
-	lib_list.append(lib)
+lib = env.Library("drivers"+str(num), list)
+lib_list.append(lib)
 
+if len(lib_list) > 0:
+	import os, sys
+	if os.name=='posix' and sys.platform=='msys':
+		env.Replace(ARFLAGS=['rcsT'])
+
+		lib = env.Library("drivers_collated", lib_list)
+		lib_list = [lib]
 
 drivers_base=[]
 env.add_source_files(drivers_base,"*.cpp")
@@ -93,4 +97,3 @@ env.Prepend(LIBS=lib_list)
 
 #lib = env.Library("drivers",env.drivers_sources)
 #env.Prepend(LIBS=[lib])
-
diff --git a/drivers/alsa/SCsub b/drivers/alsa/SCsub
index bcd231579c..9fbb467baa 100644
--- a/drivers/alsa/SCsub
+++ b/drivers/alsa/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.drivers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/drivers/builtin_openssl2/e_os.h b/drivers/builtin_openssl2/e_os.h
index e801b4106a..7323a7b4bf 100644
--- a/drivers/builtin_openssl2/e_os.h
+++ b/drivers/builtin_openssl2/e_os.h
@@ -318,8 +318,8 @@ static unsigned int _strlen31(const char *str)
 #      undef isupper
 #      undef isxdigit
 #    endif
-#    if defined(_MSC_VER) && !defined(_DLL) && defined(stdin)
-#      if _MSC_VER>=1300
+#    if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(_DLL) && defined(stdin)
+#      if _MSC_VER>=1300 && _MSC_VER<1600
 #        undef stdin
 #        undef stdout
 #        undef stderr
@@ -327,7 +327,7 @@ static unsigned int _strlen31(const char *str)
 #        define stdin  (&__iob_func()[0])
 #        define stdout (&__iob_func()[1])
 #        define stderr (&__iob_func()[2])
-#      elif defined(I_CAN_LIVE_WITH_LNK4049)
+#      elif _MSC_VER<1300 && defined(I_CAN_LIVE_WITH_LNK4049)
 #        undef stdin
 #        undef stdout
 #        undef stderr
diff --git a/drivers/builtin_zlib/SCsub b/drivers/builtin_zlib/SCsub
index c322b236ab..e5c81c0b3b 100644
--- a/drivers/builtin_zlib/SCsub
+++ b/drivers/builtin_zlib/SCsub
@@ -1,7 +1,7 @@
 Import('env')
 
 zlib_sources = [
-	
+
 	"builtin_zlib/zlib/adler32.c",
 	"builtin_zlib/zlib/compress.c",
 	"builtin_zlib/zlib/crc32.c",
diff --git a/drivers/chibi/SCsub b/drivers/chibi/SCsub
index bcd231579c..9fbb467baa 100644
--- a/drivers/chibi/SCsub
+++ b/drivers/chibi/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.drivers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/drivers/dds/SCsub b/drivers/dds/SCsub
index e475de1dba..159829384f 100644
--- a/drivers/dds/SCsub
+++ b/drivers/dds/SCsub
@@ -8,4 +8,3 @@ dds_sources = [
 env.drivers_sources+=dds_sources
 
 #env.add_source_files(env.drivers_sources, dds_sources)
-
diff --git a/drivers/etc1/SCsub b/drivers/etc1/SCsub
index 251c3ffb86..4ce921ad9f 100644
--- a/drivers/etc1/SCsub
+++ b/drivers/etc1/SCsub
@@ -12,4 +12,3 @@ if (env["etc1"] != "no"):
 #env.add_source_files(env.drivers_sources, etc_sources)
 
 Export('env')
-
diff --git a/drivers/gl_context/SCsub b/drivers/gl_context/SCsub
index 0177eec6a2..7cf8629fe1 100644
--- a/drivers/gl_context/SCsub
+++ b/drivers/gl_context/SCsub
@@ -3,4 +3,3 @@ Export('env');
 
 env.add_source_files(env.drivers_sources,"*.cpp")
 env.add_source_files(env.drivers_sources,"*.c")
-
diff --git a/drivers/gles2/rasterizer_gles2.cpp b/drivers/gles2/rasterizer_gles2.cpp
index 136e8162e9..d0f2f88ee6 100644
--- a/drivers/gles2/rasterizer_gles2.cpp
+++ b/drivers/gles2/rasterizer_gles2.cpp
@@ -4075,6 +4075,8 @@ void RasterizerGLES2::render_target_set_size(RID p_render_target,int p_width,int
 		glDeleteTextures(1,&rt->color);
 
 		rt->fbo=0;
+		rt->depth=0;
+		rt->color=0;
 		rt->width=0;
 		rt->height=0;
 		rt->texture_ptr->tex_id=0;
@@ -4094,12 +4096,14 @@ void RasterizerGLES2::render_target_set_size(RID p_render_target,int p_width,int
 	glBindFramebuffer(GL_FRAMEBUFFER, rt->fbo);
 
 	//depth
-	glGenRenderbuffers(1, &rt->depth);
-	glBindRenderbuffer(GL_RENDERBUFFER, rt->depth );
+	if (!low_memory_2d) {
+		glGenRenderbuffers(1, &rt->depth);
+		glBindRenderbuffer(GL_RENDERBUFFER, rt->depth );
 
-	glRenderbufferStorage(GL_RENDERBUFFER, use_depth24?_DEPTH_COMPONENT24_OES:GL_DEPTH_COMPONENT16, rt->width,rt->height);
+		glRenderbufferStorage(GL_RENDERBUFFER, use_depth24?_DEPTH_COMPONENT24_OES:GL_DEPTH_COMPONENT16, rt->width,rt->height);
 
-	glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth);
+		glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rt->depth);
+	}
 
 	//color
 	glGenTextures(1, &rt->color);
@@ -4637,10 +4641,16 @@ void RasterizerGLES2::_update_shader( Shader* p_shader) const {
 		if (light_flags.uses_light) {
 			enablers.push_back("#define USE_LIGHT_SHADER_CODE\n");
 		}
+		if (light_flags.uses_shadow_color) {
+			enablers.push_back("#define USE_LIGHT_SHADOW_COLOR\n");
+		}
 		if (light_flags.uses_time || fragment_flags.uses_time || vertex_flags.uses_time) {
 			enablers.push_back("#define USE_TIME\n");
 			uses_time=true;
 		}
+		if (vertex_flags.vertex_code_writes_position) {
+			enablers.push_back("#define VERTEX_SHADER_WRITE_POSITION\n");
+		}
 
 		material_shader.set_custom_shader_code(p_shader->custom_code_id,vertex_code, vertex_globals,fragment_code, light_code, fragment_globals,uniform_names,enablers);
 	} else if (p_shader->mode==VS::SHADER_CANVAS_ITEM) {
@@ -6526,80 +6536,84 @@ void RasterizerGLES2::_render_list_forward(RenderList *p_render_list,const Trans
 			material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_LIGHTMAP,false);
 			material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_DP_SAMPLER,false);
 
-			if (e->instance->sampled_light.is_valid()) {
+			if (material->flags[VS::MATERIAL_FLAG_UNSHADED] == false && current_debug != VS::SCENARIO_DEBUG_SHADELESS) {
 
-				SampledLight *sl = sampled_light_owner.get(e->instance->sampled_light);
-				if (sl) {
+				if (e->instance->sampled_light.is_valid()) {
 
-					baked_light=NULL; //can't mix
-					material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_DP_SAMPLER,true);
-					glActiveTexture(GL_TEXTURE0+max_texture_units-3);
-					glBindTexture(GL_TEXTURE_2D,sl->texture); //bind the texture
-					sampled_light_dp_multiplier=sl->multiplier;
-					bind_dp_sampler=true;
+					SampledLight *sl = sampled_light_owner.get(e->instance->sampled_light);
+					if (sl) {
+
+						baked_light = NULL; //can't mix
+						material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_DP_SAMPLER, true);
+						glActiveTexture(GL_TEXTURE0 + max_texture_units - 3);
+						glBindTexture(GL_TEXTURE_2D, sl->texture); //bind the texture
+						sampled_light_dp_multiplier = sl->multiplier;
+						bind_dp_sampler = true;
+					}
 				}
-			}
 
 
-			if (!additive && baked_light) {
+				if (!additive && baked_light) {
 
-				if (baked_light->mode==VS::BAKED_LIGHT_OCTREE && baked_light->octree_texture.is_valid() && e->instance->baked_light_octree_xform) {
-					material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_OCTREE,true);
-					bind_baked_light_octree=true;
-					if (prev_baked_light!=baked_light) {
-						Texture *tex=texture_owner.get(baked_light->octree_texture);
-						if (tex) {
+					if (baked_light->mode == VS::BAKED_LIGHT_OCTREE && baked_light->octree_texture.is_valid() && e->instance->baked_light_octree_xform) {
+						material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_OCTREE, true);
+						bind_baked_light_octree = true;
+						if (prev_baked_light != baked_light) {
+							Texture *tex = texture_owner.get(baked_light->octree_texture);
+							if (tex) {
 
-							glActiveTexture(GL_TEXTURE0+max_texture_units-3);
-							glBindTexture(tex->target,tex->tex_id); //bind the texture
-						}
-						if (baked_light->light_texture.is_valid()) {
-							Texture *texl=texture_owner.get(baked_light->light_texture);
-							if (texl) {
-								glActiveTexture(GL_TEXTURE0+max_texture_units-4);
-								glBindTexture(texl->target,texl->tex_id); //bind the light texture
+								glActiveTexture(GL_TEXTURE0 + max_texture_units - 3);
+								glBindTexture(tex->target, tex->tex_id); //bind the texture
+							}
+							if (baked_light->light_texture.is_valid()) {
+								Texture *texl = texture_owner.get(baked_light->light_texture);
+								if (texl) {
+									glActiveTexture(GL_TEXTURE0 + max_texture_units - 4);
+									glBindTexture(texl->target, texl->tex_id); //bind the light texture
+								}
 							}
-						}
 
+						}
 					}
-				} else if (baked_light->mode==VS::BAKED_LIGHT_LIGHTMAPS) {
+					else if (baked_light->mode == VS::BAKED_LIGHT_LIGHTMAPS) {
 
 
-					int lightmap_idx = e->instance->baked_lightmap_id;
+						int lightmap_idx = e->instance->baked_lightmap_id;
 
-					material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_LIGHTMAP,false);
-					bind_baked_lightmap=false;
+						material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_LIGHTMAP, false);
+						bind_baked_lightmap = false;
 
 
-					if (baked_light->lightmaps.has(lightmap_idx)) {
+						if (baked_light->lightmaps.has(lightmap_idx)) {
 
 
-						RID texid = baked_light->lightmaps[lightmap_idx];
+							RID texid = baked_light->lightmaps[lightmap_idx];
 
-						if (prev_baked_light!=baked_light || texid!=prev_baked_light_texture) {
+							if (prev_baked_light != baked_light || texid != prev_baked_light_texture) {
 
 
-							Texture *tex = texture_owner.get(texid);
-							if (tex) {
+								Texture *tex = texture_owner.get(texid);
+								if (tex) {
 
-								glActiveTexture(GL_TEXTURE0+max_texture_units-3);
-								glBindTexture(tex->target,tex->tex_id); //bind the texture
+									glActiveTexture(GL_TEXTURE0 + max_texture_units - 3);
+									glBindTexture(tex->target, tex->tex_id); //bind the texture
+								}
+
+								prev_baked_light_texture = texid;
 							}
 
-							prev_baked_light_texture=texid;
-						}
+							if (texid.is_valid()) {
+								material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_LIGHTMAP, true);
+								bind_baked_lightmap = true;
+							}
 
-						if (texid.is_valid()) {
-							material_shader.set_conditional(MaterialShaderGLES2::ENABLE_AMBIENT_LIGHTMAP,true);
-							bind_baked_lightmap=true;
 						}
-
 					}
 				}
-			}
 
-			if (int(prev_baked_light!=NULL) ^ int(baked_light!=NULL)) {
-				rebind=true;
+				if (int(prev_baked_light != NULL) ^ int(baked_light != NULL)) {
+					rebind = true;
+				}
 			}
 		}
 
@@ -10332,7 +10346,11 @@ void RasterizerGLES2::_update_framebuffer() {
 		framebuffer.fbo=0;
 	}
 
+#ifdef TOOLS_ENABLED
 	framebuffer.active=use_fbo;
+#else
+	framebuffer.active=use_fbo && !low_memory_2d;
+#endif
 	framebuffer.width=dwidth;
 	framebuffer.height=dheight;
 	framebuffer.scale=scale;
@@ -10374,6 +10392,13 @@ void RasterizerGLES2::_update_framebuffer() {
 	GLuint format_rgba = GL_RGBA;
 	GLuint format_rgb = use_fp16_fb?_GL_RGB16F_EXT:GL_RGB;
 	GLuint format_type = use_fp16_fb?_GL_HALF_FLOAT_OES:GL_UNSIGNED_BYTE;
+	GLuint format_internal=GL_RGBA;
+
+	if (use_16bits_fbo) {
+		format_type=GL_UNSIGNED_SHORT_5_6_5;
+		format_rgba=GL_RGB;
+		format_internal=GL_RGB;
+	}
 	/*GLuint format_luminance = use_fp16_fb?GL_RGB16F:GL_RGBA;
 	GLuint format_luminance_type = use_fp16_fb?(use_fu_GL_HALF_FLOAT_OES):GL_UNSIGNED_BYTE;
 	GLuint format_luminance_components = use_fp16_fb?GL_RGB:GL_RGBA;*/
@@ -10387,7 +10412,7 @@ void RasterizerGLES2::_update_framebuffer() {
 
 	glGenTextures(1, &framebuffer.color);
 	glBindTexture(GL_TEXTURE_2D, framebuffer.color);
-	glTexImage2D(GL_TEXTURE_2D, 0, format_rgba,  framebuffer.width, framebuffer.height, 0, GL_RGBA, format_type, NULL);
+	glTexImage2D(GL_TEXTURE_2D, 0, format_rgba,  framebuffer.width, framebuffer.height, 0, format_internal, format_type, NULL);
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
 //	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
@@ -10412,7 +10437,7 @@ void RasterizerGLES2::_update_framebuffer() {
 		framebuffer.fbo=0;
 		framebuffer.active=false;
 		//print_line("**************** NO FAMEBUFFEEEERRRR????");
-		WARN_PRINT("Could not create framebuffer!!");
+		WARN_PRINT(String("Could not create framebuffer!!, code: "+itos(status)).ascii().get_data());
 	}
 
 	//sample
@@ -10421,7 +10446,7 @@ void RasterizerGLES2::_update_framebuffer() {
 	glBindFramebuffer(GL_FRAMEBUFFER, framebuffer.sample_fbo);
 	glGenTextures(1, &framebuffer.sample_color);
 	glBindTexture(GL_TEXTURE_2D, framebuffer.sample_color);
-	glTexImage2D(GL_TEXTURE_2D, 0, format_rgba,  framebuffer.width, framebuffer.height, 0, GL_RGBA, format_type, NULL);
+	glTexImage2D(GL_TEXTURE_2D, 0, format_rgba,  framebuffer.width, framebuffer.height, 0, format_internal, format_type, NULL);
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
 //	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
@@ -10481,7 +10506,7 @@ void RasterizerGLES2::_update_framebuffer() {
 			glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
 			glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 			glTexImage2D(GL_TEXTURE_2D, 0, format_rgba, size, size, 0,
-				     GL_RGBA, format_type, NULL);
+				     format_internal, format_type, NULL);
 			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
 					       GL_TEXTURE_2D, framebuffer.blur[i].color, 0);
 
@@ -11210,6 +11235,11 @@ RasterizerGLES2* RasterizerGLES2::get_singleton() {
 
 int RasterizerGLES2::RenderList::max_elements=RenderList::DEFAULT_MAX_ELEMENTS;
 
+void RasterizerGLES2::set_force_16_bits_fbo(bool p_force) {
+
+	use_16bits_fbo=p_force;
+}
+
 RasterizerGLES2::RasterizerGLES2(bool p_compress_arrays,bool p_keep_ram_copy,bool p_default_fragment_lighting,bool p_use_reload_hooks) {
 
 	_singleton = this;
@@ -11242,6 +11272,7 @@ RasterizerGLES2::RasterizerGLES2(bool p_compress_arrays,bool p_keep_ram_copy,boo
 	use_fp16_fb=bool(GLOBAL_DEF("rasterizer/fp16_framebuffer",true));
 	use_shadow_mapping=true;
 	use_fast_texture_filter=!bool(GLOBAL_DEF("rasterizer/trilinear_mipmap_filter",true));
+	low_memory_2d=bool(GLOBAL_DEF("rasterizer/low_memory_2d_mode",false));
 	skel_default.resize(1024*4);
 	for(int i=0;i<1024/3;i++) {
 
@@ -11270,6 +11301,7 @@ RasterizerGLES2::RasterizerGLES2(bool p_compress_arrays,bool p_keep_ram_copy,boo
 	framebuffer.active=false;
 	tc0_id_cache=0;
 	tc0_idx=0;
+	use_16bits_fbo=false;
 };
 
 void RasterizerGLES2::restore_framebuffer() {
diff --git a/drivers/gles2/rasterizer_gles2.h b/drivers/gles2/rasterizer_gles2.h
index 507e46ae75..e203a56c24 100644
--- a/drivers/gles2/rasterizer_gles2.h
+++ b/drivers/gles2/rasterizer_gles2.h
@@ -91,6 +91,7 @@ class RasterizerGLES2 : public Rasterizer {
 	bool srgb_supported;
 	bool float_supported;
 	bool float_linear_supported;
+	bool use_16bits_fbo;
 
 	ShadowFilterTechnique shadow_filter;
 
@@ -105,13 +106,13 @@ class RasterizerGLES2 : public Rasterizer {
 	float anisotropic_level;
 
 	bool use_half_float;
-
+	bool low_memory_2d;
 
 	Vector<float> skel_default;
 
 	Image _get_gl_image_and_format(const Image& p_image, Image::Format p_format, uint32_t p_flags,GLenum& r_gl_format,GLenum& r_gl_internal_format,int &r_gl_components,bool &r_has_alpha_cache,bool &r_compressed);
 
-	class RenderTarget;
+	struct RenderTarget;
 
 	struct Texture {
 
@@ -305,7 +306,7 @@ class RasterizerGLES2 : public Rasterizer {
 		virtual ~GeometryOwner() {}
 	};
 
-	class Mesh;
+	struct Mesh;
 
 	struct Surface : public Geometry {
 
@@ -1705,6 +1706,8 @@ public:
 
 	static RasterizerGLES2* get_singleton();
 
+	virtual void set_force_16_bits_fbo(bool p_force);
+
 	RasterizerGLES2(bool p_compress_arrays=false,bool p_keep_ram_copy=true,bool p_default_fragment_lighting=true,bool p_use_reload_hooks=false);
 	virtual ~RasterizerGLES2();
 };
diff --git a/drivers/gles2/shader_compiler_gles2.cpp b/drivers/gles2/shader_compiler_gles2.cpp
index d57512c936..f981529ee3 100644
--- a/drivers/gles2/shader_compiler_gles2.cpp
+++ b/drivers/gles2/shader_compiler_gles2.cpp
@@ -154,6 +154,9 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a
 				if (vnode->name==vname_vertex && p_assign_left) {
 					vertex_code_writes_vertex=true;
 				}
+				if (vnode->name == vname_position && p_assign_left) {
+					vertex_code_writes_position = true;
+				}
 				if (vnode->name==vname_color_interp) {
 					flags->use_color_interp=true;
 				}
@@ -218,6 +221,10 @@ String ShaderCompilerGLES2::dump_node_code(SL::Node *p_node,int p_level,bool p_a
 					uses_light=true;
 				}
 
+				if (vnode->name==vname_shadow) {
+					uses_shadow_color=true;
+				}
+
 			}
 			if (type==ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX) {
 
@@ -659,6 +666,7 @@ Error ShaderCompilerGLES2::compile(const String& p_code, ShaderLanguage::ShaderT
 	uses_texpixel_size=false;
 	uses_worldvec=false;
 	vertex_code_writes_vertex=false;
+	vertex_code_writes_position = false;
 	uses_shadow_color=false;
 	uniforms=r_uniforms;
 	flags=&r_flags;
@@ -690,6 +698,7 @@ Error ShaderCompilerGLES2::compile(const String& p_code, ShaderLanguage::ShaderT
 	r_flags.uses_texscreen=uses_texscreen;
 	r_flags.uses_texpos=uses_texpos;
 	r_flags.vertex_code_writes_vertex=vertex_code_writes_vertex;
+	r_flags.vertex_code_writes_position=vertex_code_writes_position;
 	r_flags.uses_discard=uses_discard;
 	r_flags.uses_screen_uv=uses_screen_uv;
 	r_flags.uses_light=uses_light;
@@ -778,125 +787,129 @@ ShaderCompilerGLES2::ShaderCompilerGLES2() {
 	replace_table["texscreen"]= "texscreen";
 	replace_table["texpos"]= "texpos";
 
-	mode_replace_table[0]["SRC_VERTEX"]="vertex_in.xyz";
-	mode_replace_table[0]["SRC_NORMAL"]="normal_in";
-	mode_replace_table[0]["SRC_TANGENT"]="tangent_in";
-	mode_replace_table[0]["SRC_BINORMALF"]="binormalf";
-
-	mode_replace_table[0]["VERTEX"]="vertex_interp";
-	mode_replace_table[0]["NORMAL"]="normal_interp";
-	mode_replace_table[0]["TANGENT"]="tangent_interp";
-	mode_replace_table[0]["BINORMAL"]="binormal_interp";
-	mode_replace_table[0]["UV"]="uv_interp.xy";
-	mode_replace_table[0]["UV2"]="uv_interp.zw";
-	mode_replace_table[0]["COLOR"]="color_interp";
+
+
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SRC_VERTEX"] = "vertex_in.xyz";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SRC_NORMAL"] = "normal_in";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SRC_TANGENT"]="tangent_in";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SRC_BINORMALF"]="binormalf";
+	
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["POSITION"] = "gl_Position";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["VERTEX"]="vertex_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["NORMAL"]="normal_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["TANGENT"]="tangent_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["BINORMAL"]="binormal_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["UV"]="uv_interp.xy";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["UV2"]="uv_interp.zw";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["COLOR"]="color_interp";
 	//@TODO convert to glsl stuff
-	mode_replace_table[0]["SPEC_EXP"]="vertex_specular_exp";
-	mode_replace_table[0]["WORLD_MATRIX"]="world_transform";
-	mode_replace_table[0]["INV_CAMERA_MATRIX"]="camera_inverse_transform";
-	mode_replace_table[0]["PROJECTION_MATRIX"]="projection_transform";
-	mode_replace_table[0]["MODELVIEW_MATRIX"]="modelview";
-	mode_replace_table[0]["POINT_SIZE"]="gl_PointSize";
-	mode_replace_table[0]["VAR1"]="var1_interp";
-	mode_replace_table[0]["VAR2"]="var2_interp";
-
-//	mode_replace_table[0]["SCREEN_POS"]="SCREEN_POS";
-//	mode_replace_table[0]["SCREEN_SIZE"]="SCREEN_SIZE";
-	mode_replace_table[0]["INSTANCE_ID"]="instance_id";
-	mode_replace_table[0]["TIME"]="time";
-
-	mode_replace_table[1]["VERTEX"]="vertex";
-	//mode_replace_table[1]["POSITION"]="IN_POSITION";
-	mode_replace_table[1]["NORMAL"]="normal";
-	mode_replace_table[1]["TANGENT"]="tangent";
-	mode_replace_table[1]["POSITION"]="gl_Position";
-	mode_replace_table[1]["BINORMAL"]="binormal";
-	mode_replace_table[1]["NORMALMAP"]="normalmap";
-	mode_replace_table[1]["NORMALMAP_DEPTH"]="normaldepth";
-	mode_replace_table[1]["VAR1"]="var1_interp";
-	mode_replace_table[1]["VAR2"]="var2_interp";
-	mode_replace_table[1]["UV"]="uv";
-	mode_replace_table[1]["UV2"]="uv2";
-	mode_replace_table[1]["SCREEN_UV"]="screen_uv";
-	mode_replace_table[1]["VAR1"]="var1_interp";
-	mode_replace_table[1]["VAR2"]="var2_interp";
-	mode_replace_table[1]["COLOR"]="color";
-	mode_replace_table[1]["DIFFUSE"]="diffuse.rgb";
-	mode_replace_table[1]["DIFFUSE_ALPHA"]="diffuse";
-	mode_replace_table[1]["SPECULAR"]="specular";
-	mode_replace_table[1]["EMISSION"]="emission";
-	mode_replace_table[1]["SHADE_PARAM"]="shade_param";
-	mode_replace_table[1]["SPEC_EXP"]="specular_exp";
-	mode_replace_table[1]["GLOW"]="glow";
-	mode_replace_table[1]["DISCARD"]="discard_";
-	mode_replace_table[1]["POINT_COORD"]="gl_PointCoord";
-	mode_replace_table[1]["INV_CAMERA_MATRIX"]="camera_inverse_transform";
-
-	//mode_replace_table[1]["SCREEN_POS"]="SCREEN_POS";
-	//mode_replace_table[1]["SCREEN_TEXEL_SIZE"]="SCREEN_TEXEL_SIZE";
-	mode_replace_table[1]["TIME"]="time";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SPEC_EXP"]="vertex_specular_exp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["WORLD_MATRIX"]="world_transform";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["INV_CAMERA_MATRIX"]="camera_inverse_transform";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["PROJECTION_MATRIX"]="projection_transform";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["MODELVIEW_MATRIX"]="modelview";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["POINT_SIZE"]="gl_PointSize";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["VAR1"]="var1_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["VAR2"]="var2_interp";
+
+//	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SCREEN_POS"]="SCREEN_POS";
+//	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["SCREEN_SIZE"]="SCREEN_SIZE";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["INSTANCE_ID"]="instance_id";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_VERTEX]["TIME"]="time";
+
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["VERTEX"]="vertex";
+	//mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["POSITION"]="IN_POSITION";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["NORMAL"]="normal";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["TANGENT"]="tangent";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["POSITION"]="gl_Position";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["BINORMAL"]="binormal";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["NORMALMAP"]="normalmap";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["NORMALMAP_DEPTH"]="normaldepth";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["VAR1"]="var1_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["VAR2"]="var2_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["UV"]="uv";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["UV2"]="uv2";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["SCREEN_UV"]="screen_uv";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["VAR1"]="var1_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["VAR2"]="var2_interp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["COLOR"]="color";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["DIFFUSE"]="diffuse.rgb";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["DIFFUSE_ALPHA"]="diffuse";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["SPECULAR"]="specular";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["EMISSION"]="emission";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["SHADE_PARAM"]="shade_param";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["SPEC_EXP"]="specular_exp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["GLOW"]="glow";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["DISCARD"]="discard_";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["POINT_COORD"]="gl_PointCoord";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["INV_CAMERA_MATRIX"]="camera_inverse_transform";
+
+	//mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["SCREEN_POS"]="SCREEN_POS";
+	//mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["SCREEN_TEXEL_SIZE"]="SCREEN_TEXEL_SIZE";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_FRAGMENT]["TIME"]="time";
 
 	//////////////
 
-	mode_replace_table[2]["NORMAL"]="normal";
-	//mode_replace_table[2]["POSITION"]="IN_POSITION";
-	mode_replace_table[2]["LIGHT_DIR"]="light_dir";
-	mode_replace_table[2]["LIGHT_DIFFUSE"]="light_diffuse";
-	mode_replace_table[2]["LIGHT_SPECULAR"]="light_specular";
-	mode_replace_table[2]["EYE_VEC"]="eye_vec";
-	mode_replace_table[2]["DIFFUSE"]="mdiffuse";
-	mode_replace_table[2]["SPECULAR"]="specular";
-	mode_replace_table[2]["SPECULAR_EXP"]="specular_exp";
-	mode_replace_table[2]["SHADE_PARAM"]="shade_param";
-	mode_replace_table[2]["LIGHT"]="light";
-	mode_replace_table[2]["POINT_COORD"]="gl_PointCoord";
-	mode_replace_table[2]["TIME"]="time";
-
-	mode_replace_table[3]["SRC_VERTEX"]="src_vtx";
-	mode_replace_table[3]["VERTEX"]="outvec.xy";
-	mode_replace_table[3]["WORLD_VERTEX"]="outvec.xy";
-	mode_replace_table[3]["UV"]="uv_interp";
-	mode_replace_table[3]["COLOR"]="color_interp";
-	mode_replace_table[3]["VAR1"]="var1_interp";
-	mode_replace_table[3]["VAR2"]="var2_interp";
-	mode_replace_table[3]["POINT_SIZE"]="gl_PointSize";
-	mode_replace_table[3]["WORLD_MATRIX"]="modelview_matrix";
-	mode_replace_table[3]["PROJECTION_MATRIX"]="projection_matrix";
-	mode_replace_table[3]["EXTRA_MATRIX"]="extra_matrix";
-	mode_replace_table[3]["TIME"]="time";
-
-	mode_replace_table[4]["POSITION"]="gl_Position";
-	mode_replace_table[4]["NORMAL"]="normal";
-	mode_replace_table[4]["NORMALMAP"]="normal_map";
-	mode_replace_table[4]["NORMALMAP_DEPTH"]="normal_depth";
-	mode_replace_table[4]["UV"]="uv_interp";
-	mode_replace_table[4]["SRC_COLOR"]="color_interp";
-	mode_replace_table[4]["COLOR"]="color";
-	mode_replace_table[4]["TEXTURE"]="texture";
-	mode_replace_table[4]["TEXTURE_PIXEL_SIZE"]="texpixel_size";
-	mode_replace_table[4]["VAR1"]="var1_interp";
-	mode_replace_table[4]["VAR2"]="var2_interp";
-	mode_replace_table[4]["SCREEN_UV"]="screen_uv";
-	mode_replace_table[4]["POINT_COORD"]="gl_PointCoord";
-	mode_replace_table[4]["TIME"]="time";
-
-	mode_replace_table[5]["POSITION"]="gl_Position";
-	mode_replace_table[5]["NORMAL"]="normal";
-	mode_replace_table[5]["UV"]="uv_interp";
-	mode_replace_table[5]["COLOR"]="color";
-	mode_replace_table[5]["TEXTURE"]="texture";
-	mode_replace_table[5]["TEXTURE_PIXEL_SIZE"]="texpixel_size";
-	mode_replace_table[5]["VAR1"]="var1_interp";
-	mode_replace_table[5]["VAR2"]="var2_interp";
-	mode_replace_table[5]["LIGHT_VEC"]="light_vec";
-	mode_replace_table[5]["LIGHT_HEIGHT"]="light_height";
-	mode_replace_table[5]["LIGHT_COLOR"]="light";
-	mode_replace_table[5]["LIGHT_UV"]="light_uv";
-	mode_replace_table[5]["LIGHT"]="light_out";
-	mode_replace_table[5]["SHADOW"]="shadow_color";
-	mode_replace_table[5]["SCREEN_UV"]="screen_uv";
-	mode_replace_table[5]["POINT_COORD"]="gl_PointCoord";
-	mode_replace_table[5]["TIME"]="time";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["NORMAL"]="normal";
+	//mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["POSITION"]="IN_POSITION";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["LIGHT_DIR"]="light_dir";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["LIGHT_DIFFUSE"]="light_diffuse";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["LIGHT_SPECULAR"]="light_specular";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["EYE_VEC"]="eye_vec";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["DIFFUSE"]="mdiffuse";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["SPECULAR"]="specular";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["SPECULAR_EXP"]="specular_exp";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["SHADE_PARAM"]="shade_param";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["LIGHT"]="light";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["POINT_COORD"]="gl_PointCoord";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["TIME"]="time";
+	mode_replace_table[ShaderLanguage::SHADER_MATERIAL_LIGHT]["SHADOW"]="shadow_color";
+
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["SRC_VERTEX"]="src_vtx";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["VERTEX"]="outvec.xy";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["WORLD_VERTEX"]="outvec.xy";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["UV"]="uv_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["COLOR"]="color_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["VAR1"]="var1_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["VAR2"]="var2_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["POINT_SIZE"]="gl_PointSize";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["WORLD_MATRIX"]="modelview_matrix";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["PROJECTION_MATRIX"]="projection_matrix";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["EXTRA_MATRIX"]="extra_matrix";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_VERTEX]["TIME"]="time";
+
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["POSITION"]="gl_Position";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["NORMAL"]="normal";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["NORMALMAP"]="normal_map";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["NORMALMAP_DEPTH"]="normal_depth";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["UV"]="uv_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["SRC_COLOR"]="color_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["COLOR"]="color";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["TEXTURE"]="texture";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["TEXTURE_PIXEL_SIZE"]="texpixel_size";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["VAR1"]="var1_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["VAR2"]="var2_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["SCREEN_UV"]="screen_uv";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["POINT_COORD"]="gl_PointCoord";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_FRAGMENT]["TIME"]="time";
+
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["POSITION"]="gl_Position";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["NORMAL"]="normal";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["UV"]="uv_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["COLOR"]="color";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["TEXTURE"]="texture";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["TEXTURE_PIXEL_SIZE"]="texpixel_size";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["VAR1"]="var1_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["VAR2"]="var2_interp";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["LIGHT_VEC"]="light_vec";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["LIGHT_HEIGHT"]="light_height";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["LIGHT_COLOR"]="light";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["LIGHT_UV"]="light_uv";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["LIGHT"]="light_out";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["SHADOW"]="shadow_color";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["SCREEN_UV"]="screen_uv";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["POINT_COORD"]="gl_PointCoord";
+	mode_replace_table[ShaderLanguage::SHADER_CANVAS_ITEM_LIGHT]["TIME"]="time";
 
 
 
@@ -917,6 +930,7 @@ ShaderCompilerGLES2::ShaderCompilerGLES2() {
 	vname_var1_interp="VAR1";
 	vname_var2_interp="VAR2";
 	vname_vertex="VERTEX";
+	vname_position = "POSITION";
 	vname_light="LIGHT";
 	vname_time="TIME";
 	vname_normalmap="NORMALMAP";
diff --git a/drivers/gles2/shader_compiler_gles2.h b/drivers/gles2/shader_compiler_gles2.h
index 43902a7536..46ef2e035f 100644
--- a/drivers/gles2/shader_compiler_gles2.h
+++ b/drivers/gles2/shader_compiler_gles2.h
@@ -34,7 +34,7 @@ class ShaderCompilerGLES2 {
 
 	class Uniform;
 public:
-	class Flags;
+	struct Flags;
 private:
 
 	ShaderLanguage::ProgramNode *program_node;
@@ -55,6 +55,7 @@ private:
 	bool uses_texpixel_size;
 	bool uses_worldvec;
 	bool vertex_code_writes_vertex;
+	bool vertex_code_writes_position;
 	bool uses_shadow_color;
 
 	bool sinh_used;
@@ -76,6 +77,7 @@ private:
 	StringName vname_var1_interp;
 	StringName vname_var2_interp;
 	StringName vname_vertex;
+	StringName vname_position;
 	StringName vname_light;
 	StringName vname_time;
 	StringName vname_normalmap;
@@ -107,6 +109,7 @@ public:
 		bool uses_texpos;
 		bool uses_normalmap;
 		bool vertex_code_writes_vertex;
+		bool vertex_code_writes_position;
 		bool uses_discard;
 		bool uses_screen_uv;
 		bool use_color_interp;
diff --git a/drivers/gles2/shaders/SCsub b/drivers/gles2/shaders/SCsub
index 9679223b16..38177d725f 100644
--- a/drivers/gles2/shaders/SCsub
+++ b/drivers/gles2/shaders/SCsub
@@ -6,5 +6,3 @@ if env['BUILDERS'].has_key('GLSL120GLES'):
 	env.GLSL120GLES('canvas_shadow.glsl');
 	env.GLSL120GLES('blur.glsl');
 	env.GLSL120GLES('copy.glsl');
-
-
diff --git a/drivers/gles2/shaders/material.glsl b/drivers/gles2/shaders/material.glsl
index 38fb03ab5c..ccd80bf2f0 100644
--- a/drivers/gles2/shaders/material.glsl
+++ b/drivers/gles2/shaders/material.glsl
@@ -1175,6 +1175,10 @@ FRAGMENT_SHADER_CODE
 		vec3 mdiffuse = diffuse.rgb;
 		vec3 light;
 
+#if defined(USE_LIGHT_SHADOW_COLOR)
+		vec3 shadow_color=vec3(0.0,0.0,0.0);
+#endif
+
 #if defined(USE_LIGHT_SHADER_CODE)
 //light is written by the light shader
 {
@@ -1195,6 +1199,10 @@ LIGHT_SHADER_CODE
 #endif
 		diffuse.rgb = const_light_mult * ambient_light *diffuse.rgb + light * attenuation * shadow_attenuation;
 
+#if defined(USE_LIGHT_SHADOW_COLOR)
+		diffuse.rgb += light * shadow_color * attenuation * (1.0 - shadow_attenuation);
+#endif
+
 #ifdef USE_FOG
 
 		diffuse.rgb = mix(diffuse.rgb,fog_interp.rgb,fog_interp.a);
diff --git a/drivers/jpg/SCsub b/drivers/jpg/SCsub
index e1fcc5ea89..df91b10a02 100644
--- a/drivers/jpg/SCsub
+++ b/drivers/jpg/SCsub
@@ -10,4 +10,3 @@ jpg_sources = [
 env.drivers_sources+=jpg_sources
 
 #env.add_source_files(env.drivers_sources, jpg_sources)
-
diff --git a/drivers/mpc/SCsub b/drivers/mpc/SCsub
index af61d95e4c..32ffdb863f 100644
--- a/drivers/mpc/SCsub
+++ b/drivers/mpc/SCsub
@@ -19,4 +19,3 @@ env.add_source_files(env.drivers_sources,"*.cpp")
 #env.add_source_files(env.drivers_sources, mpc_sources)
 
 Export('env')
-
diff --git a/drivers/nedmalloc/SCsub b/drivers/nedmalloc/SCsub
index 8c0028b41d..8e6edd1f96 100644
--- a/drivers/nedmalloc/SCsub
+++ b/drivers/nedmalloc/SCsub
@@ -3,4 +3,3 @@ Export('env');
 
 env.add_source_files(env.drivers_sources,"*.cpp")
 #env.add_source_files(env.drivers_sources,"*.c")
-
diff --git a/drivers/nrex/README.md b/drivers/nrex/README.md
index f150a5d76f..9ff67992dc 100644
--- a/drivers/nrex/README.md
+++ b/drivers/nrex/README.md
@@ -1,5 +1,7 @@
 # NREX: Node RegEx
 
+Version 0.1
+
 Small node-based regular expression library. It only does text pattern
 matchhing, not replacement. To use add the files `nrex.hpp`, `nrex.cpp`
 and `nrex_config.h` to your project and follow the example:
@@ -18,47 +20,42 @@ More details about its use is documented in `nrex.hpp`
 
 Currently supported features:
  * Capturing `()` and non-capturing `(?:)` groups
- * Any character `.`
+ * Any character `.` (includes newlines)
  * Shorthand caracter classes `\w\W\s\S\d\D`
- * User-defined character classes such as `[A-Za-z]`
+ * POSIX character classes such as `[[:alnum:]]`
+ * Bracket expressions such as `[A-Za-z]`
  * Simple quantifiers `?`, `*` and `+`
  * Range quantifiers `{0,1}`
  * Lazy (non-greedy) quantifiers `*?`
  * Begining `^` and end `$` anchors
+ * Word boundaries `\b`
  * Alternation `|`
- * Backreferences `\1` to `\99`
-
-To do list:
+ * ASCII `\xFF` code points
  * Unicode `\uFFFF` code points
+ * Positive `(?=)` and negative `(?!)` lookahead
+ * Positive `(?<=)` and negative `(?<!)` lookbehind (fixed length and no alternations)
+ * Backreferences `\1` and `\g{1}` (limited by default to 9 - can be unlimited)
 
 ## License
 
 Copyright (c) 2015, Zher Huei Lee
 All rights reserved.
 
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- 1. Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the copyright holder nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would
+    be appreciated but is not required.
+    
+ 2. Altered source versions must be plainly marked as such, and must not
+    be misrepresented as being the original software.
+    
+ 3. This notice may not be removed or altered from any source
+    distribution.
diff --git a/drivers/nrex/SCsub b/drivers/nrex/SCsub
index 2441d3061b..a00c7b86f4 100644
--- a/drivers/nrex/SCsub
+++ b/drivers/nrex/SCsub
@@ -1,4 +1,3 @@
-
 Import('env')
 
 sources = [
@@ -6,4 +5,3 @@ sources = [
 	'regex.cpp',
 ]
 env.add_source_files(env.drivers_sources, sources)
-
diff --git a/drivers/nrex/nrex.cpp b/drivers/nrex/nrex.cpp
index 696d46240e..1eb9ec38c8 100644
--- a/drivers/nrex/nrex.cpp
+++ b/drivers/nrex/nrex.cpp
@@ -1,4 +1,5 @@
 //  NREX: Node RegEx
+//  Version 0.1
 //
 //  Copyright (c) 2015, Zher Huei Lee
 //  All rights reserved.
@@ -29,11 +30,13 @@
 #include <wctype.h>
 #include <wchar.h>
 #define NREX_ISALPHANUM iswalnum
+#define NREX_ISSPACE iswspace
 #define NREX_STRLEN wcslen
 #else
 #include <ctype.h>
 #include <string.h>
 #define NREX_ISALPHANUM isalnum
+#define NREX_ISSPACE isspace
 #define NREX_STRLEN strlen
 #endif
 
@@ -116,34 +119,72 @@ class nrex_array
         }
 };
 
-static nrex_char nrex_unescape(nrex_char repr)
+static int nrex_parse_hex(nrex_char c) // Returns the value (0-15) of hex digit c, or -1 if c is not a hex digit.
 {
-    switch (repr)
+    if ('0' <= c && c <= '9')
     {
-        case '^': return '^';
-        case '$': return '$';
-        case '(': return '(';
-        case ')': return ')';
-        case '\\': return '\\';
-        case '.': return '.';
-        case '+': return '+';
-        case '*': return '*';
-        case '?': return '?';
-        case '-': return '-';
-        case 'a': return '\a';
-        case 'e': return '\e';
-        case 'f': return '\f';
-        case 'n': return '\n';
-        case 'r': return '\r';
-        case 't': return '\t';
-        case 'v': return '\v';
+        return int(c - '0');
     }
-    return 0;
+    else if ('a' <= c && c <= 'f')
+    {
+        return int(c - 'a') + 10; // lower-case a-f map to 10-15
+    }
+    else if ('A' <= c && c <= 'F')
+    {
+        return int(c - 'A') + 10; // upper-case A-F map to 10-15
+    }
+    return -1; // not a hexadecimal digit
+}
+
+static nrex_char nrex_unescape(const nrex_char*& c) // Decodes the escape whose letter is c[1] (c[0] is presumably the backslash); on success c is left on the last character consumed.
+{
+    switch (c[1])
+    {
+        case '0': ++c; return '\0';
+        case 'a': ++c; return '\a';
+        case 'e': ++c; return '\x1B'; // ESC, written numerically because '\e' is not a standard C++ escape sequence
+        case 'f': ++c; return '\f';
+        case 'n': ++c; return '\n';
+        case 'r': ++c; return '\r';
+        case 't': ++c; return '\t';
+        case 'v': ++c; return '\v';
+        case 'b': ++c; return '\b';
+        case 'x': // 'x' must be followed by exactly two hex digits
+        {
+            int point = 0;
+            for (int i = 2; i <= 3; ++i)
+            {
+                int res = nrex_parse_hex(c[i]);
+                if (res == -1)
+                {
+                    return '\0'; // malformed escape: c is not advanced
+                }
+                point = (point << 4) + res; // accumulate one nibble per digit
+            }
+            c = &c[3]; // leave c on the last hex digit
+            return nrex_char(point);
+        }
+        case 'u': // 'u' must be followed by exactly four hex digits
+        {
+            int point = 0;
+            for (int i = 2; i <= 5; ++i)
+            {
+                int res = nrex_parse_hex(c[i]);
+                if (res == -1)
+                {
+                    return '\0'; // malformed escape: c is not advanced
+                }
+                point = (point << 4) + res; // accumulate one nibble per digit
+            }
+            c = &c[5]; // leave c on the last hex digit
+            return nrex_char(point);
+        }
+    }
+    return (++c)[0]; // any other escaped character stands for itself
 }
 
 struct nrex_search
 {
-    public:
         const nrex_char* str;
         nrex_result* captures;
         int end;
@@ -168,12 +209,14 @@ struct nrex_node
         nrex_node* previous;
         nrex_node* parent;
         bool quantifiable;
+        int length;
 
         nrex_node(bool quantify = false)
             : next(NULL)
             , previous(NULL)
             , parent(NULL)
             , quantifiable(quantify)
+            , length(-1)
         {
         }
 
@@ -206,21 +249,61 @@ struct nrex_node
             }
             return pos;
         }
+
+        void increment_length(int amount, bool subtract = false) // Adds (or, with subtract, removes) amount from length here and in every ancestor.
+        {
+            if (amount >= 0 && length >= 0)
+            {
+                if (!subtract)
+                {
+                    length += amount;
+                }
+                else
+                {
+                    length -= amount;
+                }
+            }
+            else
+            {
+                length = -1; // a negative amount or an already-negative length leaves length at -1
+            }
+            if (parent)
+            {
+                parent->increment_length(amount, subtract); // apply the same change to the parent chain
+            }
+        }
 };
 
 struct nrex_node_group : public nrex_node
 {
-        int capturing;
+        static const int NonCapture = -1;
+        static const int Bracket = -2;
+        static const int LookAhead = -3;
+        static const int LookBehind = -4;
+
+        int mode;
         bool negate;
         nrex_array<nrex_node*> childset;
         nrex_node* back;
 
-        nrex_node_group(int capturing)
+        nrex_node_group(int mode) // mode is a capture index when >= 0, otherwise one of the negative constants declared in this struct.
             : nrex_node(true)
-            , capturing(capturing)
+            , mode(mode)
             , negate(false)
             , back(NULL)
         {
+            if (mode != Bracket)
+            {
+                length = 0; // starts empty; add_child adds each child's length for non-bracket groups
+            }
+            else
+            {
+                length = 1; // a bracket group starts with length 1
+            }
+            if (mode == LookAhead || mode == LookBehind)
+            {
+                quantifiable = false; // overrides the nrex_node(true) base initialiser for look-around groups
+            }
         }
 
         virtual ~nrex_node_group()
@@ -234,14 +317,23 @@ struct nrex_node_group : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            if (capturing >= 0)
+            if (mode >= 0)
             {
-                s->captures[capturing].start = pos;
+                s->captures[mode].start = pos;
             }
             for (unsigned int i = 0; i < childset.size(); ++i)
             {
                 s->complete = false;
-                int res = childset[i]->test(s, pos);
+                int offset = 0;
+                if (mode == LookBehind)
+                {
+                    if (pos < length)
+                    {
+                        return -1;
+                    }
+                    offset = length;
+                }
+                int res = childset[i]->test(s, pos - offset);
                 if (s->complete)
                 {
                     return res;
@@ -256,12 +348,20 @@ struct nrex_node_group : public nrex_node
                     {
                         return -1;
                     }
+                    if (i + 1 < childset.size())
+                    {
+                        continue;
+                    }
                 }
                 if (res >= 0)
                 {
-                    if (capturing >= 0)
+                    if (mode >= 0)
+                    {
+                        s->captures[mode].length = res - pos;
+                    }
+                    else if (mode == LookAhead || mode == LookBehind)
                     {
-                        s->captures[capturing].length = res - pos;
+                        res = pos;
                     }
                     return next ? next->test(s, res) : res;
                 }
@@ -271,15 +371,19 @@ struct nrex_node_group : public nrex_node
 
         virtual int test_parent(nrex_search* s, int pos) const
         {
-            if (capturing >= 0)
+            if (mode >= 0) // a non-negative mode is used as the index into s->captures
             {
-                s->captures[capturing].length = pos - s->captures[capturing].start;
+                s->captures[mode].length = pos - s->captures[mode].start; // capture runs from its recorded start up to pos
             }
             return nrex_node::test_parent(s, pos);
         }
 
         void add_childset()
         {
+            if (childset.size() > 0 && mode != Bracket) // a childset already exists and this is not a bracket group
+            {
+                length = -1; // NOTE(review): -1 presumably marks the group length as not fixed - confirm
+            }
             back = NULL;
         }
 
@@ -287,7 +391,7 @@ struct nrex_node_group : public nrex_node
         {
             node->parent = this;
             node->previous = back;
-            if (back)
+            if (back && mode != Bracket)
             {
                 back->next = node;
             }
@@ -295,6 +399,10 @@ struct nrex_node_group : public nrex_node
             {
                 childset.push(node);
             }
+            if (mode != Bracket)
+            {
+                increment_length(node->length);
+            }
             back = node;
         }
 
@@ -310,10 +418,32 @@ struct nrex_node_group : public nrex_node
             {
                 childset.pop();
             }
+            if (mode != Bracket)
+            {
+                increment_length(old->length, true);
+            }
             back = old->previous;
             add_child(node);
             return old;
         }
+
+        void pop_back() // Removes and deletes the most recently added child (back), if there is one.
+        {
+            if (back)
+            {
+                nrex_node* old = back;
+                if (!old->previous) // a child without a predecessor was pushed onto childset by add_child
+                {
+                    childset.pop();
+                }
+                if (mode != Bracket)
+                {
+                    increment_length(old->length, true); // take the removed child's length back out
+                }
+                back = old->previous; // NOTE(review): old->previous->next is not cleared here - confirm callers re-link it
+                NREX_DELETE(old);
+            }
+        }
 };
 
 struct nrex_node_char : public nrex_node
@@ -324,11 +454,12 @@ struct nrex_node_char : public nrex_node
             : nrex_node(true)
             , ch(c)
         {
+            length = 1;
         }
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos || s->at(pos) != ch)
+            if (s->end <= pos || 0 > pos || s->at(pos) != ch)
             {
                 return -1;
             }
@@ -346,11 +477,12 @@ struct nrex_node_range : public nrex_node
             , start(s)
             , end(e)
         {
+            length = 1;
         }
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos)
+            if (s->end <= pos || 0 > pos)
             {
                 return -1;
             }
@@ -363,20 +495,219 @@ struct nrex_node_range : public nrex_node
         }
 };
 
-static bool nrex_is_whitespace(nrex_char repr)
+enum nrex_class_type // Named character classes; one enumerator per name tried by nrex_parse_class.
 {
-    switch (repr)
+    nrex_class_none, // returned by nrex_parse_class when no class name matches
+    nrex_class_alnum,
+    nrex_class_alpha,
+    nrex_class_blank,
+    nrex_class_cntrl,
+    nrex_class_digit,
+    nrex_class_graph,
+    nrex_class_lower,
+    nrex_class_print,
+    nrex_class_punct,
+    nrex_class_space,
+    nrex_class_upper,
+    nrex_class_xdigit,
+    nrex_class_word
+};
+
+static bool nrex_compare_class(const nrex_char** pos, const char* text) // True if *pos starts with text followed by ":]"; on success *pos is moved onto the ']'.
+{
+    unsigned int i = 0;
+    for (i = 0; text[i] != '\0'; ++i)
     {
-        case ' ':
-        case '\t':
-        case '\r':
-        case '\n':
-        case '\f':
-            return true;
+        if ((*pos)[i] != text[i])
+        {
+            return false; // class name differs; *pos is left untouched
+        }
     }
-    return false;
+    if ((*pos)[i++] != ':' || (*pos)[i] != ']') // the name must be closed by ":]"
+    {
+        return false;
+    }
+    *pos = &(*pos)[i]; // i now indexes the closing ']'
+    return true;
 }
 
+#define NREX_COMPARE_CLASS(POS, NAME) if (nrex_compare_class(POS, #NAME)) return nrex_class_ ## NAME
+
+static nrex_class_type nrex_parse_class(const nrex_char** pos) // Tries each class name at *pos in turn; returns nrex_class_none if none matches.
+{
+    NREX_COMPARE_CLASS(pos, alnum); // each line expands to: if the name matches, return the matching enumerator
+    NREX_COMPARE_CLASS(pos, alpha);
+    NREX_COMPARE_CLASS(pos, blank);
+    NREX_COMPARE_CLASS(pos, cntrl);
+    NREX_COMPARE_CLASS(pos, digit);
+    NREX_COMPARE_CLASS(pos, graph);
+    NREX_COMPARE_CLASS(pos, lower);
+    NREX_COMPARE_CLASS(pos, print);
+    NREX_COMPARE_CLASS(pos, punct);
+    NREX_COMPARE_CLASS(pos, space);
+    NREX_COMPARE_CLASS(pos, upper);
+    NREX_COMPARE_CLASS(pos, xdigit);
+    NREX_COMPARE_CLASS(pos, word);
+    return nrex_class_none; // *pos is unchanged in this case
+}
+
+struct nrex_node_class : public nrex_node // Node that matches a single character belonging to one nrex_class_type.
+{
+        nrex_class_type type;
+
+        nrex_node_class(nrex_class_type t)
+            : nrex_node(true)
+            , type(t)
+        {
+            length = 1; // always consumes exactly one character (see test)
+        }
+
+        int test(nrex_search* s, int pos) const // Returns -1 on failure, otherwise the result of the next node (or pos + 1 if there is none).
+        {
+            if (s->end <= pos || 0 > pos) // pos lies outside [0, s->end)
+            {
+                return -1;
+            }
+            if (!test_class(s->at(pos)))
+            {
+                return -1;
+            }
+            return next ? next->test(s, pos + 1) : pos + 1;
+        }
+
+        bool test_class(nrex_char c) const // True if c belongs to the class selected by type.
+        {
+            if ((0 <= c && c <= 0x1F) || c == 0x7F) // 0x00-0x1F and 0x7F
+            {
+                if (type == nrex_class_cntrl)
+                {
+                    return true;
+                }
+            }
+            else if (c < 0x7F) // NOTE(review): a negative c (possible if nrex_char is a signed type) also takes this branch - confirm intended
+            {
+                if (type == nrex_class_print)
+                {
+                    return true;
+                }
+                else if (type == nrex_class_graph && c != ' ')
+                {
+                    return true;
+                }
+                else if ('0' <= c && c <= '9')
+                {
+                    switch (type)
+                    {
+                        case nrex_class_alnum:
+                        case nrex_class_digit:
+                        case nrex_class_xdigit:
+                        case nrex_class_word:
+                            return true;
+                        default:
+                            break;
+                    }
+                }
+                else if ('A' <= c && c <= 'Z')
+                {
+                    switch (type)
+                    {
+                        case nrex_class_alnum:
+                        case nrex_class_alpha:
+                        case nrex_class_upper:
+                        case nrex_class_word:
+                            return true;
+                        case nrex_class_xdigit:
+                            if (c <= 'F') // only A-F count as hex digits
+                            {
+                                return true;
+                            }
+                        default: // reached by fall-through from the xdigit case when c > 'F'
+                            break;
+                    }
+                }
+                else if ('a' <= c && c <= 'z')
+                {
+                    switch (type)
+                    {
+                        case nrex_class_alnum:
+                        case nrex_class_alpha:
+                        case nrex_class_lower:
+                        case nrex_class_word:
+                            return true;
+                        case nrex_class_xdigit:
+                            if (c <= 'f') // only a-f count as hex digits
+                            {
+                                return true;
+                            }
+                        default: // reached by fall-through from the xdigit case when c > 'f'
+                            break;
+                    }
+                }
+            }
+            switch (c) // whitespace and punctuation; reached whenever nothing above returned
+            {
+                case ' ':
+                case '\t':
+                    if (type == nrex_class_blank)
+                    {
+                        return true;
+                    } // no break: blank characters fall through to the space check
+                case '\r':
+                case '\n':
+                case '\f': // NOTE(review): '\v' is not listed - confirm whether it should count as space
+                    if (type == nrex_class_space)
+                    {
+                        return true;
+                    }
+                    break;
+                case '_':
+                    if (type == nrex_class_word)
+                    {
+                        return true;
+                    } // no break: '_' falls through to the punct check
+                case ']':
+                case '[':
+                case '!':
+                case '"':
+                case '#':
+                case '$':
+                case '%':
+                case '&':
+                case '\'':
+                case '(':
+                case ')':
+                case '*':
+                case '+':
+                case ',':
+                case '.':
+                case '/':
+                case ':':
+                case ';':
+                case '<':
+                case '=':
+                case '>':
+                case '?':
+                case '@':
+                case '\\':
+                case '^':
+                case '`':
+                case '{':
+                case '|':
+                case '}':
+                case '~':
+                case '-':
+                    if (type == nrex_class_punct)
+                    {
+                        return true;
+                    }
+                    break;
+                default:
+                    break;
+            }
+            return false;
+        }
+};
+
 static bool nrex_is_shorthand(nrex_char repr)
 {
     switch (repr)
@@ -400,11 +731,12 @@ struct nrex_node_shorthand : public nrex_node
             : nrex_node(true)
             , repr(c)
         {
+            length = 1;
         }
 
         int test(nrex_search* s, int pos) const
         {
-            if (s->end == pos)
+            if (s->end <= pos || 0 > pos)
             {
                 return -1;
             }
@@ -435,7 +767,7 @@ struct nrex_node_shorthand : public nrex_node
                 case 'S':
                     invert = true;
                 case 's':
-                    if (nrex_is_whitespace(c))
+                    if (NREX_ISSPACE(c))
                     {
                         found = true;
                     }
@@ -469,10 +801,10 @@ struct nrex_node_quantifier : public nrex_node
         bool greedy;
         nrex_node* child;
 
-        nrex_node_quantifier()
+        nrex_node_quantifier(int min, int max)
             : nrex_node()
-            , min(0)
-            , max(0)
+            , min(min)
+            , max(max)
             , greedy(true)
             , child(NULL)
         {
@@ -488,57 +820,70 @@ struct nrex_node_quantifier : public nrex_node
 
         int test(nrex_search* s, int pos) const
         {
-            nrex_array<int> backtrack;
-            backtrack.push(pos);
-            while (backtrack.top() <= s->end)
+            return test_step(s, pos, 0, pos);
+        }
+
+        int test_step(nrex_search* s, int pos, int level, int start) const
+        {
+            if (pos > s->end)
             {
-                if (max >= 1 && backtrack.size() > (unsigned int)max)
-                {
-                    break;
-                }
-                if (!greedy && (unsigned int)min < backtrack.size())
+                return -1;
+            }
+            if (!greedy && level > min)
+            {
+                int res = pos;
+                if (next)
                 {
-                    int res = backtrack.top();
-                    if (next)
-                    {
-                        res = next->test(s, res);
-                    }
-                    if (s->complete)
-                    {
-                        return res;
-                    }
-                    if (res >= 0 && parent->test_parent(s, res) >= 0)
-                    {
-                        return res;
-                    }
+                    res = next->test(s, res);
                 }
-                int res = child->test(s, backtrack.top());
                 if (s->complete)
                 {
                     return res;
                 }
-                if (res < 0 || res == backtrack.top())
+                if (res >= 0 && parent->test_parent(s, res) >= 0)
                 {
-                    break;
+                    return res;
                 }
-                backtrack.push(res);
             }
-            while (greedy && (unsigned int) min < backtrack.size())
+            if (max >= 0 && level > max)
             {
-                int res = backtrack.top();
-                if (next)
+                return -1;
+            }
+            if (level > 1 && level > min + 1 && pos == start)
+            {
+                return -1;
+            }
+            int res = pos;
+            if (level >= 1)
+            {
+                res = child->test(s, pos);
+                if (s->complete)
                 {
-                    res = next->test(s, res);
+                    return res;
                 }
-                if (res >= 0 && parent->test_parent(s, res) >= 0)
+            }
+            if (res >= 0)
+            {
+                int res_step = test_step(s, res, level + 1, start);
+                if (res_step >= 0)
                 {
-                    return res;
+                    return res_step;
                 }
-                if (s->complete)
+                else if (greedy && level >= min)
                 {
-                    return res;
+                    if (next)
+                    {
+                        res = next->test(s, res);
+                    }
+                    if (s->complete)
+                    {
+                        return res;
+                    }
+                    if (res >= 0 && parent->test_parent(s, res) >= 0)
+                    {
+                        return res;
+                    }
                 }
-                backtrack.pop();
             }
             return -1;
         }
@@ -552,6 +897,7 @@ struct nrex_node_anchor : public nrex_node
             : nrex_node()
             , end(end)
         {
+            length = 0;
         }
 
         int test(nrex_search* s, int pos) const
@@ -568,6 +914,45 @@ struct nrex_node_anchor : public nrex_node
         }
 };
 
+struct nrex_node_word_boundary : public nrex_node
+{
+        bool inverse;
+
+        nrex_node_word_boundary(bool inverse)
+            : nrex_node()
+            , inverse(inverse)
+        {
+            length = 0;
+        }
+
+        int test(nrex_search* s, int pos) const
+        {
+            bool left = false;
+            bool right = false;
+            if (pos != 0)
+            {
+                nrex_char c = s->at(pos - 1);
+                if (c == '_' || NREX_ISALPHANUM(c))
+                {
+                    left = true;
+                }
+            }
+            if (pos != s->end)
+            {
+                nrex_char c = s->at(pos);
+                if (c == '_' || NREX_ISALPHANUM(c))
+                {
+                    right = true;
+                }
+            }
+            if ((left != right) == inverse)
+            {
+                return -1;
+            }
+            return next ? next->test(s, pos) : pos;
+        }
+};
+
 struct nrex_node_backreference : public nrex_node
 {
         int ref;
@@ -576,6 +961,7 @@ struct nrex_node_backreference : public nrex_node
             : nrex_node(true)
             , ref(ref)
         {
+            length = -1;
         }
 
         int test(nrex_search* s, int pos) const
@@ -596,12 +982,31 @@ struct nrex_node_backreference : public nrex_node
         }
 };
 
+bool nrex_has_lookbehind(nrex_array<nrex_node_group*>& stack)
+{
+    for (unsigned int i = 0; i < stack.size(); i++)
+    {
+        if (stack[i]->mode == nrex_node_group::LookBehind)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
 nrex::nrex()
     : _capturing(0)
     , _root(NULL)
 {
 }
 
+nrex::nrex(const nrex_char* pattern, int captures)
+    : _capturing(0)
+    , _root(NULL)
+{
+    compile(pattern, captures);
+}
+
 nrex::~nrex()
 {
     if (_root)
@@ -627,10 +1032,14 @@ void nrex::reset()
 
 int nrex::capture_size() const
 {
-    return _capturing + 1;
+    if (_root)
+    {
+        return _capturing + 1;
+    }
+    return 0;
 }
 
-bool nrex::compile(const nrex_char* pattern)
+bool nrex::compile(const nrex_char* pattern, int captures)
 {
     reset();
     nrex_node_group* root = NREX_NEW(nrex_node_group(_capturing));
@@ -647,16 +1056,32 @@ bool nrex::compile(const nrex_char* pattern)
                 if (c[2] == ':')
                 {
                     c = &c[2];
-                    nrex_node_group* group = NREX_NEW(nrex_node_group(-1));
+                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::NonCapture));
+                    stack.top()->add_child(group);
+                    stack.push(group);
+                }
+                else if (c[2] == '!' || c[2] == '=')
+                {
+                    c = &c[2];
+                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::LookAhead));
+                    group->negate = (c[0] == '!');
+                    stack.top()->add_child(group);
+                    stack.push(group);
+                }
+                else if (c[2] == '<' && (c[3] == '!' || c[3] == '='))
+                {
+                    c = &c[3];
+                    nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::LookBehind));
+                    group->negate = (c[0] == '!');
                     stack.top()->add_child(group);
                     stack.push(group);
                 }
                 else
                 {
-                    NREX_COMPILE_ERROR("unrecognised qualifier for parenthesis");
+                    NREX_COMPILE_ERROR("unrecognised qualifier for group");
                 }
             }
-            else if (_capturing < 99)
+            else if (captures >= 0 && _capturing < captures)
             {
                 nrex_node_group* group = NREX_NEW(nrex_node_group(++_capturing));
                 stack.top()->add_child(group);
@@ -664,7 +1089,7 @@ bool nrex::compile(const nrex_char* pattern)
             }
             else
             {
-                nrex_node_group* group = NREX_NEW(nrex_node_group(-1));
+                nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::NonCapture));
                 stack.top()->add_child(group);
                 stack.push(group);
             }
@@ -682,152 +1107,228 @@ bool nrex::compile(const nrex_char* pattern)
         }
         else if (c[0] == '[')
         {
-            nrex_node_group* group = NREX_NEW(nrex_node_group(-1));
+            nrex_node_group* group = NREX_NEW(nrex_node_group(nrex_node_group::Bracket));
             stack.top()->add_child(group);
             if (c[1] == '^')
             {
                 group->negate = true;
                 ++c;
             }
+            bool first_child = true;
+            nrex_char previous_child;
+            bool previous_child_single = false;
             while (true)
             {
                 group->add_childset();
                 ++c;
                 if (c[0] == '\0')
                 {
-                    NREX_COMPILE_ERROR("unclosed character class '[]'");
+                    NREX_COMPILE_ERROR("unclosed bracket expression '['");
                 }
-                if (c[0] == ']')
+                if (c[0] == '[' && c[1] == ':')
+                {
+                    const nrex_char* d = &c[2];
+                    nrex_class_type cls = nrex_parse_class(&d);
+                    if (cls != nrex_class_none)
+                    {
+                        c = d;
+                        group->add_child(NREX_NEW(nrex_node_class(cls)));
+                        previous_child_single = false;
+                    }
+                    else
+                    {
+                        group->add_child(NREX_NEW(nrex_node_char('[')));
+                        previous_child = '[';
+                        previous_child_single = true;
+                    }
+                }
+                else if (c[0] == ']' && !first_child)
                 {
                     break;
                 }
                 else if (c[0] == '\\')
                 {
-                    nrex_char unescaped = nrex_unescape(c[1]);
-                    if (unescaped)
-                    {
-                        group->add_child(NREX_NEW(nrex_node_char(unescaped)));
-                        ++c;
-                    }
-                    else if (nrex_is_shorthand(c[1]))
+                    if (nrex_is_shorthand(c[1]))
                     {
                         group->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
                         ++c;
+                        previous_child_single = false;
                     }
                     else
                     {
-                        NREX_COMPILE_ERROR("escape token not recognised");
+                        const nrex_char* d = c;
+                        nrex_char unescaped = nrex_unescape(d);
+                        if (c == d)
+                        {
+                            NREX_COMPILE_ERROR("invalid escape token");
+                        }
+                        group->add_child(NREX_NEW(nrex_node_char(unescaped)));
+                        c = d;
+                        previous_child = unescaped;
+                        previous_child_single = true;
                     }
                 }
-                else
+                else if (previous_child_single && c[0] == '-')
                 {
-                    if (c[1] == '-' && c[2] != '\0')
+                    bool is_range = false;
+                    nrex_char next;
+                    if (c[1] != '\0' && c[1] != ']')
                     {
-                        bool range = false;
-                        if ('A' <= c[0] && c[0] <= 'Z' && 'A' <= c[2] && c[2] <= 'Z')
+                        if (c[1] == '\\')
                         {
-                            range = true;
+                            const nrex_char* d = ++c;
+                            next = nrex_unescape(d);
+                            if (c == d)
+                            {
+                                NREX_COMPILE_ERROR("invalid escape token in range");
+                            }
                         }
-                        if ('a' <= c[0] && c[0] <= 'z' && 'a' <= c[2] && c[2] <= 'z')
-                        {
-                            range = true;
-                        }
-                        if ('0' <= c[0] && c[0] <= '9' && '0' <= c[2] && c[2] <= '9')
+                        else
                         {
-                            range = true;
+                            next = c[1];
+                            ++c;
                         }
-                        if (range)
+                        is_range = true;
+                    }
+                    if (is_range)
+                    {
+                        if (next < previous_child)
                         {
-                            group->add_child(NREX_NEW(nrex_node_range(c[0], c[2])));
-                            c = &c[2];
-                            continue;
+                            NREX_COMPILE_ERROR("text range out of order");
                         }
+                        group->pop_back();
+                        group->add_child(NREX_NEW(nrex_node_range(previous_child, next)));
+                        previous_child_single = false;
                     }
+                    else
+                    {
+                        group->add_child(NREX_NEW(nrex_node_char(c[0])));
+                        previous_child = c[0];
+                        previous_child_single = true;
+                    }
+                }
+                else
+                {
                     group->add_child(NREX_NEW(nrex_node_char(c[0])));
+                    previous_child = c[0];
+                    previous_child_single = true;
                 }
-
+                first_child = false;
             }
         }
         else if (nrex_is_quantifier(c[0]))
         {
-            nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier);
-            quant->child = stack.top()->swap_back(quant);
-            if (quant->child == NULL || !quant->child->quantifiable)
-            {
-                NREX_COMPILE_ERROR("element not quantifiable");
-            }
-            quant->child->previous = NULL;
-            quant->child->next = NULL;
-            quant->child->parent = quant;
+            int min = 0;
+            int max = -1;
+            bool valid_quantifier = true;
             if (c[0] == '?')
             {
-                quant->min = 0;
-                quant->max = 1;
+                min = 0;
+                max = 1;
             }
             else if (c[0] == '+')
             {
-                quant->min = 1;
-                quant->max = -1;
+                min = 1;
+                max = -1;
             }
             else if (c[0] == '*')
             {
-                quant->min = 0;
-                quant->max = -1;
+                min = 0;
+                max = -1;
             }
             else if (c[0] == '{')
             {
                 bool max_set = false;
-                quant->min = 0;
-                quant->max = -1;
+                const nrex_char* d = c;
                 while (true)
                 {
-                    ++c;
-                    if (c[0] == '\0')
+                    ++d;
+                    if (d[0] == '\0')
                     {
-                        NREX_COMPILE_ERROR("unclosed range quantifier '{}'");
+                        valid_quantifier = false;
+                        break;
                     }
-                    else if (c[0] == '}')
+                    else if (d[0] == '}')
                     {
                         break;
                     }
-                    else if (c[0] == ',')
+                    else if (d[0] == ',')
                     {
                         max_set = true;
                         continue;
                     }
-                    else if (c[0] < '0' || '9' < c[0])
+                    else if (d[0] < '0' || '9' < d[0])
                     {
-                        NREX_COMPILE_ERROR("expected numeric digits, ',' or '}'");
+                        valid_quantifier = false;
+                        break;
                     }
                     if (max_set)
                     {
-                        if (quant->max < 0)
+                        if (max < 0)
                         {
-                            quant->max = int(c[0] - '0');
+                            max = int(d[0] - '0');
                         }
                         else
                         {
-                            quant->max = quant->max * 10 + int(c[0] - '0');
+                            max = max * 10 + int(d[0] - '0');
                         }
                     }
                     else
                     {
-                        quant->min = quant->min * 10 + int(c[0] - '0');
+                        min = min * 10 + int(d[0] - '0');
                     }
                 }
                 if (!max_set)
                 {
-                    quant->max = quant->min;
+                    max = min;
+                }
+                if (valid_quantifier)
+                {
+                    c = d;
                 }
             }
-            if (c[1] == '?')
+            if (valid_quantifier)
             {
-                quant->greedy = false;
-                ++c;
+                if (stack.top()->back == NULL || !stack.top()->back->quantifiable)
+                {
+                    NREX_COMPILE_ERROR("element not quantifiable");
+                }
+                nrex_node_quantifier* quant = NREX_NEW(nrex_node_quantifier(min, max));
+                if (min == max)
+                {
+                    if (stack.top()->back->length >= 0)
+                    {
+                        quant->length = max * stack.top()->back->length;
+                    }
+                }
+                else
+                {
+                    if (nrex_has_lookbehind(stack))
+                    {
+                        NREX_COMPILE_ERROR("variable length quantifiers inside lookbehind not supported");
+                    }
+                }
+                quant->child = stack.top()->swap_back(quant);
+                quant->child->previous = NULL;
+                quant->child->next = NULL;
+                quant->child->parent = quant;
+                if (c[1] == '?')
+                {
+                    quant->greedy = false;
+                    ++c;
+                }
+            }
+            else
+            {
+                stack.top()->add_child(NREX_NEW(nrex_node_char(c[0])));
             }
         }
         else if (c[0] == '|')
         {
+            if (nrex_has_lookbehind(stack))
+            {
+                NREX_COMPILE_ERROR("alternations inside lookbehind not supported");
+            }
             stack.top()->add_childset();
         }
         else if (c[0] == '^' || c[0] == '$')
@@ -840,39 +1341,55 @@ bool nrex::compile(const nrex_char* pattern)
         }
         else if (c[0] == '\\')
         {
-            nrex_char unescaped = nrex_unescape(c[1]);
-            if (unescaped)
-            {
-                stack.top()->add_child(NREX_NEW(nrex_node_char(unescaped)));
-                ++c;
-            }
-            else if (nrex_is_shorthand(c[1]))
+            if (nrex_is_shorthand(c[1]))
             {
                 stack.top()->add_child(NREX_NEW(nrex_node_shorthand(c[1])));
                 ++c;
             }
-            else if ('1' <= c[1] && c[1] <= '9')
+            else if (('1' <= c[1] && c[1] <= '9') || (c[1] == 'g' && c[2] == '{'))
             {
                 int ref = 0;
-                if ('0' <= c[2] && c[2] <= '9')
+                bool unclosed = false;
+                if (c[1] == 'g')
                 {
-                    ref = int(c[1] - '0') * 10 + int(c[2] - '0');
+                    unclosed = true;
                     c = &c[2];
                 }
-                else
+                while ('0' <= c[1] && c[1] <= '9')
                 {
-                    ref = int(c[1] - '0');
+                    ref = ref * 10 + int(c[1] - '0');
                     ++c;
                 }
-                if (ref > _capturing)
+                if (c[1] == '}')
+                {
+                    unclosed = false;
+                    ++c;
+                }
+                if (ref > _capturing || ref <= 0 || unclosed)
                 {
                     NREX_COMPILE_ERROR("backreference to non-existent capture");
                 }
+                if (nrex_has_lookbehind(stack))
+                {
+                    NREX_COMPILE_ERROR("backreferences inside lookbehind not supported");
+                }
                 stack.top()->add_child(NREX_NEW(nrex_node_backreference(ref)));
             }
+            else if (c[1] == 'b' || c[1] == 'B')
+            {
+                stack.top()->add_child(NREX_NEW(nrex_node_word_boundary(c[1] == 'B')));
+                ++c;
+            }
             else
             {
-                NREX_COMPILE_ERROR("escape token not recognised");
+                const nrex_char* d = c;
+                nrex_char unescaped = nrex_unescape(d);
+                if (c == d)
+                {
+                    NREX_COMPILE_ERROR("invalid escape token");
+                }
+                stack.top()->add_child(NREX_NEW(nrex_node_char(unescaped)));
+                c = d;
             }
         }
         else
@@ -880,11 +1397,19 @@ bool nrex::compile(const nrex_char* pattern)
             stack.top()->add_child(NREX_NEW(nrex_node_char(c[0])));
         }
     }
+    if (stack.size() > 1)
+    {
+        NREX_COMPILE_ERROR("unclosed group '('");
+    }
     return true;
 }
 
 bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int end) const
 {
+    if (!_root)
+    {
+        return false;
+    }
     nrex_search s(str, captures);
     if (end >= offset)
     {
@@ -894,7 +1419,7 @@ bool nrex::match(const nrex_char* str, nrex_result* captures, int offset, int en
     {
         s.end = NREX_STRLEN(str);
     }
-    for (int i = offset; i < s.end; ++i)
+    for (int i = offset; i <= s.end; ++i)
     {
         for (int c = 0; c <= _capturing; ++c)
         {
diff --git a/drivers/nrex/nrex.hpp b/drivers/nrex/nrex.hpp
index 2a6aa08e1d..44e950c517 100644
--- a/drivers/nrex/nrex.hpp
+++ b/drivers/nrex/nrex.hpp
@@ -1,4 +1,5 @@
 //  NREX: Node RegEx
+//  Version 0.1
 //
 //  Copyright (c) 2015, Zher Huei Lee
 //  All rights reserved.
@@ -59,7 +60,32 @@ class nrex
         int _capturing;
         nrex_node* _root;
     public:
+
+        /*!
+         * \brief Initialises an empty regex container
+         */
         nrex();
+
+        /*!
+         * \brief Initialises and compiles the regex pattern
+         *
+         * This calls nrex::compile() with the same arguments. To check whether
+         * the compilation was successful, use nrex::valid().
+         *
+         * If the NREX_THROW_ERROR was defined it would automatically throw a
+         * runtime error nrex_compile_error if it encounters a problem when
+         * parsing the pattern.
+         *
+         * \param pattern   The regex pattern
+         * \param captures  The maximum number of capture groups to allow. Any
+         *                  extra would be converted to non-capturing groups.
+         *                  If negative, no limit would be imposed. Defaults
+         *                  to 9.
+         *
+         * \see nrex::compile()
+         */
+        nrex(const nrex_char* pattern, int captures = 9);
+
         ~nrex();
 
         /*!
@@ -78,8 +104,9 @@ class nrex
          *
          * This is used to provide the array size of the captures needed for
          * nrex::match() to work. The size is actually the number of capture
-         * groups + one for the matching of the entire pattern. The result is
-         * always capped at 100.
+         * groups + one for the matching of the entire pattern. This can be
+         * capped using the extra argument given in nrex::compile()
+         * (default 10).
          *
          * \return The number of captures
          */
@@ -95,10 +122,14 @@ class nrex
          * runtime error nrex_compile_error if it encounters a problem when
          * parsing the pattern.
          *
-         * \param The regex pattern
+         * \param pattern   The regex pattern
+         * \param captures  The maximum number of capture groups to allow. Any
+         *                  extra would be converted to non-capturing groups.
+         *                  If negative, no limit would be imposed. Defaults
+         *                  to 9.
          * \return True if the pattern was succesfully compiled
          */
-        bool compile(const nrex_char* pattern);
+        bool compile(const nrex_char* pattern, int captures = 9);
 
         /*!
          * \brief Uses the pattern to search through the provided string
diff --git a/drivers/nrex/regex.cpp b/drivers/nrex/regex.cpp
index 0a813c3490..e8578221a9 100644
--- a/drivers/nrex/regex.cpp
+++ b/drivers/nrex/regex.cpp
@@ -15,7 +15,7 @@
 
 void RegEx::_bind_methods() {
 
-	ObjectTypeDB::bind_method(_MD("compile","pattern"),&RegEx::compile);
+	ObjectTypeDB::bind_method(_MD("compile","pattern", "capture"),&RegEx::compile, DEFVAL(9));
 	ObjectTypeDB::bind_method(_MD("find","text","start","end"),&RegEx::find, DEFVAL(0), DEFVAL(-1));
 	ObjectTypeDB::bind_method(_MD("clear"),&RegEx::clear);
 	ObjectTypeDB::bind_method(_MD("is_valid"),&RegEx::is_valid);
@@ -54,7 +54,9 @@ bool RegEx::is_valid() const {
 };
 
 int RegEx::get_capture_count() const {
-	
+
+	ERR_FAIL_COND_V( !exp.valid(), 0 );
+
 	return exp.capture_size();
 }
 
@@ -66,11 +68,11 @@ String RegEx::get_capture(int capture) const {
 
 }
 
-Error RegEx::compile(const String& p_pattern) {
+Error RegEx::compile(const String& p_pattern, int capture) {
 
 	clear();
 
-	exp.compile(p_pattern.c_str());
+	exp.compile(p_pattern.c_str(), capture);
 
 	ERR_FAIL_COND_V( !exp.valid(), FAILED );
 
diff --git a/drivers/nrex/regex.h b/drivers/nrex/regex.h
index 0626029705..76aab2aea6 100644
--- a/drivers/nrex/regex.h
+++ b/drivers/nrex/regex.h
@@ -36,7 +36,7 @@ public:
 	bool is_valid() const;
 	int get_capture_count() const;
 	String get_capture(int capture) const;
-	Error compile(const String& p_pattern);
+	Error compile(const String& p_pattern, int capture = 9);
 	int find(const String& p_text, int p_start = 0, int p_end = -1) const;
 
 	RegEx();
diff --git a/drivers/ogg/SCsub b/drivers/ogg/SCsub
index dd59890064..3ee1bb6408 100644
--- a/drivers/ogg/SCsub
+++ b/drivers/ogg/SCsub
@@ -6,6 +6,4 @@ ogg_sources = [
 	"ogg/framing.c",
 ]
 
-if env['theora'] != "yes" or env['use_theoraplayer_binary'] != "yes":
-	env.drivers_sources+=ogg_sources
-
+env.drivers_sources+=ogg_sources
diff --git a/drivers/openssl/SCsub b/drivers/openssl/SCsub
index 7197364e01..6d3e7e6732 100644
--- a/drivers/openssl/SCsub
+++ b/drivers/openssl/SCsub
@@ -4,5 +4,3 @@ env.add_source_files(env.drivers_sources,"*.cpp")
 env.add_source_files(env.drivers_sources,"*.c")
 
 Export('env')
-
-
diff --git a/drivers/opus/SCsub b/drivers/opus/SCsub
index a2bebf62b3..59c746209b 100644
--- a/drivers/opus/SCsub
+++ b/drivers/opus/SCsub
@@ -124,7 +124,7 @@ opus_sources_lib = [
 ]
 
 if("opus_fixed_point" in env and env.opus_fixed_point=="yes"):
-	env.Append(CPPPATH=["#drivers/opus/silk/fixed"], CFLAGS=["-DOPUS_FIXED_POINT"])
+	env.Append(CFLAGS=["-DOPUS_FIXED_POINT"])
 	opus_sources_silk = [
 		"opus/silk/fixed/apply_sine_window_FIX.c",
 		"opus/silk/fixed/k2a_FIX.c",
@@ -153,7 +153,6 @@ if("opus_fixed_point" in env and env.opus_fixed_point=="yes"):
 		"opus/silk/fixed/regularize_correlations_FIX.c"
 	]
 else:
-	env.Append(CPPPATH=["#drivers/opus/silk/float"])
 	opus_sources_silk = [
 		"opus/silk/float/apply_sine_window_FLP.c",
 		"opus/silk/float/inner_product_FLP.c",
@@ -193,8 +192,4 @@ opus_sources_lib+=opus_sources_silk
 env.drivers_sources+=opus_sources_lib
 env.drivers_sources+=opus_sources
 
-env.Append(CPPPATH=["#drivers/opus"])
-env.Append(CPPPATH=["#drivers/opus/celt","#drivers/opus/silk","#drivers/opus/silk/float"])
-env.Append(CFLAGS=["-DOPUS_HAVE_CONFIG_H"])
-
 Export('env')
diff --git a/drivers/opus/analysis.c b/drivers/opus/analysis.c
index 47e8668b8e..e27e948178 100644
--- a/drivers/opus/analysis.c
+++ b/drivers/opus/analysis.c
@@ -25,19 +25,19 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "kiss_fft.h"
-#include "celt.h"
-#include "opus_modes.h"
-#include "arch.h"
-#include "quant_bands.h"
+#include "opus/celt/kiss_fft.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/quant_bands.h"
 #include <stdio.h>
-#include "analysis.h"
-#include "mlp.h"
-#include "stack_alloc.h"
+#include "opus/analysis.h"
+#include "opus/mlp.h"
+#include "opus/celt/stack_alloc.h"
 
 extern const MLP net;
 
diff --git a/drivers/opus/analysis.h b/drivers/opus/analysis.h
index be0388faa3..548614d529 100644
--- a/drivers/opus/analysis.h
+++ b/drivers/opus/analysis.h
@@ -28,8 +28,8 @@
 #ifndef ANALYSIS_H
 #define ANALYSIS_H
 
-#include "celt.h"
-#include "opus_private.h"
+#include "opus/celt/celt.h"
+#include "opus/opus_private.h"
 
 #define NB_FRAMES 8
 #define NB_TBANDS 18
diff --git a/drivers/opus/celt/_kiss_fft_guts.h b/drivers/opus/celt/_kiss_fft_guts.h
index 21bea8a9b0..2a4ee744ef 100644
--- a/drivers/opus/celt/_kiss_fft_guts.h
+++ b/drivers/opus/celt/_kiss_fft_guts.h
@@ -33,7 +33,7 @@
    defines kiss_fft_scalar as either short or a float type
    and defines
    typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
-#include "kiss_fft.h"
+#include "opus/celt/kiss_fft.h"
 
 /*
   Explanation of macros dealing with complex math:
@@ -45,7 +45,7 @@
    C_ADDTO( res , a)    : res += a
  * */
 #ifdef OPUS_FIXED_POINT
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 
 #define SAMP_MAX 2147483647
diff --git a/drivers/opus/celt/arch.h b/drivers/opus/celt/arch.h
index 83e3705000..d964f8d90c 100644
--- a/drivers/opus/celt/arch.h
+++ b/drivers/opus/celt/arch.h
@@ -34,8 +34,8 @@
 #ifndef ARCH_H
 #define ARCH_H
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 # if !defined(__GNUC_PREREQ)
 #  if defined(__GNUC__)&&defined(__GNUC_MINOR__)
@@ -109,10 +109,10 @@ typedef opus_val32 celt_ener;
 #define SCALEOUT(a)     (a)
 
 #ifdef FIXED_DEBUG
-#include "fixed_debug.h"
+#include "opus/celt/fixed_debug.h"
 #else
 
-#include "fixed_generic.h"
+#include "opus/celt/fixed_generic.h"
 
 #ifdef OPUS_ARM_INLINE_EDSP
 #include "arm/fixed_armv5e.h"
diff --git a/drivers/opus/celt/arm/arm_celt_map.c b/drivers/opus/celt/arm/arm_celt_map.c
index b187345154..31e7d5b319 100644
--- a/drivers/opus/celt/arm/arm_celt_map.c
+++ b/drivers/opus/celt/arm/arm_celt_map.c
@@ -25,11 +25,11 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "pitch.h"
+#include "opus/celt/pitch.h"
 
 #if defined(OPUS_HAVE_RTCD)
 
diff --git a/drivers/opus/celt/arm/armcpu.c b/drivers/opus/celt/arm/armcpu.c
index 7f0af631b9..fb7f2421fe 100644
--- a/drivers/opus/celt/arm/armcpu.c
+++ b/drivers/opus/celt/arm/armcpu.c
@@ -27,16 +27,16 @@
 
 /* Original code from libtheora modified to suit to Opus */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #ifdef OPUS_HAVE_RTCD
 
-#include "armcpu.h"
-#include "cpu_support.h"
-#include "os_support.h"
-#include "opus_types.h"
+#include "opus/celt/arm/armcpu.h"
+#include "opus/celt/cpu_support.h"
+#include "opus/celt/os_support.h"
+#include "opus/opus_types.h"
 
 #define OPUS_CPU_ARM_V4    (1)
 #define OPUS_CPU_ARM_EDSP  (1<<1)
diff --git a/drivers/opus/celt/arm/fixed_armv5e.h b/drivers/opus/celt/arm/fixed_armv5e.h
index 1194a7d3ec..cb6e4c1da9 100644
--- a/drivers/opus/celt/arm/fixed_armv5e.h
+++ b/drivers/opus/celt/arm/fixed_armv5e.h
@@ -30,7 +30,7 @@
 #ifndef FIXED_ARMv5E_H
 #define FIXED_ARMv5E_H
 
-#include "fixed_armv4.h"
+#include "opus/celt/arm/fixed_armv4.h"
 
 /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
 #undef MULT16_32_Q16
diff --git a/drivers/opus/celt/arm/pitch_arm.h b/drivers/opus/celt/arm/pitch_arm.h
index df5e82ef0b..18d1f2e75e 100644
--- a/drivers/opus/celt/arm/pitch_arm.h
+++ b/drivers/opus/celt/arm/pitch_arm.h
@@ -28,7 +28,7 @@
 #if !defined(PITCH_ARM_H)
 # define PITCH_ARM_H
 
-# include "armcpu.h"
+# include "opus/celt/arm/armcpu.h"
 
 # if defined(OPUS_FIXED_POINT)
 
diff --git a/drivers/opus/celt/bands.c b/drivers/opus/celt/bands.c
index 87280c8333..5a6b23d87f 100644
--- a/drivers/opus/celt/bands.c
+++ b/drivers/opus/celt/bands.c
@@ -27,21 +27,21 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <math.h>
-#include "bands.h"
-#include "opus_modes.h"
-#include "vq.h"
-#include "cwrs.h"
-#include "stack_alloc.h"
-#include "os_support.h"
-#include "mathops.h"
-#include "rate.h"
-#include "quant_bands.h"
-#include "pitch.h"
+#include "opus/celt/bands.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/vq.h"
+#include "opus/celt/cwrs.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/rate.h"
+#include "opus/celt/quant_bands.h"
+#include "opus/celt/pitch.h"
 
 int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
 {
diff --git a/drivers/opus/celt/bands.h b/drivers/opus/celt/bands.h
index fe1e47097a..1ef7cbc8ee 100644
--- a/drivers/opus/celt/bands.h
+++ b/drivers/opus/celt/bands.h
@@ -30,11 +30,11 @@
 #ifndef BANDS_H
 #define BANDS_H
 
-#include "arch.h"
-#include "opus_modes.h"
-#include "entenc.h"
-#include "entdec.h"
-#include "rate.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
+#include "opus/celt/rate.h"
 
 /** Compute the amplitude (sqrt energy) in each of the bands
  * @param m Mode data
diff --git a/drivers/opus/celt/celt.c b/drivers/opus/celt/celt.c
index b894e1e13f..d99a91ba29 100644
--- a/drivers/opus/celt/celt.c
+++ b/drivers/opus/celt/celt.c
@@ -27,28 +27,28 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #define CELT_C
 
-#include "os_support.h"
-#include "mdct.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mdct.h"
 #include <math.h>
-#include "celt.h"
-#include "pitch.h"
-#include "bands.h"
-#include "opus_modes.h"
-#include "entcode.h"
-#include "quant_bands.h"
-#include "rate.h"
-#include "stack_alloc.h"
-#include "mathops.h"
-#include "float_cast.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/pitch.h"
+#include "opus/celt/bands.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/quant_bands.h"
+#include "opus/celt/rate.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/float_cast.h"
 #include <stdarg.h>
-#include "celt_lpc.h"
-#include "vq.h"
+#include "opus/celt/celt_lpc.h"
+#include "opus/celt/vq.h"
 
 #ifndef PACKAGE_VERSION
 #define PACKAGE_VERSION "unknown"
diff --git a/drivers/opus/celt/celt.h b/drivers/opus/celt/celt.h
index 5deea1f0aa..2b7dc3fa72 100644
--- a/drivers/opus/celt/celt.h
+++ b/drivers/opus/celt/celt.h
@@ -35,12 +35,12 @@
 #ifndef CELT_H
 #define CELT_H
 
-#include "opus_types.h"
-#include "opus_defines.h"
-#include "opus_custom.h"
-#include "entenc.h"
-#include "entdec.h"
-#include "arch.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
+#include "opus/opus_custom.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
+#include "opus/celt/arch.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/drivers/opus/celt/celt_decoder.c b/drivers/opus/celt/celt_decoder.c
index 93791feab4..67c3789439 100644
--- a/drivers/opus/celt/celt_decoder.c
+++ b/drivers/opus/celt/celt_decoder.c
@@ -27,29 +27,29 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #define CELT_DECODER_C
 
-#include "cpu_support.h"
-#include "os_support.h"
-#include "mdct.h"
+#include "opus/celt/cpu_support.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mdct.h"
 #include <math.h>
-#include "celt.h"
-#include "pitch.h"
-#include "bands.h"
-#include "opus_modes.h"
-#include "entcode.h"
-#include "quant_bands.h"
-#include "rate.h"
-#include "stack_alloc.h"
-#include "mathops.h"
-#include "float_cast.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/pitch.h"
+#include "opus/celt/bands.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/quant_bands.h"
+#include "opus/celt/rate.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/float_cast.h"
 #include <stdarg.h>
-#include "celt_lpc.h"
-#include "vq.h"
+#include "opus/celt/celt_lpc.h"
+#include "opus/celt/vq.h"
 
 /**********************************************************************/
 /*                                                                    */
diff --git a/drivers/opus/celt/celt_encoder.c b/drivers/opus/celt/celt_encoder.c
index a61e41f42d..810ee5d743 100644
--- a/drivers/opus/celt/celt_encoder.c
+++ b/drivers/opus/celt/celt_encoder.c
@@ -27,29 +27,29 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #define CELT_ENCODER_C
 
-#include "cpu_support.h"
-#include "os_support.h"
-#include "mdct.h"
+#include "opus/celt/cpu_support.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mdct.h"
 #include <math.h>
-#include "celt.h"
-#include "pitch.h"
-#include "bands.h"
-#include "opus_modes.h"
-#include "entcode.h"
-#include "quant_bands.h"
-#include "rate.h"
-#include "stack_alloc.h"
-#include "mathops.h"
-#include "float_cast.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/pitch.h"
+#include "opus/celt/bands.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/quant_bands.h"
+#include "opus/celt/rate.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/float_cast.h"
 #include <stdarg.h>
-#include "celt_lpc.h"
-#include "vq.h"
+#include "opus/celt/celt_lpc.h"
+#include "opus/celt/vq.h"
 
 
 /** Encoder state
diff --git a/drivers/opus/celt/celt_lpc.c b/drivers/opus/celt/celt_lpc.c
index 1fa4406bc9..ad0a6dfd43 100644
--- a/drivers/opus/celt/celt_lpc.c
+++ b/drivers/opus/celt/celt_lpc.c
@@ -25,14 +25,14 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "celt_lpc.h"
-#include "stack_alloc.h"
-#include "mathops.h"
-#include "pitch.h"
+#include "opus/celt/celt_lpc.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/pitch.h"
 
 void _celt_lpc(
       opus_val16       *_lpc, /* out: [0...p-1] LPC coefficients      */
diff --git a/drivers/opus/celt/celt_lpc.h b/drivers/opus/celt/celt_lpc.h
index dc2a0a3d26..12bb78a6fd 100644
--- a/drivers/opus/celt/celt_lpc.h
+++ b/drivers/opus/celt/celt_lpc.h
@@ -28,7 +28,7 @@
 #ifndef PLC_H
 #define PLC_H
 
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 #define LPC_ORDER 24
 
diff --git a/drivers/opus/celt/cpu_support.h b/drivers/opus/celt/cpu_support.h
index d68dbe62c5..f682a1d19e 100644
--- a/drivers/opus/celt/cpu_support.h
+++ b/drivers/opus/celt/cpu_support.h
@@ -28,8 +28,8 @@
 #ifndef CPU_SUPPORT_H
 #define CPU_SUPPORT_H
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #if defined(OPUS_HAVE_RTCD) && defined(OPUS_ARM_ASM)
 #include "arm/armcpu.h"
diff --git a/drivers/opus/celt/cwrs.c b/drivers/opus/celt/cwrs.c
index b866aa9210..bae9d21b97 100644
--- a/drivers/opus/celt/cwrs.c
+++ b/drivers/opus/celt/cwrs.c
@@ -27,14 +27,14 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "os_support.h"
-#include "cwrs.h"
-#include "mathops.h"
-#include "arch.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/cwrs.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/arch.h"
 
 #ifdef CUSTOM_MODES
 
diff --git a/drivers/opus/celt/cwrs.h b/drivers/opus/celt/cwrs.h
index 7dfbd076d1..5400afa6a4 100644
--- a/drivers/opus/celt/cwrs.h
+++ b/drivers/opus/celt/cwrs.h
@@ -30,10 +30,10 @@
 #ifndef CWRS_H
 #define CWRS_H
 
-#include "arch.h"
-#include "stack_alloc.h"
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 
 #ifdef CUSTOM_MODES
 int log2_frac(opus_uint32 val, int frac);
diff --git a/drivers/opus/celt/ecintrin.h b/drivers/opus/celt/ecintrin.h
index 2263cff6bd..6ed8fb280e 100644
--- a/drivers/opus/celt/ecintrin.h
+++ b/drivers/opus/celt/ecintrin.h
@@ -26,10 +26,10 @@
 */
 
 /*Some common macros for potential platform-specific optimization.*/
-#include "opus_types.h"
+#include "opus/opus_types.h"
 #include <math.h>
 #include <limits.h>
-#include "arch.h"
+#include "opus/celt/arch.h"
 #if !defined(_ecintrin_H)
 # define _ecintrin_H (1)
 
diff --git a/drivers/opus/celt/entcode.c b/drivers/opus/celt/entcode.c
index fd817a9db5..5c9874908d 100644
--- a/drivers/opus/celt/entcode.c
+++ b/drivers/opus/celt/entcode.c
@@ -25,12 +25,12 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "entcode.h"
-#include "arch.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/arch.h"
 
 #if !defined(EC_CLZ)
 /*This is a fallback for systems where we don't know how to access
diff --git a/drivers/opus/celt/entcode.h b/drivers/opus/celt/entcode.h
index dd13e49e50..c129f9b7d9 100644
--- a/drivers/opus/celt/entcode.h
+++ b/drivers/opus/celt/entcode.h
@@ -25,14 +25,14 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #if !defined(_entcode_H)
 # define _entcode_H (1)
 # include <limits.h>
 # include <stddef.h>
-# include "ecintrin.h"
+# include "opus/celt/ecintrin.h"
 
 /*OPT: ec_window must be at least 32 bits, but if you have fast arithmetic on a
    larger type, you can speed up the decoder by using it here.*/
diff --git a/drivers/opus/celt/entdec.c b/drivers/opus/celt/entdec.c
index 383da571c9..0ec4d460d2 100644
--- a/drivers/opus/celt/entdec.c
+++ b/drivers/opus/celt/entdec.c
@@ -25,15 +25,15 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stddef.h>
-#include "os_support.h"
-#include "arch.h"
-#include "entdec.h"
-#include "mfrngcod.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/entdec.h"
+#include "opus/celt/mfrngcod.h"
 
 /*A range decoder.
   This is an entropy decoder based upon \cite{Mar79}, which is itself a
diff --git a/drivers/opus/celt/entdec.h b/drivers/opus/celt/entdec.h
index d8ab318730..fda60ab326 100644
--- a/drivers/opus/celt/entdec.h
+++ b/drivers/opus/celt/entdec.h
@@ -28,7 +28,7 @@
 #if !defined(_entdec_H)
 # define _entdec_H (1)
 # include <limits.h>
-# include "entcode.h"
+# include "opus/celt/entcode.h"
 
 /*Initializes the decoder.
   _buf: The input buffer to use.
diff --git a/drivers/opus/celt/entenc.c b/drivers/opus/celt/entenc.c
index 299329c63f..085b2b2816 100644
--- a/drivers/opus/celt/entenc.c
+++ b/drivers/opus/celt/entenc.c
@@ -25,13 +25,13 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#if defined(OPUS_HAVE_CONFIG_H)
-# include "opus_config.h"
+#if defined(OPUS_ENABLED)
+# include "opus/opus_config.h"
 #endif
-#include "os_support.h"
-#include "arch.h"
-#include "entenc.h"
-#include "mfrngcod.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/mfrngcod.h"
 
 /*A range encoder.
   See entdec.c and the references for implementation details \cite{Mar79,MNW98}.
diff --git a/drivers/opus/celt/entenc.h b/drivers/opus/celt/entenc.h
index 796bc4d572..3f4a3acc93 100644
--- a/drivers/opus/celt/entenc.h
+++ b/drivers/opus/celt/entenc.h
@@ -28,7 +28,7 @@
 #if !defined(_entenc_H)
 # define _entenc_H (1)
 # include <stddef.h>
-# include "entcode.h"
+# include "opus/celt/entcode.h"
 
 /*Initializes the encoder.
   _buf:  The buffer to store output bytes in.
diff --git a/drivers/opus/celt/fixed_debug.h b/drivers/opus/celt/fixed_debug.h
index 80bc94910f..0ed16baa17 100644
--- a/drivers/opus/celt/fixed_debug.h
+++ b/drivers/opus/celt/fixed_debug.h
@@ -33,7 +33,7 @@
 #define FIXED_DEBUG_H
 
 #include <stdio.h>
-#include "opus_defines.h"
+#include "opus/opus_defines.h"
 
 #ifdef CELT_C
 OPUS_EXPORT opus_int64 celt_mips=0;
diff --git a/drivers/opus/celt/float_cast.h b/drivers/opus/celt/float_cast.h
index ede6574860..86e80a93ff 100644
--- a/drivers/opus/celt/float_cast.h
+++ b/drivers/opus/celt/float_cast.h
@@ -30,7 +30,7 @@
 #define FLOAT_CAST_H
 
 
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 /*============================================================================
 **      On Intel Pentium processors (especially PIII and probably P4), converting
diff --git a/drivers/opus/celt/kiss_fft.c b/drivers/opus/celt/kiss_fft.c
index 333be975d1..89a1790b24 100644
--- a/drivers/opus/celt/kiss_fft.c
+++ b/drivers/opus/celt/kiss_fft.c
@@ -30,16 +30,16 @@
    heavily modified to better suit Opus */
 
 #ifndef SKIP_CONFIG_H
-#  ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#  ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #  endif
 #endif
 
-#include "_kiss_fft_guts.h"
-#include "arch.h"
-#include "os_support.h"
-#include "mathops.h"
-#include "stack_alloc.h"
+#include "opus/celt/_kiss_fft_guts.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/stack_alloc.h"
 
 /* The guts header contains all the multiplication and addition macros that are defined for
    complex numbers.  It also delares the kf_ internal functions.
diff --git a/drivers/opus/celt/kiss_fft.h b/drivers/opus/celt/kiss_fft.h
index aa22b3a419..db2532c692 100644
--- a/drivers/opus/celt/kiss_fft.h
+++ b/drivers/opus/celt/kiss_fft.h
@@ -31,7 +31,7 @@
 
 #include <stdlib.h>
 #include <math.h>
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -46,7 +46,7 @@ extern "C" {
 #endif
 
 #ifdef OPUS_FIXED_POINT
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 #  define kiss_fft_scalar opus_int32
 #  define kiss_twiddle_scalar opus_int16
diff --git a/drivers/opus/celt/laplace.c b/drivers/opus/celt/laplace.c
index c6d293f298..9dc4d94d24 100644
--- a/drivers/opus/celt/laplace.c
+++ b/drivers/opus/celt/laplace.c
@@ -26,12 +26,12 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "laplace.h"
-#include "mathops.h"
+#include "opus/celt/laplace.h"
+#include "opus/celt/mathops.h"
 
 /* The minimum probability of an energy delta (out of 32768). */
 #define LAPLACE_LOG_MINP (0)
diff --git a/drivers/opus/celt/laplace.h b/drivers/opus/celt/laplace.h
index 46c14b5da5..9efcc73aa2 100644
--- a/drivers/opus/celt/laplace.h
+++ b/drivers/opus/celt/laplace.h
@@ -26,8 +26,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 
 /** Encode a value that is assumed to be the realisation of a
     Laplace-distributed random process
diff --git a/drivers/opus/celt/mathops.c b/drivers/opus/celt/mathops.c
index 49be746d8c..88e5aea129 100644
--- a/drivers/opus/celt/mathops.c
+++ b/drivers/opus/celt/mathops.c
@@ -31,11 +31,11 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "mathops.h"
+#include "opus/celt/mathops.h"
 
 /*Compute floor(sqrt(_val)) with exact arithmetic.
   This has been tested on all possible 32-bit inputs.*/
diff --git a/drivers/opus/celt/mathops.h b/drivers/opus/celt/mathops.h
index 4a6bc539bc..759f58d1b7 100644
--- a/drivers/opus/celt/mathops.h
+++ b/drivers/opus/celt/mathops.h
@@ -34,9 +34,9 @@
 #ifndef MATHOPS_H
 #define MATHOPS_H
 
-#include "arch.h"
-#include "entcode.h"
-#include "os_support.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/os_support.h"
 
 /* Multiplies two 16-bit fractional values. Bit-exactness of this macro is important */
 #define FRAC_MUL16(a,b) ((16384+((opus_int32)(opus_int16)(a)*(opus_int16)(b)))>>15)
@@ -141,7 +141,7 @@ static OPUS_INLINE float celt_exp2(float x)
 
 #ifdef OPUS_FIXED_POINT
 
-#include "os_support.h"
+#include "opus/celt/os_support.h"
 
 #ifndef OVERRIDE_CELT_ILOG2
 /** Integer log in base2. Undefined for zero and negative numbers */
diff --git a/drivers/opus/celt/mdct.c b/drivers/opus/celt/mdct.c
index d08d026fac..ae34538d6c 100644
--- a/drivers/opus/celt/mdct.c
+++ b/drivers/opus/celt/mdct.c
@@ -40,18 +40,18 @@
 */
 
 #ifndef SKIP_CONFIG_H
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 #endif
 
-#include "mdct.h"
-#include "kiss_fft.h"
-#include "_kiss_fft_guts.h"
+#include "opus/celt/mdct.h"
+#include "opus/celt/kiss_fft.h"
+#include "opus/celt/_kiss_fft_guts.h"
 #include <math.h>
-#include "os_support.h"
-#include "mathops.h"
-#include "stack_alloc.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/stack_alloc.h"
 
 #ifdef CUSTOM_MODES
 
diff --git a/drivers/opus/celt/mdct.h b/drivers/opus/celt/mdct.h
index 4e7a199246..492afeae1e 100644
--- a/drivers/opus/celt/mdct.h
+++ b/drivers/opus/celt/mdct.h
@@ -42,9 +42,9 @@
 #ifndef MDCT_H
 #define MDCT_H
 
-#include "opus_defines.h"
-#include "kiss_fft.h"
-#include "arch.h"
+#include "opus/opus_defines.h"
+#include "opus/celt/kiss_fft.h"
+#include "opus/celt/arch.h"
 
 typedef struct {
    int n;
diff --git a/drivers/opus/celt/mfrngcod.h b/drivers/opus/celt/mfrngcod.h
index 809152a59a..c24d98cde2 100644
--- a/drivers/opus/celt/mfrngcod.h
+++ b/drivers/opus/celt/mfrngcod.h
@@ -27,7 +27,7 @@
 
 #if !defined(_mfrngcode_H)
 # define _mfrngcode_H (1)
-# include "entcode.h"
+# include "opus/celt/entcode.h"
 
 /*Constants used by the entropy encoder/decoder.*/
 
diff --git a/drivers/opus/celt/modes.c b/drivers/opus/celt/modes.c
index 3794074aaa..aef2681443 100644
--- a/drivers/opus/celt/modes.c
+++ b/drivers/opus/celt/modes.c
@@ -27,16 +27,16 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "celt.h"
-#include "opus_modes.h"
-#include "rate.h"
-#include "os_support.h"
-#include "stack_alloc.h"
-#include "quant_bands.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/rate.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/quant_bands.h"
 
 static const opus_int16 eband5ms[] = {
 /*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
@@ -63,9 +63,9 @@ static const unsigned char band_allocation[] = {
 
 #ifndef CUSTOM_MODES_ONLY
  #ifdef OPUS_FIXED_POINT
-  #include "static_modes_fixed.h"
+  #include "opus/celt/static_modes_fixed.h"
  #else
-  #include "static_modes_float.h"
+  #include "opus/celt/static_modes_float.h"
  #endif
 #endif /* CUSTOM_MODES_ONLY */
 
diff --git a/drivers/opus/celt/opus_custom_demo.c b/drivers/opus/celt/opus_custom_demo.c
index 8c7f58b6e6..b3129de779 100644
--- a/drivers/opus/celt/opus_custom_demo.c
+++ b/drivers/opus/celt/opus_custom_demo.c
@@ -26,12 +26,12 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus_custom.h"
-#include "arch.h"
+#include "opus/opus_custom.h"
+#include "opus/celt/arch.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
diff --git a/drivers/opus/celt/opus_modes.h b/drivers/opus/celt/opus_modes.h
index a1df46265e..38e5844535 100644
--- a/drivers/opus/celt/opus_modes.h
+++ b/drivers/opus/celt/opus_modes.h
@@ -30,12 +30,12 @@
 #ifndef OPUS_MODES_H
 #define OPUS_MODES_H
 
-#include "opus_types.h"
-#include "celt.h"
-#include "arch.h"
-#include "mdct.h"
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/opus_types.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/mdct.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 
 #define MAX_PERIOD 1024
 
diff --git a/drivers/opus/celt/os_support.h b/drivers/opus/celt/os_support.h
index 5e47e3cff9..e1cf884467 100644
--- a/drivers/opus/celt/os_support.h
+++ b/drivers/opus/celt/os_support.h
@@ -35,8 +35,8 @@
 #  include "custom_support.h"
 #endif
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #include <string.h>
 #include <stdio.h>
diff --git a/drivers/opus/celt/pitch.c b/drivers/opus/celt/pitch.c
index 48cd02fb2b..c7c2b98c00 100644
--- a/drivers/opus/celt/pitch.c
+++ b/drivers/opus/celt/pitch.c
@@ -31,16 +31,16 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "pitch.h"
-#include "os_support.h"
-#include "opus_modes.h"
-#include "stack_alloc.h"
-#include "mathops.h"
-#include "celt_lpc.h"
+#include "opus/celt/pitch.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/celt_lpc.h"
 
 static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
                             int max_pitch, int *best_pitch
diff --git a/drivers/opus/celt/pitch.h b/drivers/opus/celt/pitch.h
index 3a7d305425..f599f5fc76 100644
--- a/drivers/opus/celt/pitch.h
+++ b/drivers/opus/celt/pitch.h
@@ -34,8 +34,8 @@
 #ifndef PITCH_H
 #define PITCH_H
 
-#include "opus_modes.h"
-#include "cpu_support.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/cpu_support.h"
 
 #if defined(__SSE__) && !defined(OPUS_FIXED_POINT)
 #include "x86/pitch_sse.h"
diff --git a/drivers/opus/celt/quant_bands.c b/drivers/opus/celt/quant_bands.c
index 0a170e850d..e64ed28f22 100644
--- a/drivers/opus/celt/quant_bands.c
+++ b/drivers/opus/celt/quant_bands.c
@@ -26,18 +26,18 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "quant_bands.h"
-#include "laplace.h"
+#include "opus/celt/quant_bands.h"
+#include "opus/celt/laplace.h"
 #include <math.h>
-#include "os_support.h"
-#include "arch.h"
-#include "mathops.h"
-#include "stack_alloc.h"
-#include "rate.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/rate.h"
 
 #ifdef OPUS_FIXED_POINT
 /* Mean energy in each band quantized in Q4 */
diff --git a/drivers/opus/celt/quant_bands.h b/drivers/opus/celt/quant_bands.h
index 840df8723f..85d011e6e0 100644
--- a/drivers/opus/celt/quant_bands.h
+++ b/drivers/opus/celt/quant_bands.h
@@ -29,11 +29,11 @@
 #ifndef QUANT_BANDS
 #define QUANT_BANDS
 
-#include "arch.h"
-#include "opus_modes.h"
-#include "entenc.h"
-#include "entdec.h"
-#include "mathops.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
+#include "opus/celt/mathops.h"
 
 #ifdef OPUS_FIXED_POINT
 extern const signed char eMeans[25];
diff --git a/drivers/opus/celt/rate.c b/drivers/opus/celt/rate.c
index cca585ad95..ecc0ab2a4f 100644
--- a/drivers/opus/celt/rate.c
+++ b/drivers/opus/celt/rate.c
@@ -26,18 +26,18 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <math.h>
-#include "opus_modes.h"
-#include "cwrs.h"
-#include "arch.h"
-#include "os_support.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/cwrs.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/os_support.h"
 
-#include "entcode.h"
-#include "rate.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/rate.h"
 
 static const unsigned char LOG2_FRAC_TABLE[24]={
    0,
diff --git a/drivers/opus/celt/rate.h b/drivers/opus/celt/rate.h
index 7ced23ea09..e12dd29db8 100644
--- a/drivers/opus/celt/rate.h
+++ b/drivers/opus/celt/rate.h
@@ -40,8 +40,8 @@
 #define QTHETA_OFFSET 4
 #define QTHETA_OFFSET_TWOPHASE 16
 
-#include "cwrs.h"
-#include "opus_modes.h"
+#include "opus/celt/cwrs.h"
+#include "opus/celt/opus_modes.h"
 
 void compute_pulse_cache(CELTMode *m, int LM);
 
diff --git a/drivers/opus/celt/stack_alloc.h b/drivers/opus/celt/stack_alloc.h
index d500c4dab9..464a6d0b7f 100644
--- a/drivers/opus/celt/stack_alloc.h
+++ b/drivers/opus/celt/stack_alloc.h
@@ -32,8 +32,8 @@
 #ifndef STACK_ALLOC_H
 #define STACK_ALLOC_H
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #if (!defined (VAR_ARRAYS) && !defined (USE_ALLOCA) && !defined (NONTHREADSAFE_PSEUDOSTACK))
 #define VAR_ARRAYS
@@ -149,7 +149,7 @@ extern char *global_stack_top;
 
 #endif /* ENABLE_VALGRIND */
 
-#include "os_support.h"
+#include "opus/celt/os_support.h"
 #define VARDECL(type, var) type *var
 #define ALLOC(var, size, type) var = PUSH(global_stack, size, type)
 #define SAVE_STACK char *_saved_stack = global_stack;
diff --git a/drivers/opus/celt/static_modes_fixed.h b/drivers/opus/celt/static_modes_fixed.h
index d23e2a66f5..1d92fc4b27 100644
--- a/drivers/opus/celt/static_modes_fixed.h
+++ b/drivers/opus/celt/static_modes_fixed.h
@@ -1,8 +1,8 @@
 /* The contents of this file was automatically generated by dump_modes.c
    with arguments: 48000 960
    It contains static definitions for some pre-defined modes. */
-#include "opus_modes.h"
-#include "rate.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/rate.h"
 
 #ifndef DEF_WINDOW120
 #define DEF_WINDOW120
diff --git a/drivers/opus/celt/static_modes_float.h b/drivers/opus/celt/static_modes_float.h
index fe6bb4c8a3..362be6cca8 100644
--- a/drivers/opus/celt/static_modes_float.h
+++ b/drivers/opus/celt/static_modes_float.h
@@ -1,8 +1,8 @@
 /* The contents of this file was automatically generated by dump_modes.c
    with arguments: 48000 960
    It contains static definitions for some pre-defined modes. */
-#include "opus_modes.h"
-#include "rate.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/rate.h"
 
 #ifndef DEF_WINDOW120
 #define DEF_WINDOW120
diff --git a/drivers/opus/celt/tests/test_unit_cwrs32.c b/drivers/opus/celt/tests/test_unit_cwrs32.c
index 9cf124336a..db43e3392b 100644
--- a/drivers/opus/celt/tests/test_unit_cwrs32.c
+++ b/drivers/opus/celt/tests/test_unit_cwrs32.c
@@ -26,8 +26,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdio.h>
@@ -40,13 +40,13 @@
 #endif
 
 #define CELT_C
-#include "stack_alloc.h"
-#include "entenc.c"
-#include "entdec.c"
-#include "entcode.c"
-#include "cwrs.c"
-#include "mathops.c"
-#include "rate.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/entenc.c"
+#include "opus/celt/entdec.c"
+#include "opus/celt/entcode.c"
+#include "opus/celt/cwrs.c"
+#include "opus/celt/mathops.c"
+#include "opus/celt/rate.h"
 
 #define NMAX (240)
 #define KMAX (128)
diff --git a/drivers/opus/celt/tests/test_unit_dft.c b/drivers/opus/celt/tests/test_unit_dft.c
index 4a00013b2a..9c0db3e9ac 100644
--- a/drivers/opus/celt/tests/test_unit_dft.c
+++ b/drivers/opus/celt/tests/test_unit_dft.c
@@ -25,8 +25,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #define SKIP_CONFIG_H
@@ -38,11 +38,11 @@
 #include <stdio.h>
 
 #define CELT_C
-#include "stack_alloc.h"
-#include "kiss_fft.h"
-#include "kiss_fft.c"
-#include "mathops.c"
-#include "entcode.c"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/kiss_fft.h"
+#include "opus/celt/kiss_fft.c"
+#include "opus/celt/mathops.c"
+#include "opus/celt/entcode.c"
 
 
 #ifndef M_PI
diff --git a/drivers/opus/celt/tests/test_unit_entropy.c b/drivers/opus/celt/tests/test_unit_entropy.c
index 62268b1564..8c2defa8b3 100644
--- a/drivers/opus/celt/tests/test_unit_entropy.c
+++ b/drivers/opus/celt/tests/test_unit_entropy.c
@@ -26,22 +26,22 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
 #include <time.h>
-#include "entcode.h"
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/celt/entcode.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 #include <string.h>
 
-#include "entenc.c"
-#include "entdec.c"
-#include "entcode.c"
+#include "opus/celt/entenc.c"
+#include "opus/celt/entdec.c"
+#include "opus/celt/entcode.c"
 
 #ifndef M_LOG2E
 # define M_LOG2E    1.4426950408889634074
diff --git a/drivers/opus/celt/tests/test_unit_laplace.c b/drivers/opus/celt/tests/test_unit_laplace.c
index af7d471045..e4fe83c4d6 100644
--- a/drivers/opus/celt/tests/test_unit_laplace.c
+++ b/drivers/opus/celt/tests/test_unit_laplace.c
@@ -25,20 +25,20 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "laplace.h"
+#include "opus/celt/laplace.h"
 #define CELT_C
-#include "stack_alloc.h"
+#include "opus/celt/stack_alloc.h"
 
-#include "entenc.c"
-#include "entdec.c"
-#include "entcode.c"
-#include "laplace.c"
+#include "opus/celt/entenc.c"
+#include "opus/celt/entdec.c"
+#include "opus/celt/entcode.c"
+#include "opus/celt/laplace.c"
 
 #define DATA_SIZE 40000
 
diff --git a/drivers/opus/celt/tests/test_unit_mathops.c b/drivers/opus/celt/tests/test_unit_mathops.c
index 36d6a4bfb4..0e3f300d40 100644
--- a/drivers/opus/celt/tests/test_unit_mathops.c
+++ b/drivers/opus/celt/tests/test_unit_mathops.c
@@ -26,8 +26,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #ifndef CUSTOM_MODES
@@ -36,15 +36,15 @@
 
 #define CELT_C
 
-#include "mathops.c"
-#include "entenc.c"
-#include "entdec.c"
-#include "entcode.c"
-#include "bands.c"
-#include "quant_bands.c"
-#include "laplace.c"
-#include "vq.c"
-#include "cwrs.c"
+#include "opus/celt/mathops.c"
+#include "opus/celt/entenc.c"
+#include "opus/celt/entdec.c"
+#include "opus/celt/entcode.c"
+#include "opus/celt/bands.c"
+#include "opus/celt/quant_bands.c"
+#include "opus/celt/laplace.c"
+#include "opus/celt/vq.c"
+#include "opus/celt/cwrs.c"
 #include <stdio.h>
 #include <math.h>
 
diff --git a/drivers/opus/celt/tests/test_unit_mdct.c b/drivers/opus/celt/tests/test_unit_mdct.c
index e3b5eec11c..0be03db2e8 100644
--- a/drivers/opus/celt/tests/test_unit_mdct.c
+++ b/drivers/opus/celt/tests/test_unit_mdct.c
@@ -25,8 +25,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #define SKIP_CONFIG_H
@@ -38,13 +38,13 @@
 #include <stdio.h>
 
 #define CELT_C
-#include "mdct.h"
-#include "stack_alloc.h"
+#include "opus/celt/mdct.h"
+#include "opus/celt/stack_alloc.h"
 
-#include "kiss_fft.c"
-#include "mdct.c"
-#include "mathops.c"
-#include "entcode.c"
+#include "opus/celt/kiss_fft.c"
+#include "opus/celt/mdct.c"
+#include "opus/celt/mathops.c"
+#include "opus/celt/entcode.c"
 
 #ifndef M_PI
 #define M_PI 3.141592653
diff --git a/drivers/opus/celt/tests/test_unit_rotation.c b/drivers/opus/celt/tests/test_unit_rotation.c
index c12cc3f02f..a57bf11e79 100644
--- a/drivers/opus/celt/tests/test_unit_rotation.c
+++ b/drivers/opus/celt/tests/test_unit_rotation.c
@@ -25,8 +25,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #ifndef CUSTOM_MODES
@@ -37,13 +37,13 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "vq.c"
-#include "cwrs.c"
-#include "entcode.c"
-#include "entenc.c"
-#include "entdec.c"
-#include "mathops.c"
-#include "bands.h"
+#include "opus/celt/vq.c"
+#include "opus/celt/cwrs.c"
+#include "opus/celt/entcode.c"
+#include "opus/celt/entenc.c"
+#include "opus/celt/entdec.c"
+#include "opus/celt/mathops.c"
+#include "opus/celt/bands.h"
 #include <math.h>
 #define MAX_SIZE 100
 
diff --git a/drivers/opus/celt/tests/test_unit_types.c b/drivers/opus/celt/tests/test_unit_types.c
index 29e671067f..12d5ca72f8 100644
--- a/drivers/opus/celt/tests/test_unit_types.c
+++ b/drivers/opus/celt/tests/test_unit_types.c
@@ -25,11 +25,11 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus_types.h"
+#include "opus/opus_types.h"
 #include <stdio.h>
 
 int main(void)
diff --git a/drivers/opus/celt/vq.c b/drivers/opus/celt/vq.c
index 20b0b82728..29a98a3a63 100644
--- a/drivers/opus/celt/vq.c
+++ b/drivers/opus/celt/vq.c
@@ -26,17 +26,17 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "mathops.h"
-#include "cwrs.h"
-#include "vq.h"
-#include "arch.h"
-#include "os_support.h"
-#include "bands.h"
-#include "rate.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/cwrs.h"
+#include "opus/celt/vq.h"
+#include "opus/celt/arch.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/bands.h"
+#include "opus/celt/rate.h"
 
 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
 {
diff --git a/drivers/opus/celt/vq.h b/drivers/opus/celt/vq.h
index 8bab59c5e0..b52b1a0982 100644
--- a/drivers/opus/celt/vq.h
+++ b/drivers/opus/celt/vq.h
@@ -33,9 +33,9 @@
 #ifndef VQ_H
 #define VQ_H
 
-#include "entenc.h"
-#include "entdec.h"
-#include "opus_modes.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
+#include "opus/celt/opus_modes.h"
 
 /** Algebraic pulse-vector quantiser. The signal x is replaced by the sum of
   * the pitch and a combination of pulses such that its norm is still equal
diff --git a/drivers/opus/celt/x86/pitch_sse.h b/drivers/opus/celt/x86/pitch_sse.h
index 695122a5ad..1542b87232 100644
--- a/drivers/opus/celt/x86/pitch_sse.h
+++ b/drivers/opus/celt/x86/pitch_sse.h
@@ -33,7 +33,7 @@
 #define PITCH_SSE_H
 
 #include <xmmintrin.h>
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 #define OVERRIDE_XCORR_KERNEL
 static OPUS_INLINE void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_val32 sum[4], int len)
diff --git a/drivers/opus/http.c b/drivers/opus/http.c
index 803db044af..24991b0401 100644
--- a/drivers/opus/http.c
+++ b/drivers/opus/http.c
@@ -9,11 +9,11 @@
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  ********************************************************************/
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "internal.h"
+#include "opus/internal.h"
 #include <ctype.h>
 #include <errno.h>
 #include <limits.h>
@@ -214,7 +214,7 @@ static const char *op_parse_file_url(const char *_src){
 #  include <winsock2.h>
 #  include <ws2tcpip.h>
 #  include <openssl/ssl.h>
-#  include "winerrno.h"
+#  include "opus/winerrno.h"
 
 typedef SOCKET op_sock;
 
diff --git a/drivers/opus/info.c b/drivers/opus/info.c
index f5ad2110be..8175a013ac 100644
--- a/drivers/opus/info.c
+++ b/drivers/opus/info.c
@@ -9,11 +9,11 @@
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  ********************************************************************/
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "internal.h"
+#include "opus/internal.h"
 #include <limits.h>
 #include <string.h>
 
diff --git a/drivers/opus/internal.c b/drivers/opus/internal.c
index a9c3671179..d73628ec53 100644
--- a/drivers/opus/internal.c
+++ b/drivers/opus/internal.c
@@ -9,11 +9,11 @@
  * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  *                                                                  *
  ********************************************************************/
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "internal.h"
+#include "opus/internal.h"
 
 #if defined(OP_ENABLE_ASSERTIONS)
 void op_fatal_impl(const char *_str,const char *_file,int _line){
diff --git a/drivers/opus/mlp.c b/drivers/opus/mlp.c
index 7220a23d42..d4d4971796 100644
--- a/drivers/opus/mlp.c
+++ b/drivers/opus/mlp.c
@@ -25,17 +25,17 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #include <math.h>
-#include "mlp.h"
-#include "arch.h"
-#include "tansig_table.h"
+#include "opus/mlp.h"
+#include "opus/celt/arch.h"
+#include "opus/tansig_table.h"
 #define MAX_NEURONS 100
 
 #if 0
diff --git a/drivers/opus/mlp.h b/drivers/opus/mlp.h
index 86c8e0617d..3f9ca73bb0 100644
--- a/drivers/opus/mlp.h
+++ b/drivers/opus/mlp.h
@@ -28,7 +28,7 @@
 #ifndef _MLP_H_
 #define _MLP_H_
 
-#include "arch.h"
+#include "opus/celt/arch.h"
 
 typedef struct {
 	int layers;
diff --git a/drivers/opus/mlp_data.c b/drivers/opus/mlp_data.c
index 401c4c0250..127c62ce8c 100644
--- a/drivers/opus/mlp_data.c
+++ b/drivers/opus/mlp_data.c
@@ -1,7 +1,7 @@
 /* The contents of this file was automatically generated by mlp_train.c
    It contains multi-layer perceptron (MLP) weights. */
 
-#include "mlp.h"
+#include "opus/mlp.h"
 
 /* RMS error was 0.138320, seed was 1361535663 */
 
diff --git a/drivers/opus/opus.c b/drivers/opus/opus.c
index 8978e3b06b..5af71b478d 100644
--- a/drivers/opus/opus.c
+++ b/drivers/opus/opus.c
@@ -25,12 +25,12 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus.h"
-#include "opus_private.h"
+#include "opus/opus.h"
+#include "opus/opus_private.h"
 
 #ifndef DISABLE_FLOAT_API
 OPUS_EXPORT void opus_pcm_soft_clip(float *_x, int N, int C, float *declip_mem)
diff --git a/drivers/opus/opus.h b/drivers/opus/opus.h
index 93a53a2ffc..b99f553016 100644
--- a/drivers/opus/opus.h
+++ b/drivers/opus/opus.h
@@ -33,8 +33,8 @@
 #ifndef OPUS_H
 #define OPUS_H
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/drivers/opus/opus_config.h b/drivers/opus/opus_config.h
index c6470e92c3..98c3e87cc6 100644
--- a/drivers/opus/opus_config.h
+++ b/drivers/opus/opus_config.h
@@ -7,12 +7,16 @@
 /* Define to 1 if you have the <inttypes.h> header file. */
 #define HAVE_INTTYPES_H 1
 
+#if (!defined( _MSC_VER ) || ( _MSC_VER >= 1800 ))
+
 /* Define to 1 if you have the `lrint' function. */
 #define HAVE_LRINT 1
 
 /* Define to 1 if you have the `lrintf' function. */
 #define HAVE_LRINTF 1
 
+#endif
+
 /* Define to 1 if you have the <memory.h> header file. */
 #define HAVE_MEMORY_H 1
 
@@ -109,7 +113,11 @@
 /* Define to the equivalent of the C99 'restrict' keyword, or to
    nothing if this is not supported.  Do not define if restrict is
    supported directly.  */
+#if (!defined( _MSC_VER ) || ( _MSC_VER >= 1800 ))
 #define restrict __restrict
+#else
+#undef restrict
+#endif
 /* Work around a bug in Sun C++: it does not support _Restrict or
    __restrict__, even though the corresponding Sun C compiler ends up with
    "#define restrict _Restrict" or "#define restrict __restrict__" in the
diff --git a/drivers/opus/opus_custom.h b/drivers/opus/opus_custom.h
index 41f36bf2fb..32fcb81ac7 100644
--- a/drivers/opus/opus_custom.h
+++ b/drivers/opus/opus_custom.h
@@ -35,7 +35,7 @@
 #ifndef OPUS_CUSTOM_H
 #define OPUS_CUSTOM_H
 
-#include "opus_defines.h"
+#include "opus/opus_defines.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/drivers/opus/opus_decoder.c b/drivers/opus/opus_decoder.c
index c5d4cc6aaa..dea56015ce 100644
--- a/drivers/opus/opus_decoder.c
+++ b/drivers/opus/opus_decoder.c
@@ -25,8 +25,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-# include "opus_config.h"
+#ifdef OPUS_ENABLED
+# include "opus/opus_config.h"
 #endif
 
 #ifndef OPUS_BUILD
@@ -38,19 +38,19 @@
 #endif
 
 #include <stdarg.h>
-#include "celt.h"
-#include "opus.h"
-#include "entdec.h"
-#include "opus_modes.h"
-#include "API.h"
-#include "stack_alloc.h"
-#include "float_cast.h"
-#include "opus_private.h"
-#include "os_support.h"
-#include "structs.h"
-#include "define.h"
-#include "mathops.h"
-#include "cpu_support.h"
+#include "opus/celt/celt.h"
+#include "opus/opus.h"
+#include "opus/celt/entdec.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/silk/API.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/float_cast.h"
+#include "opus/opus_private.h"
+#include "opus/celt/os_support.h"
+#include "opus/silk/structs.h"
+#include "opus/silk/define.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/cpu_support.h"
 
 struct OpusDecoder {
    int          celt_dec_offset;
diff --git a/drivers/opus/opus_defines.h b/drivers/opus/opus_defines.h
index 265089f65e..9d089d8391 100644
--- a/drivers/opus/opus_defines.h
+++ b/drivers/opus/opus_defines.h
@@ -33,7 +33,7 @@
 #ifndef OPUS_DEFINES_H
 #define OPUS_DEFINES_H
 
-#include "opus_types.h"
+#include "opus/opus_types.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/drivers/opus/opus_demo.c b/drivers/opus/opus_demo.c
index 7fcf65fd8b..a0c5a5f862 100644
--- a/drivers/opus/opus_demo.c
+++ b/drivers/opus/opus_demo.c
@@ -26,19 +26,19 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <math.h>
 #include <string.h>
-#include "opus.h"
-#include "debug.h"
-#include "opus_types.h"
-#include "opus_private.h"
-#include "opus_multistream.h"
+#include "opus/opus.h"
+#include "opus/silk/debug.h"
+#include "opus/opus_types.h"
+#include "opus/opus_private.h"
+#include "opus/opus_multistream.h"
 
 #define MAX_PACKET 1500
 
diff --git a/drivers/opus/opus_encoder.c b/drivers/opus/opus_encoder.c
index f739daa258..890a2514de 100644
--- a/drivers/opus/opus_encoder.c
+++ b/drivers/opus/opus_encoder.c
@@ -25,29 +25,29 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdarg.h>
-#include "celt.h"
-#include "entenc.h"
-#include "opus_modes.h"
-#include "API.h"
-#include "stack_alloc.h"
-#include "float_cast.h"
-#include "opus.h"
-#include "arch.h"
-#include "opus_private.h"
-#include "os_support.h"
-#include "cpu_support.h"
-#include "analysis.h"
-#include "mathops.h"
-#include "tuning_parameters.h"
+#include "opus/celt/celt.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/silk/API.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/celt/float_cast.h"
+#include "opus/opus.h"
+#include "opus/celt/arch.h"
+#include "opus/opus_private.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/cpu_support.h"
+#include "opus/analysis.h"
+#include "opus/celt/mathops.h"
+#include "opus/silk/tuning_parameters.h"
 #ifdef OPUS_FIXED_POINT
-#include "fixed/structs_FIX.h"
+#include "opus/silk/fixed/structs_FIX.h"
 #else
-#include "float/structs_FLP.h"
+#include "opus/silk/float/structs_FLP.h"
 #endif
 
 #define MAX_ENCODER_BUFFER 480
diff --git a/drivers/opus/opus_multistream.c b/drivers/opus/opus_multistream.c
index 8211c0b470..10c42dd56d 100644
--- a/drivers/opus/opus_multistream.c
+++ b/drivers/opus/opus_multistream.c
@@ -25,17 +25,17 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus_multistream.h"
-#include "opus.h"
-#include "opus_private.h"
-#include "stack_alloc.h"
+#include "opus/opus_multistream.h"
+#include "opus/opus.h"
+#include "opus/opus_private.h"
+#include "opus/celt/stack_alloc.h"
 #include <stdarg.h>
-#include "float_cast.h"
-#include "os_support.h"
+#include "opus/celt/float_cast.h"
+#include "opus/celt/os_support.h"
 
 
 int validate_layout(const ChannelLayout *layout)
diff --git a/drivers/opus/opus_multistream.h b/drivers/opus/opus_multistream.h
index ae5997934a..f736c99c88 100644
--- a/drivers/opus/opus_multistream.h
+++ b/drivers/opus/opus_multistream.h
@@ -33,7 +33,7 @@
 #ifndef OPUS_MULTISTREAM_H
 #define OPUS_MULTISTREAM_H
 
-#include "opus.h"
+#include "opus/opus.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/drivers/opus/opus_multistream_decoder.c b/drivers/opus/opus_multistream_decoder.c
index 64a0c24067..43c695d815 100644
--- a/drivers/opus/opus_multistream_decoder.c
+++ b/drivers/opus/opus_multistream_decoder.c
@@ -25,17 +25,17 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus_multistream.h"
-#include "opus.h"
-#include "opus_private.h"
-#include "stack_alloc.h"
+#include "opus/opus_multistream.h"
+#include "opus/opus.h"
+#include "opus/opus_private.h"
+#include "opus/celt/stack_alloc.h"
 #include <stdarg.h>
-#include "float_cast.h"
-#include "os_support.h"
+#include "opus/celt/float_cast.h"
+#include "opus/celt/os_support.h"
 
 struct OpusMSDecoder {
    ChannelLayout layout;
diff --git a/drivers/opus/opus_multistream_encoder.c b/drivers/opus/opus_multistream_encoder.c
index 8d559743ea..685d2de277 100644
--- a/drivers/opus/opus_multistream_encoder.c
+++ b/drivers/opus/opus_multistream_encoder.c
@@ -25,22 +25,22 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus_multistream.h"
-#include "opus.h"
-#include "opus_private.h"
-#include "stack_alloc.h"
+#include "opus/opus_multistream.h"
+#include "opus/opus.h"
+#include "opus/opus_private.h"
+#include "opus/celt/stack_alloc.h"
 #include <stdarg.h>
-#include "float_cast.h"
-#include "os_support.h"
-#include "mathops.h"
-#include "mdct.h"
-#include "opus_modes.h"
-#include "bands.h"
-#include "quant_bands.h"
+#include "opus/celt/float_cast.h"
+#include "opus/celt/os_support.h"
+#include "opus/celt/mathops.h"
+#include "opus/celt/mdct.h"
+#include "opus/celt/opus_modes.h"
+#include "opus/celt/bands.h"
+#include "opus/celt/quant_bands.h"
 
 typedef struct {
    int nb_streams;
diff --git a/drivers/opus/opus_private.h b/drivers/opus/opus_private.h
index 83225f2b6c..d63ed4f051 100644
--- a/drivers/opus/opus_private.h
+++ b/drivers/opus/opus_private.h
@@ -29,9 +29,9 @@
 #ifndef OPUS_PRIVATE_H
 #define OPUS_PRIVATE_H
 
-#include "arch.h"
-#include "opus.h"
-#include "celt.h"
+#include "opus/celt/arch.h"
+#include "opus/opus.h"
+#include "opus/celt/celt.h"
 
 struct OpusRepacketizer {
    unsigned char toc;
diff --git a/drivers/opus/opusfile.c b/drivers/opus/opusfile.c
index 1e7497f6cd..a38e8cd5ad 100644
--- a/drivers/opus/opusfile.c
+++ b/drivers/opus/opusfile.c
@@ -14,11 +14,11 @@
  last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
 
  ********************************************************************/
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "internal.h"
+#include "opus/internal.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <errno.h>
@@ -26,7 +26,7 @@
 #include <string.h>
 #include <math.h>
 
-#include "opusfile.h"
+#include "opus/opusfile.h"
 
 /*This implementation is largely based off of libvorbisfile.
   All of the Ogg bits work roughly the same, though I have made some
diff --git a/drivers/opus/repacketizer.c b/drivers/opus/repacketizer.c
index 01406bb39b..eb0c0c7376 100644
--- a/drivers/opus/repacketizer.c
+++ b/drivers/opus/repacketizer.c
@@ -25,13 +25,13 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus.h"
-#include "opus_private.h"
-#include "os_support.h"
+#include "opus/opus.h"
+#include "opus/opus_private.h"
+#include "opus/celt/os_support.h"
 
 
 int opus_repacketizer_get_size(void)
diff --git a/drivers/opus/repacketizer_demo.c b/drivers/opus/repacketizer_demo.c
index 1ca9cc3c96..5df4f26958 100644
--- a/drivers/opus/repacketizer_demo.c
+++ b/drivers/opus/repacketizer_demo.c
@@ -25,11 +25,11 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "opus.h"
+#include "opus/opus.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/drivers/opus/silk/A2NLSF.c b/drivers/opus/silk/A2NLSF.c
index cec53a5cd8..18b0e3092d 100644
--- a/drivers/opus/silk/A2NLSF.c
+++ b/drivers/opus/silk/A2NLSF.c
@@ -31,12 +31,12 @@ POSSIBILITY OF SUCH DAMAGE.
 /* Therefore the result is not accurate NLSFs, but the two      */
 /* functions are accurate inverses of each other                */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "tables.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/tables.h"
 
 /* Number of binary divisions, when not in low complexity mode */
 #define BIN_DIV_STEPS_A2NLSF_FIX      3 /* must be no higher than 16 - log2( LSF_COS_TAB_SZ_FIX ) */
diff --git a/drivers/opus/silk/API.h b/drivers/opus/silk/API.h
index f0601bcf6b..70f81a0c44 100644
--- a/drivers/opus/silk/API.h
+++ b/drivers/opus/silk/API.h
@@ -28,11 +28,11 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_API_H
 #define SILK_API_H
 
-#include "control.h"
-#include "typedef.h"
-#include "errors.h"
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/silk/control.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/errors.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/CNG.c b/drivers/opus/silk/CNG.c
index 8b8dbf882c..253ae7db4c 100644
--- a/drivers/opus/silk/CNG.c
+++ b/drivers/opus/silk/CNG.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
diff --git a/drivers/opus/silk/HP_variable_cutoff.c b/drivers/opus/silk/HP_variable_cutoff.c
index 379752bb19..9da2032869 100644
--- a/drivers/opus/silk/HP_variable_cutoff.c
+++ b/drivers/opus/silk/HP_variable_cutoff.c
@@ -25,15 +25,15 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 #ifdef OPUS_FIXED_POINT
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 #else
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 #endif
-#include "tuning_parameters.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* High-pass filter with cutoff frequency adaptation based on pitch lag statistics */
 void silk_HP_variable_cutoff(
diff --git a/drivers/opus/silk/LPC_analysis_filter.c b/drivers/opus/silk/LPC_analysis_filter.c
index 98ef509e4e..4976d9d3c5 100644
--- a/drivers/opus/silk/LPC_analysis_filter.c
+++ b/drivers/opus/silk/LPC_analysis_filter.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "celt_lpc.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/celt/celt_lpc.h"
 
 /*******************************************/
 /* LPC analysis filter                     */
diff --git a/drivers/opus/silk/LPC_inv_pred_gain.c b/drivers/opus/silk/LPC_inv_pred_gain.c
index 6dc9a49861..aa67875759 100644
--- a/drivers/opus/silk/LPC_inv_pred_gain.c
+++ b/drivers/opus/silk/LPC_inv_pred_gain.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 #define QA                          24
 #define A_LIMIT                     SILK_FIX_CONST( 0.99975, QA )
diff --git a/drivers/opus/silk/LP_variable_cutoff.c b/drivers/opus/silk/LP_variable_cutoff.c
index 098c19d34f..65c4a6b06a 100644
--- a/drivers/opus/silk/LP_variable_cutoff.c
+++ b/drivers/opus/silk/LP_variable_cutoff.c
@@ -25,8 +25,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /*
@@ -35,7 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
     [0.95 : 0.15 : 0.35] normalized cut off frequencies.
 */
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Helper function, interpolates the filter taps */
 static OPUS_INLINE void silk_LP_interpolate_filter_taps(
diff --git a/drivers/opus/silk/NLSF2A.c b/drivers/opus/silk/NLSF2A.c
index 2b6f685f49..c3b8568a0c 100644
--- a/drivers/opus/silk/NLSF2A.c
+++ b/drivers/opus/silk/NLSF2A.c
@@ -25,8 +25,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /* conversion between prediction filter coefficients and LSFs   */
@@ -35,8 +35,8 @@ POSSIBILITY OF SUCH DAMAGE.
 /* therefore the result is not accurate LSFs, but the two       */
 /* functions are accurate inverses of each other                */
 
-#include "SigProc_FIX.h"
-#include "tables.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/tables.h"
 
 #define QA      16
 
diff --git a/drivers/opus/silk/NLSF_VQ.c b/drivers/opus/silk/NLSF_VQ.c
index e4ca79fbfe..9420f5bfb9 100644
--- a/drivers/opus/silk/NLSF_VQ.c
+++ b/drivers/opus/silk/NLSF_VQ.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Compute quantization errors for an LPC_order element input vector for a VQ codebook */
 void silk_NLSF_VQ(
diff --git a/drivers/opus/silk/NLSF_VQ_weights_laroia.c b/drivers/opus/silk/NLSF_VQ_weights_laroia.c
index f461ba01c0..d3b4fd6401 100644
--- a/drivers/opus/silk/NLSF_VQ_weights_laroia.c
+++ b/drivers/opus/silk/NLSF_VQ_weights_laroia.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "define.h"
-#include "SigProc_FIX.h"
+#include "opus/silk/define.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /*
 R. Laroia, N. Phamdo and N. Farvardin, "Robust and Efficient Quantization of Speech LSP
diff --git a/drivers/opus/silk/NLSF_decode.c b/drivers/opus/silk/NLSF_decode.c
index 786a62d278..6868a93858 100644
--- a/drivers/opus/silk/NLSF_decode.c
+++ b/drivers/opus/silk/NLSF_decode.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Predictive dequantizer for NLSF residuals */
 static OPUS_INLINE void silk_NLSF_residual_dequant(               /* O    Returns RD value in Q30                     */
diff --git a/drivers/opus/silk/NLSF_del_dec_quant.c b/drivers/opus/silk/NLSF_del_dec_quant.c
index b74585370c..9caeb0a23b 100644
--- a/drivers/opus/silk/NLSF_del_dec_quant.c
+++ b/drivers/opus/silk/NLSF_del_dec_quant.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Delayed-decision quantizer for NLSF residuals */
 opus_int32 silk_NLSF_del_dec_quant(                             /* O    Returns RD value in Q25                     */
diff --git a/drivers/opus/silk/NLSF_encode.c b/drivers/opus/silk/NLSF_encode.c
index bf67bd5cf1..157c0c8ff6 100644
--- a/drivers/opus/silk/NLSF_encode.c
+++ b/drivers/opus/silk/NLSF_encode.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /***********************/
 /* NLSF vector encoder */
diff --git a/drivers/opus/silk/NLSF_stabilize.c b/drivers/opus/silk/NLSF_stabilize.c
index a1bf20b8d4..caeeed9754 100644
--- a/drivers/opus/silk/NLSF_stabilize.c
+++ b/drivers/opus/silk/NLSF_stabilize.c
@@ -25,8 +25,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /* NLSF stabilizer:                                         */
@@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* - Output are sorted NLSF coefficients                    */
 /*                                                          */
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Constant Definitions */
 #define MAX_LOOPS        20
diff --git a/drivers/opus/silk/NLSF_unpack.c b/drivers/opus/silk/NLSF_unpack.c
index 60242a3b52..20d3020f9b 100644
--- a/drivers/opus/silk/NLSF_unpack.c
+++ b/drivers/opus/silk/NLSF_unpack.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Unpack predictor values and indices for entropy coding tables */
 void silk_NLSF_unpack(
diff --git a/drivers/opus/silk/NSQ.c b/drivers/opus/silk/NSQ.c
index a08e34e893..6092498a1b 100644
--- a/drivers/opus/silk/NSQ.c
+++ b/drivers/opus/silk/NSQ.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 static OPUS_INLINE void silk_nsq_scale_states(
     const silk_encoder_state *psEncC,           /* I    Encoder State                   */
diff --git a/drivers/opus/silk/NSQ_del_dec.c b/drivers/opus/silk/NSQ_del_dec.c
index 8ac6311b11..4aa730ed6a 100644
--- a/drivers/opus/silk/NSQ_del_dec.c
+++ b/drivers/opus/silk/NSQ_del_dec.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 typedef struct {
     opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
diff --git a/drivers/opus/silk/PLC.c b/drivers/opus/silk/PLC.c
index 9fc11adda9..1f5ce09927 100644
--- a/drivers/opus/silk/PLC.c
+++ b/drivers/opus/silk/PLC.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
-#include "PLC.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/PLC.h"
 
 #define NB_ATT 2
 static const opus_int16 HARM_ATT_Q15[NB_ATT]              = { 32440, 31130 }; /* 0.99, 0.95 */
diff --git a/drivers/opus/silk/PLC.h b/drivers/opus/silk/PLC.h
index f531cda950..ae573fc00c 100644
--- a/drivers/opus/silk/PLC.h
+++ b/drivers/opus/silk/PLC.h
@@ -28,7 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_PLC_H
 #define SILK_PLC_H
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 #define BWE_COEF                        0.99
 #define V_PITCH_GAIN_START_MIN_Q14      11469               /* 0.7 in Q14               */
diff --git a/drivers/opus/silk/SigProc_FIX.h b/drivers/opus/silk/SigProc_FIX.h
index 1b58057910..f4db0fa9a4 100644
--- a/drivers/opus/silk/SigProc_FIX.h
+++ b/drivers/opus/silk/SigProc_FIX.h
@@ -38,9 +38,9 @@ extern "C"
 #define SILK_MAX_ORDER_LPC            16            /* max order of the LPC analysis in schur() and k2a() */
 
 #include <string.h>                                 /* for memset(), memcpy(), memmove() */
-#include "typedef.h"
-#include "resampler_structs.h"
-#include "macros.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/resampler_structs.h"
+#include "opus/silk/macros.h"
 
 
 /********************************************************************/
@@ -575,9 +575,9 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
 /* the following seems faster on x86 */
 #define silk_SMMUL(a32, b32)                (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
 
-#include "Inlines.h"
-#include "MacroCount.h"
-#include "MacroDebug.h"
+#include "opus/silk/Inlines.h"
+#include "opus/silk/MacroCount.h"
+#include "opus/silk/MacroDebug.h"
 
 #ifdef OPUS_ARM_INLINE_ASM
 #include "arm/SigProc_FIX_armv4.h"
diff --git a/drivers/opus/silk/VAD.c b/drivers/opus/silk/VAD.c
index 3a5c566627..a789a13d32 100644
--- a/drivers/opus/silk/VAD.c
+++ b/drivers/opus/silk/VAD.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /* Silk VAD noise level estimation */
 static OPUS_INLINE void silk_VAD_GetNoiseLevels(
diff --git a/drivers/opus/silk/VQ_WMat_EC.c b/drivers/opus/silk/VQ_WMat_EC.c
index 28c5fc7e6f..a35b84ef14 100644
--- a/drivers/opus/silk/VQ_WMat_EC.c
+++ b/drivers/opus/silk/VQ_WMat_EC.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Entropy constrained matrix-weighted VQ, hard-coded to 5-element vectors, for a single input data vector */
 void silk_VQ_WMat_EC(
diff --git a/drivers/opus/silk/ana_filt_bank_1.c b/drivers/opus/silk/ana_filt_bank_1.c
index 387dcd87e7..a2fa720bd0 100644
--- a/drivers/opus/silk/ana_filt_bank_1.c
+++ b/drivers/opus/silk/ana_filt_bank_1.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Coefficients for 2-band filter bank based on first-order allpass filters */
 static opus_int16 A_fb1_20 = 5394 << 1;
diff --git a/drivers/opus/silk/biquad_alt.c b/drivers/opus/silk/biquad_alt.c
index 5f1d6d25c3..85708167de 100644
--- a/drivers/opus/silk/biquad_alt.c
+++ b/drivers/opus/silk/biquad_alt.c
@@ -32,11 +32,11 @@ POSSIBILITY OF SUCH DAMAGE.
  * Can handle slowly varying filter coefficients                        *
  *                                                                      */
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Second order ARMA filter, alternative implementation */
 void silk_biquad_alt(
diff --git a/drivers/opus/silk/bwexpander.c b/drivers/opus/silk/bwexpander.c
index d757483872..7f81a2af99 100644
--- a/drivers/opus/silk/bwexpander.c
+++ b/drivers/opus/silk/bwexpander.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Chirp (bandwidth expand) LP AR filter */
 void silk_bwexpander(
diff --git a/drivers/opus/silk/bwexpander_32.c b/drivers/opus/silk/bwexpander_32.c
index 8a60767c0d..58dfe1f8d6 100644
--- a/drivers/opus/silk/bwexpander_32.c
+++ b/drivers/opus/silk/bwexpander_32.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Chirp (bandwidth expand) LP AR filter */
 void silk_bwexpander_32(
diff --git a/drivers/opus/silk/check_control_input.c b/drivers/opus/silk/check_control_input.c
index 0e02fff22d..9c882334c3 100644
--- a/drivers/opus/silk/check_control_input.c
+++ b/drivers/opus/silk/check_control_input.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "control.h"
-#include "errors.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/control.h"
+#include "opus/silk/errors.h"
 
 /* Check encoder control struct */
 opus_int check_control_input(
diff --git a/drivers/opus/silk/code_signs.c b/drivers/opus/silk/code_signs.c
index 8bcc6ecde1..e2f671d7ca 100644
--- a/drivers/opus/silk/code_signs.c
+++ b/drivers/opus/silk/code_signs.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /*#define silk_enc_map(a)                ((a) > 0 ? 1 : 0)*/
 /*#define silk_dec_map(a)                ((a) > 0 ? 1 : -1)*/
diff --git a/drivers/opus/silk/control.h b/drivers/opus/silk/control.h
index 747e5426a0..4b20c4a8b8 100644
--- a/drivers/opus/silk/control.h
+++ b/drivers/opus/silk/control.h
@@ -28,7 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_CONTROL_H
 #define SILK_CONTROL_H
 
-#include "typedef.h"
+#include "opus/silk/typedef.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/control_SNR.c b/drivers/opus/silk/control_SNR.c
index ae6351798b..e3983d5039 100644
--- a/drivers/opus/silk/control_SNR.c
+++ b/drivers/opus/silk/control_SNR.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "tuning_parameters.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Control SNR of redidual quantizer */
 opus_int silk_control_SNR(
diff --git a/drivers/opus/silk/control_audio_bandwidth.c b/drivers/opus/silk/control_audio_bandwidth.c
index 6f060bbd29..846a7baa59 100644
--- a/drivers/opus/silk/control_audio_bandwidth.c
+++ b/drivers/opus/silk/control_audio_bandwidth.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "tuning_parameters.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Control internal sampling rate */
 opus_int silk_control_audio_bandwidth(
diff --git a/drivers/opus/silk/control_codec.c b/drivers/opus/silk/control_codec.c
index 2d7b10e9b7..beb6dfe6cd 100644
--- a/drivers/opus/silk/control_codec.c
+++ b/drivers/opus/silk/control_codec.c
@@ -25,19 +25,19 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 #ifdef OPUS_FIXED_POINT
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 #define silk_encoder_state_Fxx      silk_encoder_state_FIX
 #else
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 #define silk_encoder_state_Fxx      silk_encoder_state_FLP
 #endif
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
-#include "pitch_est_defines.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
+#include "opus/silk/pitch_est_defines.h"
 
 static opus_int silk_setup_resamplers(
     silk_encoder_state_Fxx          *psEnc,             /* I/O                      */
diff --git a/drivers/opus/silk/debug.c b/drivers/opus/silk/debug.c
index 2230813fae..9f31b68f76 100644
--- a/drivers/opus/silk/debug.c
+++ b/drivers/opus/silk/debug.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "debug.h"
-#include "SigProc_FIX.h"
+#include "opus/silk/debug.h"
+#include "opus/silk/SigProc_FIX.h"
 
 #if SILK_TIC_TOC
 
diff --git a/drivers/opus/silk/debug.h b/drivers/opus/silk/debug.h
index efb6d3e99e..d2eccfa1e4 100644
--- a/drivers/opus/silk/debug.h
+++ b/drivers/opus/silk/debug.h
@@ -28,7 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_DEBUG_H
 #define SILK_DEBUG_H
 
-#include "typedef.h"
+#include "opus/silk/typedef.h"
 #include <stdio.h>      /* file writing */
 #include <string.h>     /* strcpy, strcmp */
 
diff --git a/drivers/opus/silk/dec_API.c b/drivers/opus/silk/dec_API.c
index cd72115a20..43aeb4faff 100644
--- a/drivers/opus/silk/dec_API.c
+++ b/drivers/opus/silk/dec_API.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
-#include "API.h"
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/API.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /************************/
 /* Decoder Super Struct */
diff --git a/drivers/opus/silk/decode_core.c b/drivers/opus/silk/decode_core.c
index 8f801ea7ad..f7cd8db7c9 100644
--- a/drivers/opus/silk/decode_core.c
+++ b/drivers/opus/silk/decode_core.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /**********************************************************/
 /* Core decoder. Performs inverse NSQ operation LTP + LPC */
diff --git a/drivers/opus/silk/decode_frame.c b/drivers/opus/silk/decode_frame.c
index 38500227c2..f166019455 100644
--- a/drivers/opus/silk/decode_frame.c
+++ b/drivers/opus/silk/decode_frame.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
-#include "PLC.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/PLC.h"
 
 /****************/
 /* Decode frame */
diff --git a/drivers/opus/silk/decode_indices.c b/drivers/opus/silk/decode_indices.c
index c2aaad2606..3e09fb416b 100644
--- a/drivers/opus/silk/decode_indices.c
+++ b/drivers/opus/silk/decode_indices.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Decode side-information parameters from payload */
 void silk_decode_indices(
diff --git a/drivers/opus/silk/decode_parameters.c b/drivers/opus/silk/decode_parameters.c
index 72df4fcdb2..7f4cbeec29 100644
--- a/drivers/opus/silk/decode_parameters.c
+++ b/drivers/opus/silk/decode_parameters.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Decode parameters from payload */
 void silk_decode_parameters(
diff --git a/drivers/opus/silk/decode_pitch.c b/drivers/opus/silk/decode_pitch.c
index 3e1dd2d35b..73e94b4ee2 100644
--- a/drivers/opus/silk/decode_pitch.c
+++ b/drivers/opus/silk/decode_pitch.c
@@ -25,15 +25,15 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /***********************************************************
 * Pitch analyser function
 ********************************************************** */
-#include "SigProc_FIX.h"
-#include "pitch_est_defines.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/pitch_est_defines.h"
 
 void silk_decode_pitch(
     opus_int16                  lagIndex,           /* I                                                                */
diff --git a/drivers/opus/silk/decode_pulses.c b/drivers/opus/silk/decode_pulses.c
index 13772f8a57..24f841881d 100644
--- a/drivers/opus/silk/decode_pulses.c
+++ b/drivers/opus/silk/decode_pulses.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /*********************************************/
 /* Decode quantization indices of excitation */
diff --git a/drivers/opus/silk/decoder_set_fs.c b/drivers/opus/silk/decoder_set_fs.c
index 6d2de56647..97ecd8afd1 100644
--- a/drivers/opus/silk/decoder_set_fs.c
+++ b/drivers/opus/silk/decoder_set_fs.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Set decoder sampling rate */
 opus_int silk_decoder_set_fs(
diff --git a/drivers/opus/silk/define.h b/drivers/opus/silk/define.h
index c47aca9f58..a38564c835 100644
--- a/drivers/opus/silk/define.h
+++ b/drivers/opus/silk/define.h
@@ -28,8 +28,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_DEFINE_H
 #define SILK_DEFINE_H
 
-#include "errors.h"
-#include "typedef.h"
+#include "opus/silk/errors.h"
+#include "opus/silk/typedef.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/enc_API.c b/drivers/opus/silk/enc_API.c
index 66a9bb67de..ac1a7854f0 100644
--- a/drivers/opus/silk/enc_API.c
+++ b/drivers/opus/silk/enc_API.c
@@ -25,20 +25,20 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
-#include "define.h"
-#include "API.h"
-#include "control.h"
-#include "typedef.h"
-#include "stack_alloc.h"
-#include "structs.h"
-#include "tuning_parameters.h"
+#include "opus/silk/define.h"
+#include "opus/silk/API.h"
+#include "opus/silk/control.h"
+#include "opus/silk/typedef.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/structs.h"
+#include "opus/silk/tuning_parameters.h"
 #ifdef OPUS_FIXED_POINT
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 #else
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 #endif
 
 /***************************************/
diff --git a/drivers/opus/silk/encode_indices.c b/drivers/opus/silk/encode_indices.c
index c6679b34f6..7150325d49 100644
--- a/drivers/opus/silk/encode_indices.c
+++ b/drivers/opus/silk/encode_indices.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Encode side-information parameters to payload */
 void silk_encode_indices(
diff --git a/drivers/opus/silk/encode_pulses.c b/drivers/opus/silk/encode_pulses.c
index d148b9d1e6..f9fe39eeb8 100644
--- a/drivers/opus/silk/encode_pulses.c
+++ b/drivers/opus/silk/encode_pulses.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /*********************************************/
 /* Encode quantization indices of excitation */
diff --git a/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
index 1df4b01d20..19d5defc7a 100644
--- a/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
+++ b/drivers/opus/silk/fixed/LTP_analysis_filter_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 
 void silk_LTP_analysis_filter_FIX(
     opus_int16                      *LTP_res,                               /* O    LTP residual signal of length MAX_NB_SUBFR * ( pre_length + subfr_length )  */
diff --git a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
index ab6923c5c9..0887fd03f7 100644
--- a/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
+++ b/drivers/opus/silk/fixed/LTP_scale_ctrl_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 
 /* Calculation of LTP state scaling */
 void silk_LTP_scale_ctrl_FIX(
diff --git a/drivers/opus/silk/fixed/apply_sine_window_FIX.c b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
index 0998b49eca..5b4d8ebfdc 100644
--- a/drivers/opus/silk/fixed/apply_sine_window_FIX.c
+++ b/drivers/opus/silk/fixed/apply_sine_window_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Apply sine window to signal vector.                                      */
 /* Window types:                                                            */
diff --git a/drivers/opus/silk/fixed/autocorr_FIX.c b/drivers/opus/silk/fixed/autocorr_FIX.c
index 438b42f85b..88a849e12c 100644
--- a/drivers/opus/silk/fixed/autocorr_FIX.c
+++ b/drivers/opus/silk/fixed/autocorr_FIX.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "celt_lpc.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/celt/celt_lpc.h"
 
 /* Compute autocorrelation */
 void silk_autocorr(
diff --git a/drivers/opus/silk/fixed/burg_modified_FIX.c b/drivers/opus/silk/fixed/burg_modified_FIX.c
index ce2a560e6d..5ef3ad2c38 100644
--- a/drivers/opus/silk/fixed/burg_modified_FIX.c
+++ b/drivers/opus/silk/fixed/burg_modified_FIX.c
@@ -25,14 +25,14 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "define.h"
-#include "tuning_parameters.h"
-#include "pitch.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/define.h"
+#include "opus/silk/tuning_parameters.h"
+#include "opus/celt/pitch.h"
 
 #define MAX_FRAME_SIZE              384             /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
 
diff --git a/drivers/opus/silk/fixed/corrMatrix_FIX.c b/drivers/opus/silk/fixed/corrMatrix_FIX.c
index 28543fc204..9f50153dcc 100644
--- a/drivers/opus/silk/fixed/corrMatrix_FIX.c
+++ b/drivers/opus/silk/fixed/corrMatrix_FIX.c
@@ -25,15 +25,15 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /**********************************************************************
  * Correlation Matrix Computations for LS estimate.
  **********************************************************************/
 
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 
 /* Calculates correlation vector X'*t */
 void silk_corrVector_FIX(
diff --git a/drivers/opus/silk/fixed/encode_frame_FIX.c b/drivers/opus/silk/fixed/encode_frame_FIX.c
index 2d80ca3583..27944729f8 100644
--- a/drivers/opus/silk/fixed/encode_frame_FIX.c
+++ b/drivers/opus/silk/fixed/encode_frame_FIX.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate           */
 static OPUS_INLINE void silk_LBRR_encode_FIX(
diff --git a/drivers/opus/silk/fixed/find_LPC_FIX.c b/drivers/opus/silk/fixed/find_LPC_FIX.c
index a46cdb7515..df76e17ca2 100644
--- a/drivers/opus/silk/fixed/find_LPC_FIX.c
+++ b/drivers/opus/silk/fixed/find_LPC_FIX.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Finds LPC vector from correlations, and converts to NLSF */
 void silk_find_LPC_FIX(
diff --git a/drivers/opus/silk/fixed/find_LTP_FIX.c b/drivers/opus/silk/fixed/find_LTP_FIX.c
index a1d152eee4..d556371ffa 100644
--- a/drivers/opus/silk/fixed/find_LTP_FIX.c
+++ b/drivers/opus/silk/fixed/find_LTP_FIX.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Head room for correlations */
 #define LTP_CORRS_HEAD_ROOM                             2
diff --git a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
index 0598477cd1..0d00b3edf6 100644
--- a/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
+++ b/drivers/opus/silk/fixed/find_pitch_lags_FIX.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Find pitch lags */
 void silk_find_pitch_lags_FIX(
diff --git a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
index 0ab70df09d..93b506b523 100644
--- a/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
+++ b/drivers/opus/silk/fixed/find_pred_coefs_FIX.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
 
 void silk_find_pred_coefs_FIX(
     silk_encoder_state_FIX          *psEnc,                                 /* I/O  encoder state                                                               */
diff --git a/drivers/opus/silk/fixed/k2a_FIX.c b/drivers/opus/silk/fixed/k2a_FIX.c
index 848666ee3b..7d5808f190 100644
--- a/drivers/opus/silk/fixed/k2a_FIX.c
+++ b/drivers/opus/silk/fixed/k2a_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Step up function, converts reflection coefficients to prediction coefficients */
 void silk_k2a(
diff --git a/drivers/opus/silk/fixed/k2a_Q16_FIX.c b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
index f7e62e95fe..8df61dc3f6 100644
--- a/drivers/opus/silk/fixed/k2a_Q16_FIX.c
+++ b/drivers/opus/silk/fixed/k2a_Q16_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Step up function, converts reflection coefficients to prediction coefficients */
 void silk_k2a_Q16(
diff --git a/drivers/opus/silk/fixed/main_FIX.h b/drivers/opus/silk/fixed/main_FIX.h
index fb47ffe700..71a560ef0e 100644
--- a/drivers/opus/silk/fixed/main_FIX.h
+++ b/drivers/opus/silk/fixed/main_FIX.h
@@ -28,13 +28,13 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_MAIN_FIX_H
 #define SILK_MAIN_FIX_H
 
-#include "SigProc_FIX.h"
-#include "structs_FIX.h"
-#include "control.h"
-#include "silk_main.h"
-#include "PLC.h"
-#include "debug.h"
-#include "entenc.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/fixed/structs_FIX.h"
+#include "opus/silk/control.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/PLC.h"
+#include "opus/silk/debug.h"
+#include "opus/celt/entenc.h"
 
 #ifndef FORCE_CPP_BUILD
 #ifdef __cplusplus
diff --git a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
index 420cbeedfc..862640d2c8 100644
--- a/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
+++ b/drivers/opus/silk/fixed/noise_shape_analysis_FIX.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
diff --git a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
index 4d65c09d1d..2acd51db6a 100644
--- a/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
+++ b/drivers/opus/silk/fixed/pitch_analysis_core_FIX.c
@@ -25,18 +25,18 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /***********************************************************
 * Pitch analyser function
 ********************************************************** */
-#include "SigProc_FIX.h"
-#include "pitch_est_defines.h"
-#include "stack_alloc.h"
-#include "debug.h"
-#include "pitch.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/pitch_est_defines.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/debug.h"
+#include "opus/celt/pitch.h"
 
 #define SCRATCH_SIZE    22
 #define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )
diff --git a/drivers/opus/silk/fixed/prefilter_FIX.c b/drivers/opus/silk/fixed/prefilter_FIX.c
index 0b027eb836..195df3da75 100644
--- a/drivers/opus/silk/fixed/prefilter_FIX.c
+++ b/drivers/opus/silk/fixed/prefilter_FIX.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Prefilter for finding Quantizer input signal */
 static OPUS_INLINE void silk_prefilt_FIX(
diff --git a/drivers/opus/silk/fixed/process_gains_FIX.c b/drivers/opus/silk/fixed/process_gains_FIX.c
index 3a78c475bb..2c501a2010 100644
--- a/drivers/opus/silk/fixed/process_gains_FIX.c
+++ b/drivers/opus/silk/fixed/process_gains_FIX.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Processing of gains */
 void silk_process_gains_FIX(
diff --git a/drivers/opus/silk/fixed/regularize_correlations_FIX.c b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
index a3378fdd17..af34da68fa 100644
--- a/drivers/opus/silk/fixed/regularize_correlations_FIX.c
+++ b/drivers/opus/silk/fixed/regularize_correlations_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 
 /* Add noise to matrix diagonal */
 void silk_regularize_correlations_FIX(
diff --git a/drivers/opus/silk/fixed/residual_energy16_FIX.c b/drivers/opus/silk/fixed/residual_energy16_FIX.c
index 39bdff2a72..9b6e103a52 100644
--- a/drivers/opus/silk/fixed/residual_energy16_FIX.c
+++ b/drivers/opus/silk/fixed/residual_energy16_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 
 /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */
 opus_int32 silk_residual_energy16_covar_FIX(
diff --git a/drivers/opus/silk/fixed/residual_energy_FIX.c b/drivers/opus/silk/fixed/residual_energy_FIX.c
index 13dbc51e39..468bfbab48 100644
--- a/drivers/opus/silk/fixed/residual_energy_FIX.c
+++ b/drivers/opus/silk/fixed/residual_energy_FIX.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
 
 /* Calculates residual energies of input subframes where all subframes have LPC_order   */
 /* of preceding samples                                                                 */
diff --git a/drivers/opus/silk/fixed/schur64_FIX.c b/drivers/opus/silk/fixed/schur64_FIX.c
index 22c0952ffd..fe1278e062 100644
--- a/drivers/opus/silk/fixed/schur64_FIX.c
+++ b/drivers/opus/silk/fixed/schur64_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Slower than schur(), but more accurate.                              */
 /* Uses SMULL(), available on armv4                                     */
diff --git a/drivers/opus/silk/fixed/schur_FIX.c b/drivers/opus/silk/fixed/schur_FIX.c
index e8b24cf068..b0a36e5ad2 100644
--- a/drivers/opus/silk/fixed/schur_FIX.c
+++ b/drivers/opus/silk/fixed/schur_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Faster than schur64(), but much less accurate.                       */
 /* uses SMLAWB(), requiring armv5E and higher.                          */
diff --git a/drivers/opus/silk/fixed/solve_LS_FIX.c b/drivers/opus/silk/fixed/solve_LS_FIX.c
index 5d09284935..6e8afaaa88 100644
--- a/drivers/opus/silk/fixed/solve_LS_FIX.c
+++ b/drivers/opus/silk/fixed/solve_LS_FIX.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
-#include "stack_alloc.h"
-#include "tuning_parameters.h"
+#include "opus/silk/fixed/main_FIX.h"
+#include "opus/celt/stack_alloc.h"
+#include "opus/silk/tuning_parameters.h"
 
 /*****************************/
 /* Internal function headers */
diff --git a/drivers/opus/silk/fixed/structs_FIX.h b/drivers/opus/silk/fixed/structs_FIX.h
index 0284dfa27a..21eab05f0f 100644
--- a/drivers/opus/silk/fixed/structs_FIX.h
+++ b/drivers/opus/silk/fixed/structs_FIX.h
@@ -28,9 +28,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_STRUCTS_FIX_H
 #define SILK_STRUCTS_FIX_H
 
-#include "typedef.h"
-#include "silk_main.h"
-#include "structs.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/structs.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/fixed/vector_ops_FIX.c b/drivers/opus/silk/fixed/vector_ops_FIX.c
index b1e422eb91..c2725194ec 100644
--- a/drivers/opus/silk/fixed/vector_ops_FIX.c
+++ b/drivers/opus/silk/fixed/vector_ops_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Copy and multiply a vector by a constant */
 void silk_scale_copy_vector16(
diff --git a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
index 3f04df775c..4e5a7fee58 100644
--- a/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
+++ b/drivers/opus/silk/fixed/warped_autocorrelation_FIX.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 
 #define QC  10
 #define QS  14
diff --git a/drivers/opus/silk/float/LPC_analysis_filter_FLP.c b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
index 8d26c093bf..438b704fe9 100644
--- a/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
+++ b/drivers/opus/silk/float/LPC_analysis_filter_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdlib.h>
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /************************************************/
 /* LPC analysis filter                          */
diff --git a/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
index 968edfb189..2e86e269eb 100644
--- a/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
+++ b/drivers/opus/silk/float/LPC_inv_pred_gain_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "SigProc_FLP.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 #define RC_THRESHOLD        0.9999f
 
diff --git a/drivers/opus/silk/float/LTP_analysis_filter_FLP.c b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
index fc729e99b1..5919ceb928 100644
--- a/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
+++ b/drivers/opus/silk/float/LTP_analysis_filter_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 void silk_LTP_analysis_filter_FLP(
     silk_float                      *LTP_res,                           /* O    LTP res MAX_NB_SUBFR*(pre_lgth+subfr_lngth) */
diff --git a/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
index 60e1119d5a..c952d810af 100644
--- a/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
+++ b/drivers/opus/silk/float/LTP_scale_ctrl_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 void silk_LTP_scale_ctrl_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
diff --git a/drivers/opus/silk/float/SigProc_FLP.h b/drivers/opus/silk/float/SigProc_FLP.h
index f0cb3733be..9b14f24f21 100644
--- a/drivers/opus/silk/float/SigProc_FLP.h
+++ b/drivers/opus/silk/float/SigProc_FLP.h
@@ -28,8 +28,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_SIGPROC_FLP_H
 #define SILK_SIGPROC_FLP_H
 
-#include "SigProc_FIX.h"
-#include "float_cast.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/celt/float_cast.h"
 #include <math.h>
 
 #ifdef  __cplusplus
diff --git a/drivers/opus/silk/float/apply_sine_window_FLP.c b/drivers/opus/silk/float/apply_sine_window_FLP.c
index d904585d17..e8aa197bb0 100644
--- a/drivers/opus/silk/float/apply_sine_window_FLP.c
+++ b/drivers/opus/silk/float/apply_sine_window_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /* Apply sine window to signal vector   */
 /* Window types:                        */
diff --git a/drivers/opus/silk/float/autocorrelation_FLP.c b/drivers/opus/silk/float/autocorrelation_FLP.c
index 192a001b16..f4b90ff32d 100644
--- a/drivers/opus/silk/float/autocorrelation_FLP.c
+++ b/drivers/opus/silk/float/autocorrelation_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "typedef.h"
-#include "SigProc_FLP.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* compute autocorrelation */
 void silk_autocorrelation_FLP(
diff --git a/drivers/opus/silk/float/burg_modified_FLP.c b/drivers/opus/silk/float/burg_modified_FLP.c
index 0f30ca2280..5a16334240 100644
--- a/drivers/opus/silk/float/burg_modified_FLP.c
+++ b/drivers/opus/silk/float/burg_modified_FLP.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
-#include "tuning_parameters.h"
-#include "define.h"
+#include "opus/silk/float/SigProc_FLP.h"
+#include "opus/silk/tuning_parameters.h"
+#include "opus/silk/define.h"
 
 #define MAX_FRAME_SIZE              384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
 
diff --git a/drivers/opus/silk/float/bwexpander_FLP.c b/drivers/opus/silk/float/bwexpander_FLP.c
index 86154dc3f1..b3de4f6453 100644
--- a/drivers/opus/silk/float/bwexpander_FLP.c
+++ b/drivers/opus/silk/float/bwexpander_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* Chirp (bw expand) LP AR filter */
 void silk_bwexpander_FLP(
diff --git a/drivers/opus/silk/float/corrMatrix_FLP.c b/drivers/opus/silk/float/corrMatrix_FLP.c
index e193c98f11..551f8578d7 100644
--- a/drivers/opus/silk/float/corrMatrix_FLP.c
+++ b/drivers/opus/silk/float/corrMatrix_FLP.c
@@ -25,15 +25,15 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /**********************************************************************
  * Correlation matrix computations for LS estimate.
  **********************************************************************/
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /* Calculates correlation vector X'*t */
 void silk_corrVector_FLP(
diff --git a/drivers/opus/silk/float/encode_frame_FLP.c b/drivers/opus/silk/float/encode_frame_FLP.c
index 90e5357ced..c5973b8922 100644
--- a/drivers/opus/silk/float/encode_frame_FLP.c
+++ b/drivers/opus/silk/float/encode_frame_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */
 static OPUS_INLINE void silk_LBRR_encode_FLP(
diff --git a/drivers/opus/silk/float/energy_FLP.c b/drivers/opus/silk/float/energy_FLP.c
index d441526df3..9c6fad48d7 100644
--- a/drivers/opus/silk/float/energy_FLP.c
+++ b/drivers/opus/silk/float/energy_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* sum of squares of a silk_float array, with result as double */
 double silk_energy_FLP(
diff --git a/drivers/opus/silk/float/find_LPC_FLP.c b/drivers/opus/silk/float/find_LPC_FLP.c
index 212f2de3cd..2b8c54388f 100644
--- a/drivers/opus/silk/float/find_LPC_FLP.c
+++ b/drivers/opus/silk/float/find_LPC_FLP.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "define.h"
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/define.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* LPC analysis */
 void silk_find_LPC_FLP(
diff --git a/drivers/opus/silk/float/find_LTP_FLP.c b/drivers/opus/silk/float/find_LTP_FLP.c
index 5c62851f20..2f66de4684 100644
--- a/drivers/opus/silk/float/find_LTP_FLP.c
+++ b/drivers/opus/silk/float/find_LTP_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 void silk_find_LTP_FLP(
     silk_float                      b[ MAX_NB_SUBFR * LTP_ORDER ],      /* O    LTP coefs                                   */
diff --git a/drivers/opus/silk/float/find_pitch_lags_FLP.c b/drivers/opus/silk/float/find_pitch_lags_FLP.c
index d74d5941b5..a2d582c734 100644
--- a/drivers/opus/silk/float/find_pitch_lags_FLP.c
+++ b/drivers/opus/silk/float/find_pitch_lags_FLP.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 #include <stdlib.h>
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 void silk_find_pitch_lags_FLP(
     silk_encoder_state_FLP          *psEnc,                             /* I/O  Encoder state FLP                           */
diff --git a/drivers/opus/silk/float/find_pred_coefs_FLP.c b/drivers/opus/silk/float/find_pred_coefs_FLP.c
index e0d8804cc9..61eead7573 100644
--- a/drivers/opus/silk/float/find_pred_coefs_FLP.c
+++ b/drivers/opus/silk/float/find_pred_coefs_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /* Find LPC and LTP coefficients */
 void silk_find_pred_coefs_FLP(
diff --git a/drivers/opus/silk/float/inner_product_FLP.c b/drivers/opus/silk/float/inner_product_FLP.c
index 57acf5ffba..e5f0308448 100644
--- a/drivers/opus/silk/float/inner_product_FLP.c
+++ b/drivers/opus/silk/float/inner_product_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* inner product of two silk_float arrays, with result as double */
 double silk_inner_product_FLP(
diff --git a/drivers/opus/silk/float/k2a_FLP.c b/drivers/opus/silk/float/k2a_FLP.c
index a668a32127..71dc4c3c44 100644
--- a/drivers/opus/silk/float/k2a_FLP.c
+++ b/drivers/opus/silk/float/k2a_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* step up function, converts reflection coefficients to prediction coefficients */
 void silk_k2a_FLP(
diff --git a/drivers/opus/silk/float/levinsondurbin_FLP.c b/drivers/opus/silk/float/levinsondurbin_FLP.c
index 64aaf0fb29..8fbca230a5 100644
--- a/drivers/opus/silk/float/levinsondurbin_FLP.c
+++ b/drivers/opus/silk/float/levinsondurbin_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* Solve the normal equations using the Levinson-Durbin recursion */
 silk_float silk_levinsondurbin_FLP(         /* O    prediction error energy                                     */
diff --git a/drivers/opus/silk/float/main_FLP.h b/drivers/opus/silk/float/main_FLP.h
index 92d6ec3df1..4ec8ddeefc 100644
--- a/drivers/opus/silk/float/main_FLP.h
+++ b/drivers/opus/silk/float/main_FLP.h
@@ -28,13 +28,13 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_MAIN_FLP_H
 #define SILK_MAIN_FLP_H
 
-#include "SigProc_FLP.h"
-#include "SigProc_FIX.h"
-#include "structs_FLP.h"
-#include "silk_main.h"
-#include "define.h"
-#include "debug.h"
-#include "entenc.h"
+#include "opus/silk/float/SigProc_FLP.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/float/structs_FLP.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/define.h"
+#include "opus/silk/debug.h"
+#include "opus/celt/entenc.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/float/noise_shape_analysis_FLP.c b/drivers/opus/silk/float/noise_shape_analysis_FLP.c
index f80e0b3d0e..5d8bc6332a 100644
--- a/drivers/opus/silk/float/noise_shape_analysis_FLP.c
+++ b/drivers/opus/silk/float/noise_shape_analysis_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Compute gain to make warped filter coefficients have a zero mean log frequency response on a   */
 /* non-warped frequency scale. (So that it can be implemented with a minimum-phase monic filter.) */
diff --git a/drivers/opus/silk/float/pitch_analysis_core_FLP.c b/drivers/opus/silk/float/pitch_analysis_core_FLP.c
index 2588094c49..2689c5008b 100644
--- a/drivers/opus/silk/float/pitch_analysis_core_FLP.c
+++ b/drivers/opus/silk/float/pitch_analysis_core_FLP.c
@@ -25,17 +25,17 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /*****************************************************************************
 * Pitch analyser function
 ******************************************************************************/
-#include "SigProc_FLP.h"
-#include "SigProc_FIX.h"
-#include "pitch_est_defines.h"
-#include "pitch.h"
+#include "opus/silk/float/SigProc_FLP.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/pitch_est_defines.h"
+#include "opus/celt/pitch.h"
 
 #define SCRATCH_SIZE        22
 
diff --git a/drivers/opus/silk/float/prefilter_FLP.c b/drivers/opus/silk/float/prefilter_FLP.c
index aa43852ff1..0298ef2f8e 100644
--- a/drivers/opus/silk/float/prefilter_FLP.c
+++ b/drivers/opus/silk/float/prefilter_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 /*
 * Prefilter for finding Quantizer input signal
diff --git a/drivers/opus/silk/float/process_gains_FLP.c b/drivers/opus/silk/float/process_gains_FLP.c
index e83d05552a..bd5e7ee7a2 100644
--- a/drivers/opus/silk/float/process_gains_FLP.c
+++ b/drivers/opus/silk/float/process_gains_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 /* Processing of gains */
 void silk_process_gains_FLP(
diff --git a/drivers/opus/silk/float/regularize_correlations_FLP.c b/drivers/opus/silk/float/regularize_correlations_FLP.c
index f056eadc57..397e45b10b 100644
--- a/drivers/opus/silk/float/regularize_correlations_FLP.c
+++ b/drivers/opus/silk/float/regularize_correlations_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /* Add noise to matrix diagonal */
 void silk_regularize_correlations_FLP(
diff --git a/drivers/opus/silk/float/residual_energy_FLP.c b/drivers/opus/silk/float/residual_energy_FLP.c
index 011efcef04..50bc728b71 100644
--- a/drivers/opus/silk/float/residual_energy_FLP.c
+++ b/drivers/opus/silk/float/residual_energy_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 #define MAX_ITERATIONS_RESIDUAL_NRG         10
 #define REGULARIZATION_FACTOR               1e-8f
diff --git a/drivers/opus/silk/float/scale_copy_vector_FLP.c b/drivers/opus/silk/float/scale_copy_vector_FLP.c
index 7578d44894..8c5bfadb3a 100644
--- a/drivers/opus/silk/float/scale_copy_vector_FLP.c
+++ b/drivers/opus/silk/float/scale_copy_vector_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* copy and multiply a vector by a constant */
 void silk_scale_copy_vector_FLP(
diff --git a/drivers/opus/silk/float/scale_vector_FLP.c b/drivers/opus/silk/float/scale_vector_FLP.c
index 03345d519d..191b3d6041 100644
--- a/drivers/opus/silk/float/scale_vector_FLP.c
+++ b/drivers/opus/silk/float/scale_vector_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 /* multiply a vector by a constant */
 void silk_scale_vector_FLP(
diff --git a/drivers/opus/silk/float/schur_FLP.c b/drivers/opus/silk/float/schur_FLP.c
index 76b87f1304..631dbe093a 100644
--- a/drivers/opus/silk/float/schur_FLP.c
+++ b/drivers/opus/silk/float/schur_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FLP.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 silk_float silk_schur_FLP(                  /* O    returns residual energy                                     */
     silk_float          refl_coef[],        /* O    reflection coefficients (length order)                      */
diff --git a/drivers/opus/silk/float/solve_LS_FLP.c b/drivers/opus/silk/float/solve_LS_FLP.c
index 9fd962b33d..b3757ce03f 100644
--- a/drivers/opus/silk/float/solve_LS_FLP.c
+++ b/drivers/opus/silk/float/solve_LS_FLP.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
-#include "tuning_parameters.h"
+#include "opus/silk/float/main_FLP.h"
+#include "opus/silk/tuning_parameters.h"
 
 /**********************************************************************
  * LDL Factorisation. Finds the upper triangular matrix L and the diagonal
diff --git a/drivers/opus/silk/float/sort_FLP.c b/drivers/opus/silk/float/sort_FLP.c
index 58ea485116..f2570503a5 100644
--- a/drivers/opus/silk/float/sort_FLP.c
+++ b/drivers/opus/silk/float/sort_FLP.c
@@ -25,16 +25,16 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /* Insertion sort (fast for already almost sorted arrays):  */
 /* Best case:  O(n)   for an already sorted array           */
 /* Worst case: O(n^2) for an inversely sorted array         */
 
-#include "typedef.h"
-#include "SigProc_FLP.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/float/SigProc_FLP.h"
 
 void silk_insertion_sort_decreasing_FLP(
     silk_float          *a,                 /* I/O  Unsorted / Sorted vector                                    */
diff --git a/drivers/opus/silk/float/structs_FLP.h b/drivers/opus/silk/float/structs_FLP.h
index 4082914d93..798aec2f43 100644
--- a/drivers/opus/silk/float/structs_FLP.h
+++ b/drivers/opus/silk/float/structs_FLP.h
@@ -28,9 +28,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_STRUCTS_FLP_H
 #define SILK_STRUCTS_FLP_H
 
-#include "typedef.h"
-#include "silk_main.h"
-#include "structs.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/structs.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/float/warped_autocorrelation_FLP.c b/drivers/opus/silk/float/warped_autocorrelation_FLP.c
index 6075dfe8d3..092b998d93 100644
--- a/drivers/opus/silk/float/warped_autocorrelation_FLP.c
+++ b/drivers/opus/silk/float/warped_autocorrelation_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /* Autocorrelations for a warped frequency axis */
 void silk_warped_autocorrelation_FLP(
diff --git a/drivers/opus/silk/float/wrappers_FLP.c b/drivers/opus/silk/float/wrappers_FLP.c
index c4e34e5578..31586cf2a9 100644
--- a/drivers/opus/silk/float/wrappers_FLP.c
+++ b/drivers/opus/silk/float/wrappers_FLP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 
 /* Wrappers. Calls flp / fix code */
 
diff --git a/drivers/opus/silk/gain_quant.c b/drivers/opus/silk/gain_quant.c
index e9467198eb..e1d9b91126 100644
--- a/drivers/opus/silk/gain_quant.c
+++ b/drivers/opus/silk/gain_quant.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 #define OFFSET                  ( ( MIN_QGAIN_DB * 128 ) / 6 + 16 * 128 )
 #define SCALE_Q16               ( ( 65536 * ( N_LEVELS_QGAIN - 1 ) ) / ( ( ( MAX_QGAIN_DB - MIN_QGAIN_DB ) * 128 ) / 6 ) )
diff --git a/drivers/opus/silk/init_decoder.c b/drivers/opus/silk/init_decoder.c
index 88c1ff7b43..43fa1f1c38 100644
--- a/drivers/opus/silk/init_decoder.c
+++ b/drivers/opus/silk/init_decoder.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /************************/
 /* Init Decoder State   */
diff --git a/drivers/opus/silk/init_encoder.c b/drivers/opus/silk/init_encoder.c
index baf97d49e7..9ba4524b73 100644
--- a/drivers/opus/silk/init_encoder.c
+++ b/drivers/opus/silk/init_encoder.c
@@ -25,16 +25,16 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 #ifdef OPUS_FIXED_POINT
-#include "main_FIX.h"
+#include "opus/silk/fixed/main_FIX.h"
 #else
-#include "main_FLP.h"
+#include "opus/silk/float/main_FLP.h"
 #endif
-#include "tuning_parameters.h"
-#include "cpu_support.h"
+#include "opus/silk/tuning_parameters.h"
+#include "opus/celt/cpu_support.h"
 
 /*********************************/
 /* Initialize Silk Encoder state */
diff --git a/drivers/opus/silk/inner_prod_aligned.c b/drivers/opus/silk/inner_prod_aligned.c
index d625001db7..86ecf463ad 100644
--- a/drivers/opus/silk/inner_prod_aligned.c
+++ b/drivers/opus/silk/inner_prod_aligned.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 opus_int32 silk_inner_prod_aligned_scale(
     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
diff --git a/drivers/opus/silk/interpolate.c b/drivers/opus/silk/interpolate.c
index d5df0feddb..26c44de493 100644
--- a/drivers/opus/silk/interpolate.c
+++ b/drivers/opus/silk/interpolate.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Interpolate two vectors */
 void silk_interpolate(
diff --git a/drivers/opus/silk/lin2log.c b/drivers/opus/silk/lin2log.c
index 77bfc8c8ab..ea11f33dd0 100644
--- a/drivers/opus/silk/lin2log.c
+++ b/drivers/opus/silk/lin2log.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 /* Approximation of 128 * log2() (very close inverse of silk_log2lin()) */
 /* Convert input to a log scale    */
 opus_int32 silk_lin2log(
diff --git a/drivers/opus/silk/log2lin.c b/drivers/opus/silk/log2lin.c
index 0ed2a12efd..0a33ca48fb 100644
--- a/drivers/opus/silk/log2lin.c
+++ b/drivers/opus/silk/log2lin.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Approximation of 2^() (very close inverse of silk_lin2log()) */
 /* Convert input to a linear scale    */
diff --git a/drivers/opus/silk/macros.h b/drivers/opus/silk/macros.h
index 6cf2e93dbc..8986dc8f82 100644
--- a/drivers/opus/silk/macros.h
+++ b/drivers/opus/silk/macros.h
@@ -28,10 +28,10 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_MACROS_H
 #define SILK_MACROS_H
 
-#include "opus_config.h"
+#include "opus/opus_config.h"
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 /* This is an OPUS_INLINE header file for general platform. */
 
@@ -77,7 +77,7 @@ POSSIBILITY OF SUCH DAMAGE.
                                         (( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) :    \
                                         ((((a)^0x80000000) & (b)  & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
 
-#include "ecintrin.h"
+#include "opus/celt/ecintrin.h"
 
 static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
 {
diff --git a/drivers/opus/silk/pitch_est_defines.h b/drivers/opus/silk/pitch_est_defines.h
index e1e4b5d768..e2d9e517c4 100644
--- a/drivers/opus/silk/pitch_est_defines.h
+++ b/drivers/opus/silk/pitch_est_defines.h
@@ -28,7 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_PE_DEFINES_H
 #define SILK_PE_DEFINES_H
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /********************************************************/
 /* Definitions for pitch estimator                      */
diff --git a/drivers/opus/silk/pitch_est_tables.c b/drivers/opus/silk/pitch_est_tables.c
index 97ddbab010..7555f5b04b 100644
--- a/drivers/opus/silk/pitch_est_tables.c
+++ b/drivers/opus/silk/pitch_est_tables.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "typedef.h"
-#include "pitch_est_defines.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/pitch_est_defines.h"
 
 const opus_int8 silk_CB_lags_stage2_10_ms[ PE_MAX_NB_SUBFR >> 1][ PE_NB_CBKS_STAGE2_10MS ] =
 {
diff --git a/drivers/opus/silk/process_NLSFs.c b/drivers/opus/silk/process_NLSFs.c
index 0193fda1f1..9e6ebf827c 100644
--- a/drivers/opus/silk/process_NLSFs.c
+++ b/drivers/opus/silk/process_NLSFs.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Limit, stabilize, convert and quantize NLSFs */
 void silk_process_NLSFs(
diff --git a/drivers/opus/silk/quant_LTP_gains.c b/drivers/opus/silk/quant_LTP_gains.c
index 34bcd3acdb..f6fff470f4 100644
--- a/drivers/opus/silk/quant_LTP_gains.c
+++ b/drivers/opus/silk/quant_LTP_gains.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "tuning_parameters.h"
+#include "opus/silk/silk_main.h"
+#include "opus/silk/tuning_parameters.h"
 
 void silk_quant_LTP_gains(
     opus_int16                  B_Q14[ MAX_NB_SUBFR * LTP_ORDER ],          /* I/O  (un)quantized LTP gains         */
diff --git a/drivers/opus/silk/resampler.c b/drivers/opus/silk/resampler.c
index 14b185c45e..dde8fcddb1 100644
--- a/drivers/opus/silk/resampler.c
+++ b/drivers/opus/silk/resampler.c
@@ -25,8 +25,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /*
@@ -47,7 +47,7 @@ POSSIBILITY OF SUCH DAMAGE.
  * AF  -> AR2 filter followed by FIR interpolation
  */
 
-#include "resampler_private.h"
+#include "opus/silk/resampler_private.h"
 
 /* Tables with delay compensation values to equalize total delay for different modes */
 static const opus_int8 delay_matrix_enc[ 5 ][ 3 ] = {
diff --git a/drivers/opus/silk/resampler_down2.c b/drivers/opus/silk/resampler_down2.c
index 5c4b27759a..dbc962c5ef 100644
--- a/drivers/opus/silk/resampler_down2.c
+++ b/drivers/opus/silk/resampler_down2.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_rom.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_rom.h"
 
 /* Downsample by a factor 2 */
 void silk_resampler_down2(
diff --git a/drivers/opus/silk/resampler_down2_3.c b/drivers/opus/silk/resampler_down2_3.c
index 2733072fe6..6ff32ff336 100644
--- a/drivers/opus/silk/resampler_down2_3.c
+++ b/drivers/opus/silk/resampler_down2_3.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_private.h"
-#include "stack_alloc.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_private.h"
+#include "opus/celt/stack_alloc.h"
 
 #define ORDER_FIR                   4
 
diff --git a/drivers/opus/silk/resampler_private.h b/drivers/opus/silk/resampler_private.h
index 422a7d9d95..5c0ee110fc 100644
--- a/drivers/opus/silk/resampler_private.h
+++ b/drivers/opus/silk/resampler_private.h
@@ -32,9 +32,9 @@ POSSIBILITY OF SUCH DAMAGE.
 extern "C" {
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_structs.h"
-#include "resampler_rom.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_structs.h"
+#include "opus/silk/resampler_rom.h"
 
 /* Number of input samples to process in the inner loop */
 #define RESAMPLER_MAX_BATCH_SIZE_MS             10
diff --git a/drivers/opus/silk/resampler_private_AR2.c b/drivers/opus/silk/resampler_private_AR2.c
index 84157d17ba..e04319da4d 100644
--- a/drivers/opus/silk/resampler_private_AR2.c
+++ b/drivers/opus/silk/resampler_private_AR2.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_private.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_private.h"
 
 /* Second order AR filter with single delay elements */
 void silk_resampler_private_AR2(
diff --git a/drivers/opus/silk/resampler_private_IIR_FIR.c b/drivers/opus/silk/resampler_private_IIR_FIR.c
index f45c3e7413..1d71ebd891 100644
--- a/drivers/opus/silk/resampler_private_IIR_FIR.c
+++ b/drivers/opus/silk/resampler_private_IIR_FIR.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_private.h"
-#include "stack_alloc.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_private.h"
+#include "opus/celt/stack_alloc.h"
 
 static OPUS_INLINE opus_int16 *silk_resampler_private_IIR_FIR_INTERPOL(
     opus_int16  *out,
diff --git a/drivers/opus/silk/resampler_private_down_FIR.c b/drivers/opus/silk/resampler_private_down_FIR.c
index f4de303546..739c91db29 100644
--- a/drivers/opus/silk/resampler_private_down_FIR.c
+++ b/drivers/opus/silk/resampler_private_down_FIR.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_private.h"
-#include "stack_alloc.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_private.h"
+#include "opus/celt/stack_alloc.h"
 
 static OPUS_INLINE opus_int16 *silk_resampler_private_down_FIR_INTERPOL(
     opus_int16          *out,
diff --git a/drivers/opus/silk/resampler_private_up2_HQ.c b/drivers/opus/silk/resampler_private_up2_HQ.c
index 39f4818454..a2b6ad432e 100644
--- a/drivers/opus/silk/resampler_private_up2_HQ.c
+++ b/drivers/opus/silk/resampler_private_up2_HQ.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
-#include "resampler_private.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/resampler_private.h"
 
 /* Upsample by a factor 2, high quality */
 /* Uses 2nd order allpass filters for the 2x upsampling, followed by a      */
diff --git a/drivers/opus/silk/resampler_rom.c b/drivers/opus/silk/resampler_rom.c
index 0098e18ba8..d564087051 100644
--- a/drivers/opus/silk/resampler_rom.c
+++ b/drivers/opus/silk/resampler_rom.c
@@ -25,14 +25,14 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /* Filter coefficients for IIR/FIR polyphase resampling     *
  * Total size: 179 Words (358 Bytes)                        */
 
-#include "resampler_private.h"
+#include "opus/silk/resampler_private.h"
 
 /* Matlab code for the notch filter coefficients: */
 /* B = [1, 0.147, 1];  A = [1, 0.107, 0.89]; G = 0.93; freqz(G * B, A, 2^14, 16e3); axis([0, 8000, -10, 1]) */
diff --git a/drivers/opus/silk/resampler_rom.h b/drivers/opus/silk/resampler_rom.h
index 490b3388dc..2fa586ebf2 100644
--- a/drivers/opus/silk/resampler_rom.h
+++ b/drivers/opus/silk/resampler_rom.h
@@ -33,8 +33,8 @@ extern "C"
 {
 #endif
 
-#include "typedef.h"
-#include "resampler_structs.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/resampler_structs.h"
 
 #define RESAMPLER_DOWN_ORDER_FIR0               18
 #define RESAMPLER_DOWN_ORDER_FIR1               24
diff --git a/drivers/opus/silk/shell_coder.c b/drivers/opus/silk/shell_coder.c
index 79e392bd98..cd18ed638b 100644
--- a/drivers/opus/silk/shell_coder.c
+++ b/drivers/opus/silk/shell_coder.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* shell coder; pulse-subframe length is hardcoded */
 
diff --git a/drivers/opus/silk/sigm_Q15.c b/drivers/opus/silk/sigm_Q15.c
index 2df5b9695c..4c78250472 100644
--- a/drivers/opus/silk/sigm_Q15.c
+++ b/drivers/opus/silk/sigm_Q15.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /* Approximate sigmoid function */
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* fprintf(1, '%d, ', round(1024 * ([1 ./ (1 + exp(-(1:5))), 1] - 1 ./ (1 + exp(-(0:5)))))); */
 static const opus_int32 sigm_LUT_slope_Q10[ 6 ] = {
diff --git a/drivers/opus/silk/silk_main.h b/drivers/opus/silk/silk_main.h
index 2bdf89784d..14671dcf72 100644
--- a/drivers/opus/silk/silk_main.h
+++ b/drivers/opus/silk/silk_main.h
@@ -28,15 +28,15 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_MAIN_H
 #define SILK_MAIN_H
 
-#include "SigProc_FIX.h"
-#include "define.h"
-#include "structs.h"
-#include "tables.h"
-#include "PLC.h"
-#include "control.h"
-#include "debug.h"
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/define.h"
+#include "opus/silk/structs.h"
+#include "opus/silk/tables.h"
+#include "opus/silk/PLC.h"
+#include "opus/silk/control.h"
+#include "opus/silk/debug.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 
 /* Convert Left/Right stereo signal to adaptive Mid/Side representation */
 void silk_stereo_LR_to_MS(
diff --git a/drivers/opus/silk/sort.c b/drivers/opus/silk/sort.c
index 5e9ba08616..495292ad51 100644
--- a/drivers/opus/silk/sort.c
+++ b/drivers/opus/silk/sort.c
@@ -25,8 +25,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
 /* Insertion sort (fast for already almost sorted arrays):   */
@@ -35,7 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /*                                                           */
 /* Shell short:    http://en.wikipedia.org/wiki/Shell_sort   */
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 void silk_insertion_sort_increasing(
     opus_int32           *a,             /* I/O   Unsorted / Sorted vector               */
diff --git a/drivers/opus/silk/stereo_LR_to_MS.c b/drivers/opus/silk/stereo_LR_to_MS.c
index 678f46984b..e17a36046e 100644
--- a/drivers/opus/silk/stereo_LR_to_MS.c
+++ b/drivers/opus/silk/stereo_LR_to_MS.c
@@ -25,12 +25,12 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
-#include "stack_alloc.h"
+#include "opus/silk/silk_main.h"
+#include "opus/celt/stack_alloc.h"
 
 /* Convert Left/Right stereo signal to adaptive Mid/Side representation */
 void silk_stereo_LR_to_MS(
diff --git a/drivers/opus/silk/stereo_MS_to_LR.c b/drivers/opus/silk/stereo_MS_to_LR.c
index 34f43cf795..dddb62c788 100644
--- a/drivers/opus/silk/stereo_MS_to_LR.c
+++ b/drivers/opus/silk/stereo_MS_to_LR.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Convert adaptive Mid/Side representation to Left/Right stereo signal */
 void silk_stereo_MS_to_LR(
diff --git a/drivers/opus/silk/stereo_decode_pred.c b/drivers/opus/silk/stereo_decode_pred.c
index 56d94e56fe..d54faf137e 100644
--- a/drivers/opus/silk/stereo_decode_pred.c
+++ b/drivers/opus/silk/stereo_decode_pred.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Decode mid/side predictors */
 void silk_stereo_decode_pred(
diff --git a/drivers/opus/silk/stereo_encode_pred.c b/drivers/opus/silk/stereo_encode_pred.c
index bfe75b08e4..d68922cf01 100644
--- a/drivers/opus/silk/stereo_encode_pred.c
+++ b/drivers/opus/silk/stereo_encode_pred.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Entropy code the mid/side quantization indices */
 void silk_stereo_encode_pred(
diff --git a/drivers/opus/silk/stereo_find_predictor.c b/drivers/opus/silk/stereo_find_predictor.c
index 266293dff3..1f529b28d0 100644
--- a/drivers/opus/silk/stereo_find_predictor.c
+++ b/drivers/opus/silk/stereo_find_predictor.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Find least-squares prediction gain for one signal based on another and quantize it */
 opus_int32 silk_stereo_find_predictor(                          /* O    Returns predictor in Q13                    */
diff --git a/drivers/opus/silk/stereo_quant_pred.c b/drivers/opus/silk/stereo_quant_pred.c
index 834020d715..3a4d9b31da 100644
--- a/drivers/opus/silk/stereo_quant_pred.c
+++ b/drivers/opus/silk/stereo_quant_pred.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "silk_main.h"
+#include "opus/silk/silk_main.h"
 
 /* Quantize mid/side predictors */
 void silk_stereo_quant_pred(
diff --git a/drivers/opus/silk/structs.h b/drivers/opus/silk/structs.h
index 1826b36a80..c8c5dae844 100644
--- a/drivers/opus/silk/structs.h
+++ b/drivers/opus/silk/structs.h
@@ -28,11 +28,11 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_STRUCTS_H
 #define SILK_STRUCTS_H
 
-#include "typedef.h"
-#include "SigProc_FIX.h"
-#include "define.h"
-#include "entenc.h"
-#include "entdec.h"
+#include "opus/silk/typedef.h"
+#include "opus/silk/SigProc_FIX.h"
+#include "opus/silk/define.h"
+#include "opus/celt/entenc.h"
+#include "opus/celt/entdec.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/sum_sqr_shift.c b/drivers/opus/silk/sum_sqr_shift.c
index 8ec27f8a03..7e2a97b530 100644
--- a/drivers/opus/silk/sum_sqr_shift.c
+++ b/drivers/opus/silk/sum_sqr_shift.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "SigProc_FIX.h"
+#include "opus/silk/SigProc_FIX.h"
 
 /* Compute number of bits to right shift the sum of squares of a vector */
 /* of int16s to make it fit in an int32                                 */
diff --git a/drivers/opus/silk/table_LSF_cos.c b/drivers/opus/silk/table_LSF_cos.c
index 674b6a03e6..818a532c28 100644
--- a/drivers/opus/silk/table_LSF_cos.c
+++ b/drivers/opus/silk/table_LSF_cos.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 /* Cosine approximation table for LSF conversion */
 /* Q12 values (even) */
diff --git a/drivers/opus/silk/tables.h b/drivers/opus/silk/tables.h
index a91431e854..0dc7c37545 100644
--- a/drivers/opus/silk/tables.h
+++ b/drivers/opus/silk/tables.h
@@ -28,8 +28,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_TABLES_H
 #define SILK_TABLES_H
 
-#include "define.h"
-#include "structs.h"
+#include "opus/silk/define.h"
+#include "opus/silk/structs.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/tables_LTP.c b/drivers/opus/silk/tables_LTP.c
index 56b672db8b..6a05698252 100644
--- a/drivers/opus/silk/tables_LTP.c
+++ b/drivers/opus/silk/tables_LTP.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 const opus_uint8 silk_LTP_per_index_iCDF[3] = {
        179,     99,      0
diff --git a/drivers/opus/silk/tables_NLSF_CB_NB_MB.c b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
index ded35eee74..66c2fd4036 100644
--- a/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
+++ b/drivers/opus/silk/tables_NLSF_CB_NB_MB.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 static const opus_uint8 silk_NLSF_CB1_NB_MB_Q8[ 320 ] = {
         12,     35,     60,     83,    108,    132,    157,    180,
diff --git a/drivers/opus/silk/tables_NLSF_CB_WB.c b/drivers/opus/silk/tables_NLSF_CB_WB.c
index d83567ea6f..366f1bc887 100644
--- a/drivers/opus/silk/tables_NLSF_CB_WB.c
+++ b/drivers/opus/silk/tables_NLSF_CB_WB.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 static const opus_uint8 silk_NLSF_CB1_WB_Q8[ 512 ] = {
          7,     23,     38,     54,     69,     85,    100,    116,
diff --git a/drivers/opus/silk/tables_gain.c b/drivers/opus/silk/tables_gain.c
index 6df980616b..efb5b899b8 100644
--- a/drivers/opus/silk/tables_gain.c
+++ b/drivers/opus/silk/tables_gain.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/tables_other.c b/drivers/opus/silk/tables_other.c
index 246e960fa4..5e588fbf0c 100644
--- a/drivers/opus/silk/tables_other.c
+++ b/drivers/opus/silk/tables_other.c
@@ -25,13 +25,13 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "structs.h"
-#include "define.h"
-#include "tables.h"
+#include "opus/silk/structs.h"
+#include "opus/silk/define.h"
+#include "opus/silk/tables.h"
 
 #ifdef __cplusplus
 extern "C"
diff --git a/drivers/opus/silk/tables_pitch_lag.c b/drivers/opus/silk/tables_pitch_lag.c
index 0af5c5ace7..e1c4617d0a 100644
--- a/drivers/opus/silk/tables_pitch_lag.c
+++ b/drivers/opus/silk/tables_pitch_lag.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 const opus_uint8 silk_pitch_lag_iCDF[ 2 * ( PITCH_EST_MAX_LAG_MS - PITCH_EST_MIN_LAG_MS ) ] = {
        253,    250,    244,    233,    212,    182,    150,    131,
diff --git a/drivers/opus/silk/tables_pulses_per_block.c b/drivers/opus/silk/tables_pulses_per_block.c
index 05ba2318f8..a1e2fb03d8 100644
--- a/drivers/opus/silk/tables_pulses_per_block.c
+++ b/drivers/opus/silk/tables_pulses_per_block.c
@@ -25,11 +25,11 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 POSSIBILITY OF SUCH DAMAGE.
 ***********************************************************************/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "tables.h"
+#include "opus/silk/tables.h"
 
 const opus_uint8 silk_max_pulses_table[ 4 ] = {
          8,     10,     12,     16
diff --git a/drivers/opus/silk/typedef.h b/drivers/opus/silk/typedef.h
index ca2361bc82..3e193b4a45 100644
--- a/drivers/opus/silk/typedef.h
+++ b/drivers/opus/silk/typedef.h
@@ -28,8 +28,8 @@ POSSIBILITY OF SUCH DAMAGE.
 #ifndef SILK_TYPEDEF_H
 #define SILK_TYPEDEF_H
 
-#include "opus_types.h"
-#include "opus_defines.h"
+#include "opus/opus_types.h"
+#include "opus/opus_defines.h"
 
 #ifndef OPUS_FIXED_POINT
 # include <float.h>
diff --git a/drivers/opus/stream.c b/drivers/opus/stream.c
index 17293f2bca..8dd23e88e7 100644
--- a/drivers/opus/stream.c
+++ b/drivers/opus/stream.c
@@ -14,11 +14,11 @@
  last mod: $Id: vorbisfile.c 17573 2010-10-27 14:53:59Z xiphmont $
 
  ********************************************************************/
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "internal.h"
+#include "opus/internal.h"
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/drivers/opus/wincerts.c b/drivers/opus/wincerts.c
index 568a085e43..2a04eab6d1 100644
--- a/drivers/opus/wincerts.c
+++ b/drivers/opus/wincerts.c
@@ -16,11 +16,11 @@
 
   [1] <http://rt.openssl.org/Ticket/Display.html?id=2158>*/
 
-#ifdef OPUS_HAVE_CONFIG_H
-#include "opus_config.h"
+#ifdef OPUS_ENABLED
+#include "opus/opus_config.h"
 #endif
 
-#include "internal.h"
+#include "opus/internal.h"
 #if defined(OP_ENABLE_HTTP)&&defined(_WIN32)
 /*You must include windows.h before wincrypt.h and x509.h.*/
 # define WIN32_LEAN_AND_MEAN
diff --git a/drivers/png/SCsub b/drivers/png/SCsub
index 7b937d4dfb..9ee066cbb2 100644
--- a/drivers/png/SCsub
+++ b/drivers/png/SCsub
@@ -38,4 +38,3 @@ env.drivers_sources+=png_sources
 #env.add_source_files(env.drivers_sources, png_sources)
 
 Export('env')
-
diff --git a/drivers/png/resource_saver_png.cpp b/drivers/png/resource_saver_png.cpp
index 76e0c03c46..581efb5adc 100644
--- a/drivers/png/resource_saver_png.cpp
+++ b/drivers/png/resource_saver_png.cpp
@@ -214,6 +214,7 @@ Error ResourceSaverPNG::save_image(const String &p_path, Image &p_img) {
 	memdelete(f);
 
 	/* cleanup heap allocation */
+	png_destroy_write_struct(&png_ptr, &info_ptr);
 
 	return OK;
 }
diff --git a/drivers/register_driver_types.cpp b/drivers/register_driver_types.cpp
index 2647d23011..11c4e7dd29 100644
--- a/drivers/register_driver_types.cpp
+++ b/drivers/register_driver_types.cpp
@@ -29,6 +29,11 @@
 #include "convex_decomp/b2d_decompose.h"
 #endif
 
+#ifdef TOOLS_ENABLED
+#include "pe_bliss/pe_bliss_godot.h"
+#include "platform/windows/export/export.h"
+#endif
+
 #ifdef TREMOR_ENABLED
 #include "teora/audio_stream_ogg.h"
 #endif
@@ -49,10 +54,6 @@
 #include "theora/video_stream_theora.h"
 #endif
 
-#ifdef THEORAPLAYER_ENABLED
-#include "theoraplayer/video_stream_theoraplayer.h"
-#endif
-
 
 #include "drivers/nrex/regex.h"
 
@@ -103,10 +104,6 @@ static ResourceFormatLoaderAudioStreamSpeex *speex_stream_loader=NULL;
 static ResourceFormatLoaderVideoStreamTheora* theora_stream_loader = NULL;
 #endif
 
-#ifdef THEORAPLAYER_ENABLED
-static ResourceFormatLoaderVideoStreamTheoraplayer* theoraplayer_stream_loader = NULL;
-#endif
-
 #ifdef MUSEPACK_ENABLED
 static ResourceFormatLoaderAudioStreamMPC * mpc_stream_loader=NULL;
 #endif
@@ -228,12 +225,6 @@ void register_driver_types() {
 	ObjectTypeDB::register_type<VideoStreamTheora>();
 #endif
 
-#ifdef THEORAPLAYER_ENABLED
-	theoraplayer_stream_loader = memnew( ResourceFormatLoaderVideoStreamTheoraplayer );
-	ResourceLoader::add_resource_format_loader(theoraplayer_stream_loader);
-	ObjectTypeDB::register_type<VideoStreamTheoraplayer>();
-#endif
-
 
 #ifdef TOOLS_ENABLED
 #ifdef SQUISH_ENABLED
@@ -246,7 +237,7 @@ void register_driver_types() {
 #ifdef ETC1_ENABLED
 	_register_etc1_compress_func();
 #endif
-
+	
 	initialize_chibi();
 }
 
@@ -272,9 +263,6 @@ void unregister_driver_types() {
 	memdelete (theora_stream_loader);
 #endif
 
-#ifdef THEORAPLAYER_ENABLED
-	memdelete (theoraplayer_stream_loader);
-#endif
 
 #ifdef MUSEPACK_ENABLED
 
diff --git a/drivers/squish/SCsub b/drivers/squish/SCsub
index d55a32ad5e..da39dc1ebc 100644
--- a/drivers/squish/SCsub
+++ b/drivers/squish/SCsub
@@ -21,4 +21,3 @@ if (env["tools"]=="yes"):
 #env.add_source_files(env.drivers_sources, squish_sources)
 
 Export('env')
-
diff --git a/drivers/theora/SCsub b/drivers/theora/SCsub
index ecabce6c9d..fa85b49804 100644
--- a/drivers/theora/SCsub
+++ b/drivers/theora/SCsub
@@ -1,4 +1,3 @@
-
 Import('env')
 
 sources = [
@@ -32,7 +31,37 @@ sources = [
 	"theora/video_stream_theora.cpp",
 ]
 
-if env['use_theoraplayer_binary'] != "yes":
-	env.drivers_sources += sources
+sources_x86 = [
+	"theora/x86/mmxencfrag.c",
+	"theora/x86/mmxfdct.c",
+	"theora/x86/mmxfrag.c",
+	"theora/x86/mmxidct.c",
+	"theora/x86/mmxstate.c",
+	"theora/x86/sse2fdct.c",
+	"theora/x86/x86enc.c",
+	"theora/x86/x86state.c",
+]
+
+sources_x86_vc = [
+	"theora/x86_vc/mmxencfrag.c",
+	"theora/x86_vc/mmxfdct.c",
+	"theora/x86_vc/mmxfrag.c",
+	"theora/x86_vc/mmxidct.c",
+	"theora/x86_vc/mmxstate.c",
+	"theora/x86_vc/x86enc.c",
+	"theora/x86_vc/x86state.c",
+]
+
+env.drivers_sources += sources
+
+if (env["x86_opt_gcc"]):
+	env.Append(CCFLAGS=["-DOC_X86_ASM"])
+	env.drivers_sources += sources_x86
+
+if (env["x86_opt_vc"]):
+	env.Append(CCFLAGS=["-DOC_X86_ASM"])
+	env.drivers_sources += sources_x86_vc
+
+	
 
 
diff --git a/drivers/theora/decode.c b/drivers/theora/decode.c
index 7be66463d8..882606ae77 100644
--- a/drivers/theora/decode.c
+++ b/drivers/theora/decode.c
@@ -1611,28 +1611,35 @@ static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
   int                  sum1;
   int                  bx;
   int                  by;
+  int		       _rlimit1;
+  int		       _rlimit2;
   rdst=_dst;
   rsrc=_src;
-  for(bx=0;bx<8;bx++){
+  for(bx=0;bx<8;++bx){
     cdst=rdst;
     csrc=rsrc;
-    for(by=0;by<10;by++){
+    _rlimit1 = _rlimit2 = _flimit;
+    for(by=0;by<10;++by){
       r[by]=*csrc;
       csrc+=_src_ystride;
     }
     sum0=sum1=0;
-    for(by=0;by<4;by++){
-      sum0+=abs(r[by+1]-r[by]);
-      sum1+=abs(r[by+5]-r[by+6]);
+    for(by=0;by<4;++by){
+      int sumed = abs(r[by+1]-r[by]);
+      sum0+=sumed;
+      _rlimit1-=sumed;
+      sumed = abs(r[by+5]-r[by+6]);
+      sum1+=sumed;
+      _rlimit2-=sumed;
     }
     *_variance0+=OC_MINI(255,sum0);
     *_variance1+=OC_MINI(255,sum1);
-    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
+    if(_rlimit1>0&&_rlimit2>0&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ /*_rlimitN>0 <=> sumN<_flimit*/
       *cdst=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
       cdst+=_dst_ystride;
       *cdst=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
       cdst+=_dst_ystride;
-      for(by=0;by<4;by++){
+      for(by=0;by<4;++by){
         *cdst=(unsigned char)(r[by]+r[by+1]+r[by+2]+r[by+3]*2+
          r[by+4]+r[by+5]+r[by+6]+4>>3);
         cdst+=_dst_ystride;
@@ -1642,13 +1649,13 @@ static void oc_filter_hedge(unsigned char *_dst,int _dst_ystride,
       *cdst=(unsigned char)(r[5]+r[6]+r[7]+r[8]*2+r[9]*3+4>>3);
     }
     else{
-      for(by=1;by<=8;by++){
+      for(by=1;by<=8;++by){
         *cdst=(unsigned char)r[by];
         cdst+=_dst_ystride;
       }
     }
-    rdst++;
-    rsrc++;
+    ++rdst;
+    ++rsrc;
   }
 }
 
@@ -1663,19 +1670,26 @@ static void oc_filter_vedge(unsigned char *_dst,int _dst_ystride,
   int                  sum1;
   int                  bx;
   int                  by;
+  int		       _rlimit1;
+  int		       _rlimit2;
   cdst=_dst;
-  for(by=0;by<8;by++){
+  for(by=0;by<8;++by){
     rsrc=cdst-1;
     rdst=cdst;
-    for(bx=0;bx<10;bx++)r[bx]=*rsrc++;
+    for(bx=0;bx<10;++bx)r[bx]=*rsrc++;
     sum0=sum1=0;
-    for(bx=0;bx<4;bx++){
-      sum0+=abs(r[bx+1]-r[bx]);
-      sum1+=abs(r[bx+5]-r[bx+6]);
+    _rlimit1 = _rlimit2 = _flimit;
+    for(bx=0;bx<4;++bx){
+      int sumed = abs(r[bx+1]-r[bx]);
+      sum0+=sumed;
+      _rlimit1-=sumed;
+      sumed = abs(r[bx+5]-r[bx+6]);
+      sum1+=sumed;
+      _rlimit2-=sumed;
     }
     _variances[0]+=OC_MINI(255,sum0);
     _variances[1]+=OC_MINI(255,sum1);
-    if(sum0<_flimit&&sum1<_flimit&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){
+    if(_rlimit1>0&&_rlimit2>0&&r[5]-r[4]<_qstep&&r[4]-r[5]<_qstep){ /*_rlimitN>0 <=> sumN<_flimit*/
       *rdst++=(unsigned char)(r[0]*3+r[1]*2+r[2]+r[3]+r[4]+4>>3);
       *rdst++=(unsigned char)(r[0]*2+r[1]+r[2]*2+r[3]+r[4]+r[5]+4>>3);
       for(bx=0;bx<4;bx++){
diff --git a/drivers/theora/encint.h b/drivers/theora/encint.h
index 97897d5a04..82338256dc 100644
--- a/drivers/theora/encint.h
+++ b/drivers/theora/encint.h
@@ -14,6 +14,7 @@
   last mod: $Id: encint.h 16503 2009-08-22 18:14:02Z giles $
 
  ********************************************************************/
+ 
 #if !defined(_encint_H)
 # define _encint_H (1)
 # if defined(HAVE_CONFIG_H)
diff --git a/drivers/theora/video_stream_theora.cpp b/drivers/theora/video_stream_theora.cpp
index bea49e34b7..fe248bc911 100644
--- a/drivers/theora/video_stream_theora.cpp
+++ b/drivers/theora/video_stream_theora.cpp
@@ -7,11 +7,34 @@
 
 
 int VideoStreamPlaybackTheora::	buffer_data() {
-  char *buffer=ogg_sync_buffer(&oy,4096);
-  int bytes=file->get_buffer((uint8_t*)buffer, 4096);
 
-  ogg_sync_wrote(&oy,bytes);
-  return(bytes);
+	char *buffer=ogg_sync_buffer(&oy,4096);
+	// Feeds up to 4096 bytes into the ogg sync state; returns the byte count, 0 when no data is left.
+#ifdef THEORA_USE_THREAD_STREAMING
+	// Data comes from the ring buffer that the streaming thread fills from the file.
+	int read;
+
+	do {
+		thread_sem->post(); // wake the streaming thread so it refills the ring buffer
+		read = MIN(ring_buffer.data_left(),4096);
+		if (read) {
+			ring_buffer.read((uint8_t*)buffer,read);
+			ogg_sync_wrote(&oy,read);
+		} else {
+			OS::get_singleton()->delay_usec(100);
+		}
+		// Stop retrying once the thread hit EOF and the buffer is drained, otherwise this spins forever.
+	} while(read==0 && !(thread_eof && ring_buffer.data_left()==0));
+
+	return read;
+
+#else
+	// Unthreaded path: read straight from the file.
+	int bytes=file->get_buffer((uint8_t*)buffer, 4096);
+	ogg_sync_wrote(&oy,bytes);
+	return(bytes);
+
+#endif
 }
 
 int VideoStreamPlaybackTheora::queue_page(ogg_page *page){
@@ -178,7 +201,7 @@ void VideoStreamPlaybackTheora::video_write(void){
 
 void VideoStreamPlaybackTheora::clear() {
 
-	if (file_name == "")
+	if (!file)
 		return;
 
 	if(vorbis_p){
@@ -200,6 +223,14 @@ void VideoStreamPlaybackTheora::clear() {
 	}
 	ogg_sync_clear(&oy);
 
+#ifdef THEORA_USE_THREAD_STREAMING
+	thread_exit=true;
+	thread_sem->post(); //just in case
+	Thread::wait_to_finish(thread);
+	memdelete(thread);
+	thread=NULL;
+	ring_buffer.clear();
+#endif
 	//file_name = "";
 
 	theora_p = 0;
@@ -208,11 +239,16 @@ void VideoStreamPlaybackTheora::clear() {
 	frames_pending = 0;
 	videobuf_time = 0;
 
+	if (file) {
+		memdelete(file);
+	}
+	file=NULL;
 	playing = false;
 };
 
 void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 
+	ERR_FAIL_COND(playing);
 	ogg_packet op;
 	th_setup_info    *ts = NULL;
 
@@ -223,7 +259,17 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 	file = FileAccess::open(p_file, FileAccess::READ);
 	ERR_FAIL_COND(!file);
 
+#ifdef THEORA_USE_THREAD_STREAMING
+	thread_exit=false;
+	thread_eof=false;
+	//pre-fill buffer
+	int to_read = ring_buffer.space_left();
+	int read = file->get_buffer(read_buffer.ptr(),to_read);
+	ring_buffer.write(read_buffer.ptr(),read);
+
+	thread=Thread::create(_streaming_thread,this);
 
+#endif
 
 	ogg_sync_init(&oy);
 
@@ -239,7 +285,9 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 	/* Only interested in Vorbis/Theora streams */
 	int stateflag = 0;
 
-    int audio_track_skip=audio_track;
+	int audio_track_skip=audio_track;
+
+
 	while(!stateflag){
 		int ret=buffer_data();
 		if(ret==0)break;
@@ -265,15 +313,21 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 				copymem(&to,&test,sizeof(test));
 				theora_p=1;
 			}else if(!vorbis_p && vorbis_synthesis_headerin(&vi,&vc,&op)>=0){
+
+
 				/* it is vorbis */
-                if (audio_track_skip) {
-                    vorbis_info_clear(&vi);
-                    vorbis_comment_clear(&vc);
-                    audio_track_skip--;
-                } else {
-                    copymem(&vo,&test,sizeof(test));
-                    vorbis_p=1;
-                }
+				if (audio_track_skip) {
+					vorbis_info_clear(&vi);
+					vorbis_comment_clear(&vc);
+					ogg_stream_clear(&test);
+					vorbis_info_init(&vi);
+					vorbis_comment_init(&vc);
+
+					audio_track_skip--;
+				} else {
+                                        copymem(&vo,&test,sizeof(test));
+					vorbis_p=1;
+				}
 			}else{
 				/* whatever it is, we don't care about it */
 				ogg_stream_clear(&test);
@@ -357,6 +411,7 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 		th_decode_ctl(td,TH_DECCTL_GET_PPLEVEL_MAX,&pp_level_max,
 					  sizeof(pp_level_max));
 		pp_level=pp_level_max;
+		pp_level=0;
 		th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,sizeof(pp_level));
 		pp_inc=0;
 
@@ -392,6 +447,7 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 		fprintf(stderr,"Ogg logical stream %lx is Vorbis %d channel %ld Hz audio.\n",
 				vo.serialno,vi.channels,vi.rate);
 		//_setup(vi.channels, vi.rate);
+
 	}else{
 		/* tear down the partial vorbis setup */
 		vorbis_info_clear(&vi);
@@ -401,6 +457,9 @@ void VideoStreamPlaybackTheora::set_file(const String& p_file) {
 	playing = false;
 	buffering=true;
 	time=0;
+	audio_frames_wrote=0;
+
+
 };
 
 float VideoStreamPlaybackTheora::get_time() const {
@@ -422,17 +481,23 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 		return;
 	};
 
+#ifdef THEORA_USE_THREAD_STREAMING
+	thread_sem->post();
+#endif
+
 	//double ctime =AudioServer::get_singleton()->get_mix_time();
 
 	//print_line("play "+rtos(p_delta));
 	time+=p_delta;
 
-	if (videobuf_time>get_time())
+	if (videobuf_time>get_time()) {
 		return; //no new frames need to be produced
+	}
 
 	bool frame_done=false;
+	bool audio_done=false;
 
-	while (!frame_done) {
+	while (!frame_done || !audio_done) {
 		//a frame needs to be produced
 
 		ogg_packet op;
@@ -490,6 +555,17 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 				audio_pending=true;
 
 
+				if (vd.granulepos>=0) {
+				//	print_line("wrote: "+itos(audio_frames_wrote)+" gpos: "+itos(vd.granulepos));
+				}
+
+				//print_line("mix audio!");
+
+				audio_frames_wrote+=ret-to_read;
+
+				//print_line("AGP: "+itos(vd.granulepos)+" added "+itos(ret-to_read));
+
+
 			} else {
 
 				/* no pending audio; is there a pending packet to decode? */
@@ -503,6 +579,9 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 				};
 			}
 
+
+			audio_done = videobuf_time < (audio_frames_wrote/float(vi.rate));
+
 			if (buffer_full)
 				break;
 		}
@@ -512,7 +591,7 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 			if(ogg_stream_packetout(&to,&op)>0){
 
 
-				if(pp_inc){
+				if(false && pp_inc){
 					pp_level+=pp_inc;
 					th_decode_ctl(td,TH_DECCTL_SET_PPLEVEL,&pp_level,
 								  sizeof(pp_level));
@@ -540,19 +619,24 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 					 keyframing.  Soon enough libtheora will be able to deal
 					 with non-keyframe seeks.  */
 
-					if(videobuf_time>=get_time())
+					if(videobuf_time>=get_time()) {
 						frame_done=true;
-					else{
+					} else{
 						/*If we are too slow, reduce the pp level.*/
 						pp_inc=pp_level>0?-1:0;
 					}
 				}
 
-			} else
+			} else {
+
 				break;
+			}
 		}
-
+#ifdef THEORA_USE_THREAD_STREAMING
+		if (file && thread_eof && ring_buffer.data_left()==0) {
+#else
 		if (file && /*!videobuf_ready && */ file->eof_reached()) {
+#endif
 			printf("video done, stopping\n");
 			stop();
 			return;
@@ -567,7 +651,9 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 			}
 		}
 	#else
-		if (!frame_done){
+
+
+		if (!frame_done || !audio_done){
 			//what's the point of waiting for audio to grab a page?
 
 			buffer_data();
@@ -596,6 +682,7 @@ void VideoStreamPlaybackTheora::update(float p_delta) {
 		else if(tdiff<ti.fps_denominator*0.05/ti.fps_numerator){
 			pp_inc=pp_level>0?-1:0;
 		}
+
 	}
 
 	video_write();
@@ -607,15 +694,21 @@ void VideoStreamPlaybackTheora::play() {
 
 	if (!playing)
 		time=0;
+	else {
+		stop();
+	}
+
 	playing = true;
 	delay_compensation=Globals::get_singleton()->get("audio/video_delay_compensation_ms");
 	delay_compensation/=1000.0;
 
+
 };
 
 void VideoStreamPlaybackTheora::stop() {
 
 	if (playing) {
+
 		clear();
 		set_file(file_name); //reset
 	}
@@ -693,7 +786,33 @@ int VideoStreamPlaybackTheora::get_mix_rate() const{
 	return vi.rate;
 }
 
+#ifdef THEORA_USE_THREAD_STREAMING
+
+
+void VideoStreamPlaybackTheora::_streaming_thread(void *ud) {
+	// Thread entry point: keeps ring_buffer topped up from the file until thread_exit is set. ud is the playback instance.
+	VideoStreamPlaybackTheora *vs=(VideoStreamPlaybackTheora*)ud;
+
+	while(!vs->thread_exit) {
 
+		//refill whatever space is currently free in the ring buffer
+		if (!vs->thread_eof) {
+
+			int to_read = vs->ring_buffer.space_left();
+			if (to_read) {
+				int read = vs->file->get_buffer(vs->read_buffer.ptr(),to_read);
+				vs->ring_buffer.write(vs->read_buffer.ptr(),read);
+				vs->thread_eof=vs->file->eof_reached(); // set after the write, so the data is already in the buffer
+			}
+
+
+		}
+		// Sleep until buffer_data(), update() or clear() post the semaphore.
+		vs->thread_sem->wait();
+	}
+}
+
+#endif
 
 VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() {
 
@@ -709,16 +828,34 @@ VideoStreamPlaybackTheora::VideoStreamPlaybackTheora() {
 	texture = Ref<ImageTexture>( memnew(ImageTexture ));
 	mix_callback=NULL;
 	mix_udata=NULL;
-    audio_track=0;
+	    audio_track=0;
 	delay_compensation=0;
+	audio_frames_wrote=0;
+
+#ifdef THEORA_USE_THREAD_STREAMING
+	int rb_power = nearest_shift(RB_SIZE_KB*1024);
+	ring_buffer.resize(rb_power);
+	read_buffer.resize(RB_SIZE_KB*1024);
+	thread_sem=Semaphore::create();
+	thread=NULL;
+	thread_exit=false;
+	thread_eof=false;
+
+#endif
 };
 
 VideoStreamPlaybackTheora::~VideoStreamPlaybackTheora() {
 
 	clear();
 
 	if (file)
 		memdelete(file);
+
+#ifdef THEORA_USE_THREAD_STREAMING
+	// Free the semaphore only after clear(): clear() posts to it and joins the streaming thread that waits on it.
+	memdelete(thread_sem);
+#endif
+
 };
 
 
diff --git a/drivers/theora/video_stream_theora.h b/drivers/theora/video_stream_theora.h
index 95c7fe88f6..c15ef31cfc 100644
--- a/drivers/theora/video_stream_theora.h
+++ b/drivers/theora/video_stream_theora.h
@@ -6,9 +6,13 @@
 #include "theora/theoradec.h"
 #include "vorbis/codec.h"
 #include "os/file_access.h"
-
+#include "ring_buffer.h"
 #include "io/resource_loader.h"
 #include "scene/resources/video_stream.h"
+#include "os/thread.h"
+#include "os/semaphore.h"
+
+//#define THEORA_USE_THREAD_STREAMING
 
 class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 
@@ -32,6 +36,7 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 	void video_write(void);
 	float get_time() const;
 
+
 	ogg_sync_state   oy;
 	ogg_page         og;
 	ogg_stream_state vo;
@@ -65,7 +70,25 @@ class VideoStreamPlaybackTheora : public VideoStreamPlayback {
 	AudioMixCallback mix_callback;
 	void* mix_udata;
 
-    int audio_track;
+#ifdef THEORA_USE_THREAD_STREAMING
+
+	enum {
+		RB_SIZE_KB=1024
+	};
+
+	RingBuffer<uint8_t> ring_buffer;
+	Vector<uint8_t> read_buffer;
+	bool thread_eof;
+	Semaphore *thread_sem;
+	Thread *thread;
+	volatile bool thread_exit;
+
+	static void _streaming_thread(void *ud);
+
+#endif
+
+
+	int audio_track;
 
 protected:
 
@@ -115,20 +138,20 @@ class VideoStreamTheora : public VideoStream {
 	OBJ_TYPE(VideoStreamTheora,VideoStream);
 
 	String file;
-    int audio_track;
+	int audio_track;
 
 
 public:
 
 	Ref<VideoStreamPlayback> instance_playback() {
 		Ref<VideoStreamPlaybackTheora> pb = memnew( VideoStreamPlaybackTheora );
-        pb->set_audio_track(audio_track);
+		pb->set_audio_track(audio_track);
 		pb->set_file(file);
 		return pb;
 	}
 
 	void set_file(const String& p_file) { file=p_file; }
-    void set_audio_track(int p_track) { audio_track=p_track; }
+	void set_audio_track(int p_track) { audio_track=p_track; }
 
     VideoStreamTheora() { audio_track=0; }
 
diff --git a/drivers/unix/SCsub b/drivers/unix/SCsub
index bcd231579c..e8b3cadfc7 100644
--- a/drivers/unix/SCsub
+++ b/drivers/unix/SCsub
@@ -1,7 +1,13 @@
 Import('env')
 
+# Generate the C++ source that hard-codes the configured global settings path for OS_Unix.
+ed_gl_set='#include "os_unix.h"\n'
+ed_gl_set+='String OS_Unix::get_global_settings_path() const {\n'
+ed_gl_set+='\treturn "' + env["unix_global_settings_path"]+'";\n'
+ed_gl_set+='}\n'
+with open("os_unix_global_settings_path.cpp","wb") as f:
+	f.write(ed_gl_set)
+
 env.add_source_files(env.drivers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp
index 8617061ad4..94a7b03f45 100644
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -65,15 +65,25 @@ void OS_Unix::print_error(const char* p_function,const char* p_file,int p_line,c
 	if (!_print_error_enabled)
 		return;
 
-	if (p_rationale && p_rationale[0]) {
-
-		print("\E[1;31;40mERROR: %s: \E[1;37;40m%s\n",p_function,p_rationale);
-		print("\E[0;31;40m   At: %s:%i.\E[0;0;37m\n",p_file,p_line);
-
-	} else {
-		print("\E[1;31;40mERROR: %s: \E[1;37;40m%s\n",p_function,p_code);
-		print("\E[0;31;40m   At: %s:%i.\E[0;0;37m\n",p_file,p_line);
-
+	const char* err_details;
+	if (p_rationale && p_rationale[0])
+		err_details=p_rationale;
+	else
+		err_details=p_code;
+
+	switch(p_type) {
+		case ERR_ERROR:
+			print("\E[1;31mERROR: %s: \E[0m\E[1m%s\n",p_function,err_details);
+			print("\E[0;31m   At: %s:%i.\E[0m\n",p_file,p_line);
+			break;
+		case ERR_WARNING:
+			print("\E[1;33mWARNING: %s: \E[0m\E[1m%s\n",p_function,err_details);
+			print("\E[0;33m     At: %s:%i.\E[0m\n",p_file,p_line);
+			break;
+		case ERR_SCRIPT:
+			print("\E[1;35mSCRIPT ERROR: %s: \E[0m\E[1m%s\n",p_function,err_details);
+			print("\E[0;35m          At: %s:%i.\E[0m\n",p_file,p_line);
+			break;
 	}
 }
 
@@ -467,6 +477,14 @@ String OS_Unix::get_data_dir() const {
 
 }
 
+String OS_Unix::get_installed_templates_path() const {
+	String base=get_global_settings_path();
+	String templates_dir; // stays empty when the global settings path is empty
+	if (base!="")
+		templates_dir=base+"/templates/";
+	return templates_dir;
+}
+
 String OS_Unix::get_executable_path() const {
 
 #ifdef __linux__
diff --git a/drivers/unix/os_unix.h b/drivers/unix/os_unix.h
index 2ee6102164..9ac18c9055 100644
--- a/drivers/unix/os_unix.h
+++ b/drivers/unix/os_unix.h
@@ -64,6 +64,8 @@ protected:
 	
 	String stdin_buf;
 
+	String get_global_settings_path() const;
+
 public:
 
 
@@ -111,6 +113,7 @@ public:
 
 	virtual void debug_break();
 
+	virtual String get_installed_templates_path() const;
 	virtual String get_executable_path() const;
 	virtual String get_data_dir() const;
 
diff --git a/drivers/unix/packet_peer_udp_posix.cpp b/drivers/unix/packet_peer_udp_posix.cpp
index 94b4c35923..2111619080 100644
--- a/drivers/unix/packet_peer_udp_posix.cpp
+++ b/drivers/unix/packet_peer_udp_posix.cpp
@@ -121,7 +121,7 @@ Error PacketPeerUDPPosix::_poll(bool p_wait) {
 	struct sockaddr_in from = {0};
 	socklen_t len = sizeof(struct sockaddr_in);
 	int ret;
-	while ( (ret = recvfrom(sockfd, recv_buffer, MIN(sizeof(recv_buffer),rb.data_left()-12), p_wait?0:MSG_DONTWAIT, (struct sockaddr*)&from, &len)) > 0) {
+	while ( (ret = recvfrom(sockfd, recv_buffer, MIN((int)sizeof(recv_buffer),MAX(rb.space_left()-12, 0)), p_wait?0:MSG_DONTWAIT, (struct sockaddr*)&from, &len)) > 0) {
 		rb.write((uint8_t*)&from.sin_addr, 4);
 		uint32_t port = ntohs(from.sin_port);
 		rb.write((uint8_t*)&port, 4);
@@ -131,6 +131,8 @@ Error PacketPeerUDPPosix::_poll(bool p_wait) {
 		++queue_count;
 	};
 
+
+	// TODO: Should ECONNRESET be handled here?
 	if (ret == 0 || (ret == -1 && errno != EAGAIN) ) {
 		close();
 		return FAILED;
diff --git a/drivers/vorbis/SCsub b/drivers/vorbis/SCsub
index 2c137629ac..87805cc2d8 100644
--- a/drivers/vorbis/SCsub
+++ b/drivers/vorbis/SCsub
@@ -1,4 +1,3 @@
-
 Import('env')
 
 sources = [
@@ -34,7 +33,4 @@ sources_lib = [
 ]
 
 env.drivers_sources += sources
-
-if env['theora'] != "yes" or env['use_theoraplayer_binary'] != "yes":
-	env.drivers_sources += sources_lib
-
+env.drivers_sources += sources_lib
diff --git a/drivers/vorbis/audio_stream_ogg_vorbis.h b/drivers/vorbis/audio_stream_ogg_vorbis.h
index 827d8b0be3..5dbada962a 100644
--- a/drivers/vorbis/audio_stream_ogg_vorbis.h
+++ b/drivers/vorbis/audio_stream_ogg_vorbis.h
@@ -85,7 +85,7 @@ public:
 	virtual void stop();
 	virtual bool is_playing() const;
 
-	virtual void set_loop_restart_time(float p_time) { loop_restart_time=0; }
+	virtual void set_loop_restart_time(float p_time) { loop_restart_time=p_time; }
 
 	virtual void set_paused(bool p_paused);
 	virtual bool is_paused(bool p_paused) const;
diff --git a/drivers/webp/SCsub b/drivers/webp/SCsub
index 3ae046ff79..f65bd13dba 100644
--- a/drivers/webp/SCsub
+++ b/drivers/webp/SCsub
@@ -1,64 +1,115 @@
 Import('env')
 
-
 webp_sources = [
-	"webp/mux/muxedit.c",
-	"webp/mux/muxread.c",
-	"webp/mux/muxinternal.c",
-	"webp/mux/demux.c",
-	"webp/enc/tree.c",
-	"webp/enc/analysis.c",
-	"webp/enc/backward_references.c",
-	"webp/enc/alpha.c",
-	"webp/enc/picture.c",
-	"webp/enc/frame.c",
-	"webp/enc/webpenc.c",
-	"webp/enc/cost.c",
-	"webp/enc/filter.c",
-	"webp/enc/vp8l.c",
-	"webp/enc/quant.c",
-	"webp/enc/histogram.c",
-	"webp/enc/syntax.c",
-	"webp/enc/config.c",
-	"webp/enc/layer.c",
-	"webp/enc/iterator.c",
-	"webp/dsp/dec_sse2.c",
-	"webp/dsp/upsampling_sse2.c",
-	"webp/dsp/dec_neon.c",
-	"webp/dsp/enc.c",
-	"webp/dsp/enc_sse2.c",
-	"webp/dsp/upsampling.c",
-	"webp/dsp/lossless.c",
-	"webp/dsp/cpu.c",
-	"webp/dsp/dec.c",
-	"webp/dsp/yuv.c",
-	"webp/utils/bit_reader.c",
-	"webp/utils/filters.c",
-	"webp/utils/bit_writer.c",
-	"webp/utils/thread.c",
-	"webp/utils/quant_levels.c",
-	"webp/utils/color_cache.c",
-	"webp/utils/rescaler.c",
-	"webp/utils/utils.c",
-	"webp/utils/huffman.c",
-	"webp/utils/huffman_encode.c",
-	"webp/dec/tree.c",
-	"webp/dec/alpha.c",
-	"webp/dec/frame.c",
-	"webp/dec/vp8l.c",
-	"webp/dec/vp8.c",
-	"webp/dec/quant.c",
-	"webp/dec/webp.c",
-	"webp/dec/buffer.c",
-	"webp/dec/io.c",
-	"webp/dec/layer.c",
-	"webp/dec/idec.c",
-	"webp/image_loader_webp.cpp"
+"webp/enc/webpenc.c",\
+"webp/enc/near_lossless.c",\
+"webp/enc/frame.c",\
+"webp/enc/alpha.c",\
+"webp/enc/picture_csp.c",\
+"webp/enc/vp8l.c",\
+"webp/enc/picture_psnr.c",\
+"webp/enc/delta_palettization.c",\
+"webp/enc/syntax.c",\
+"webp/enc/backward_references.c",\
+"webp/enc/token.c",\
+"webp/enc/analysis.c",\
+"webp/enc/iterator.c",\
+"webp/enc/picture_tools.c",\
+"webp/enc/picture_rescale.c",\
+"webp/enc/config.c",\
+"webp/enc/tree.c",\
+"webp/enc/cost.c",\
+"webp/enc/picture.c",\
+"webp/enc/quant.c",\
+"webp/enc/filter.c",\
+"webp/enc/histogram.c",\
+"webp/image_loader_webp.cpp",\
+"webp/utils/rescaler.c",\
+"webp/utils/filters.c",\
+"webp/utils/quant_levels_dec.c",\
+"webp/utils/huffman.c",\
+"webp/utils/thread.c",\
+"webp/utils/quant_levels.c",\
+"webp/utils/bit_writer.c",\
+"webp/utils/bit_reader.c",\
+"webp/utils/random.c",\
+"webp/utils/utils.c",\
+"webp/utils/huffman_encode.c",\
+"webp/utils/color_cache.c",\
+"webp/mux/muxinternal.c",\
+"webp/mux/muxread.c",\
+"webp/mux/anim_encode.c",\
+"webp/mux/muxedit.c",\
+"webp/dec/webp.c",\
+"webp/dec/frame.c",\
+"webp/dec/alpha.c",\
+"webp/dec/vp8l.c",\
+"webp/dec/io.c",\
+"webp/dec/vp8.c",\
+"webp/dec/idec.c",\
+"webp/dec/tree.c",\
+"webp/dec/buffer.c",\
+"webp/dec/quant.c",\
+"webp/demux/demux.c",\
+"webp/demux/anim_decode.c",\
+"webp/dsp/yuv.c",\
+"webp/dsp/filters_sse2.c",\
+"webp/dsp/dec_sse41.c",\
+"webp/dsp/rescaler.c",\
+"webp/dsp/lossless_sse2.c",\
+"webp/dsp/alpha_processing_sse41.c",\
+"webp/dsp/alpha_processing_sse2.c",\
+"webp/dsp/filters.c",\
+"webp/dsp/upsampling_mips_dsp_r2.c",\
+"webp/dsp/dec_neon.c",\
+"webp/dsp/enc_neon.c",\
+"webp/dsp/lossless_enc_mips32.c",\
+"webp/dsp/lossless_enc_sse2.c",\
+"webp/dsp/upsampling.c",\
+"webp/dsp/lossless_enc_neon.c",\
+"webp/dsp/alpha_processing.c",\
+"webp/dsp/cost_sse2.c",\
+"webp/dsp/dec_mips32.c",\
+"webp/dsp/enc_avx2.c",\
+"webp/dsp/rescaler_mips32.c",\
+"webp/dsp/enc.c",\
+"webp/dsp/lossless_enc_sse41.c",\
+"webp/dsp/cost_mips32.c",\
+"webp/dsp/lossless_mips_dsp_r2.c",\
+"webp/dsp/filters_mips_dsp_r2.c",\
+"webp/dsp/upsampling_neon.c",\
+"webp/dsp/alpha_processing_mips_dsp_r2.c",\
+"webp/dsp/enc_mips_dsp_r2.c",\
+"webp/dsp/lossless.c",\
+"webp/dsp/yuv_mips_dsp_r2.c",\
+"webp/dsp/cost_mips_dsp_r2.c",\
+"webp/dsp/argb.c",\
+"webp/dsp/dec_sse2.c",\
+"webp/dsp/rescaler_sse2.c",\
+"webp/dsp/enc_sse41.c",\
+"webp/dsp/argb_mips_dsp_r2.c",\
+"webp/dsp/lossless_enc_mips_dsp_r2.c",\
+"webp/dsp/dec_clip_tables.c",\
+"webp/dsp/yuv_mips32.c",\
+"webp/dsp/cpu.c",\
+"webp/dsp/dec.c",\
+"webp/dsp/argb_sse2.c",\
+"webp/dsp/lossless_neon.c",\
+"webp/dsp/lossless_enc.c",\
+"webp/dsp/enc_mips32.c",\
+"webp/dsp/cost.c",\
+"webp/dsp/rescaler_mips_dsp_r2.c",\
+"webp/dsp/dec_mips_dsp_r2.c",\
+"webp/dsp/rescaler_neon.c",\
+"webp/dsp/yuv_sse2.c",\
+"webp/dsp/enc_sse2.c",\
+"webp/dsp/upsampling_sse2.c"
 ]
 
 env.drivers_sources+=webp_sources
 
 #env.add_source_files(env.drivers_sources, webp_sources)
 
+
 Export('env')
 
diff --git a/drivers/webp/config.h b/drivers/webp/config.h
new file mode 100644
index 0000000000..d85e5d1da6
--- /dev/null
+++ b/drivers/webp/config.h
@@ -0,0 +1,145 @@
+/* src/webp/config.h.  Generated from config.h.in by configure.  */
+/* src/webp/config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define if building universal (internal helper macro) */
+/* #undef AC_APPLE_UNIVERSAL_BUILD */
+
+/* Set to 1 if __builtin_bswap16 is available */
+#define HAVE_BUILTIN_BSWAP16 1
+
+/* Set to 1 if __builtin_bswap32 is available */
+#define HAVE_BUILTIN_BSWAP32 1
+
+/* Set to 1 if __builtin_bswap64 is available */
+#define HAVE_BUILTIN_BSWAP64 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <GLUT/glut.h> header file. */
+/* #undef HAVE_GLUT_GLUT_H */
+
+/* Define to 1 if you have the <GL/glut.h> header file. */
+#define HAVE_GL_GLUT_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <OpenGL/glut.h> header file. */
+/* #undef HAVE_OPENGL_GLUT_H */
+
+/* Have PTHREAD_PRIO_INHERIT. */
+#define HAVE_PTHREAD_PRIO_INHERIT 1
+
+/* Define to 1 if you have the <shlwapi.h> header file. */
+/* #undef HAVE_SHLWAPI_H */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <wincodec.h> header file. */
+/* #undef HAVE_WINCODEC_H */
+
+/* Define to 1 if you have the <windows.h> header file. */
+/* #undef HAVE_WINDOWS_H */
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#define LT_OBJDIR ".libs/"
+
+/* Name of package */
+#define PACKAGE "libwebp"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "http://code.google.com/p/webp/issues"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libwebp"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libwebp 0.4.4"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libwebp"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL "http://developers.google.com/speed/webp"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "0.4.4"
+
+/* Define to necessary symbol if this constant uses a non-standard name on
+   your system. */
+/* #undef PTHREAD_CREATE_JOINABLE */
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "0.4.4"
+
+/* Enable experimental code */
+/* #undef WEBP_EXPERIMENTAL_FEATURES */
+
+/* Define to 1 to force aligned memory operations */
+/* #undef WEBP_FORCE_ALIGNED */
+
+/* Set to 1 if AVX2 is supported. NOTE(review): this file is configure output, so the value reflects the machine it was generated on; confirm it is right for every target built with it. */
+#define WEBP_HAVE_AVX2 1
+
+/* Set to 1 if GIF library is installed */
+/* #undef WEBP_HAVE_GIF */
+
+/* Set to 1 if OpenGL is supported */
+#define WEBP_HAVE_GL 1
+
+/* Set to 1 if JPEG library is installed */
+/* #undef WEBP_HAVE_JPEG */
+
+/* Set to 1 if PNG library is installed */
+#define WEBP_HAVE_PNG 1
+
+/* Set to 1 if SSE2 is supported. NOTE(review): configure-time value, see the AVX2 remark; confirm for non-x86 targets. */
+#define WEBP_HAVE_SSE2 1
+
+/* Set to 1 if SSE4.1 is supported. NOTE(review): configure-time value; confirm for non-x86 targets. */
+#define WEBP_HAVE_SSE41 1
+
+/* Set to 1 if TIFF library is installed */
+/* #undef WEBP_HAVE_TIFF */
+
+/* Undefine this to disable thread support. */
+#define WEBP_USE_THREAD 1
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+/* #  undef WORDS_BIGENDIAN */
+# endif
+#endif
diff --git a/drivers/webp/dec/alpha.c b/drivers/webp/dec/alpha.c
index d1095fa555..1d029b0e6a 100644
--- a/drivers/webp/dec/alpha.c
+++ b/drivers/webp/dec/alpha.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha-plane decompression.
@@ -10,131 +12,156 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <stdlib.h>
+#include "./alphai.h"
 #include "./vp8i.h"
 #include "./vp8li.h"
-#include "../utils/filters.h"
-#include "../utils/quant_levels.h"
-#include "../format_constants.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-// TODO(skal): move to dsp/ ?
-static void CopyPlane(const uint8_t* src, int src_stride,
-                      uint8_t* dst, int dst_stride, int width, int height) {
-  while (height-- > 0) {
-    memcpy(dst, src, width);
-    src += src_stride;
-    dst += dst_stride;
+#include "../dsp/dsp.h"
+#include "../utils/quant_levels_dec.h"
+#include "../utils/utils.h"
+#include "webp/format_constants.h"
+
+//------------------------------------------------------------------------------
+// ALPHDecoder object.
+
+ALPHDecoder* ALPHNew(void) {  // Allocates storage for one ALPHDecoder through WebPSafeCalloc.
+  ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  return dec;  // Returned unchecked; VP8DecompressAlphaRows() tests the result against NULL.
+}
+
+void ALPHDelete(ALPHDecoder* const dec) {  // Releases 'dec' and the lossless decoder it holds.
+  if (dec != NULL) {  // A NULL 'dec' is accepted and ignored.
+    VP8LDelete(dec->vp8l_dec_);  // Called unconditionally on the stored pointer.
+    dec->vp8l_dec_ = NULL;
+    WebPSafeFree(dec);
   }
 }
 
 //------------------------------------------------------------------------------
-// Decodes the compressed data 'data' of size 'data_size' into the 'output'.
-// The 'output' buffer should be pre-allocated and must be of the same
-// dimension 'height'x'stride', as that of the image.
-//
-// Returns 1 on successfully decoding the compressed alpha and
-//         0 if either:
-//           error in bit-stream header (invalid compression mode or filter), or
-//           error returned by appropriate compression method.
-
-static int DecodeAlpha(const uint8_t* data, size_t data_size,
-                       int width, int height, int stride, uint8_t* output) {
-  uint8_t* decoded_data = NULL;
-  const size_t decoded_size = height * width;
-  uint8_t* unfiltered_data = NULL;
-  WEBP_FILTER_TYPE filter;
-  int pre_processing;
-  int rsrv;
+// Decoding.
+
+// Initialize alpha decoding by parsing the alpha header and decoding the image
+// header for alpha data stored using lossless compression.
+// Returns false in case of error in alpha header (data too short, invalid
+// compression method or filter, error in lossless header data etc).
+static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
+                    size_t data_size, int width, int height, uint8_t* output) {  // Fills 'dec' from the alpha header in data[0]; returns 0 on a short or invalid header.
   int ok = 0;
-  int method;
+  const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;  // Payload starts after ALPHA_HEADER_LEN header bytes.
+  const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;  // Wraps around if data_size < ALPHA_HEADER_LEN; only read after the size check below has passed.
+  int rsrv;
 
-  assert(width > 0 && height > 0 && stride >= width);
+  assert(width > 0 && height > 0);
   assert(data != NULL && output != NULL);
 
+  dec->width_ = width;  // Stored before any validation, so set even when 0 is returned.
+  dec->height_ = height;
+
   if (data_size <= ALPHA_HEADER_LEN) {
     return 0;
   }
 
-  method = (data[0] >> 0) & 0x03;
-  filter = (data[0] >> 2) & 0x03;
-  pre_processing = (data[0] >> 4) & 0x03;
+  dec->method_ = (data[0] >> 0) & 0x03;  // Header bits 0-1.
+  dec->filter_ = (data[0] >> 2) & 0x03;  // Header bits 2-3.
+  dec->pre_processing_ = (data[0] >> 4) & 0x03;  // Header bits 4-5.
   rsrv = (data[0] >> 6) & 0x03;
-  if (method < ALPHA_NO_COMPRESSION ||
-      method > ALPHA_LOSSLESS_COMPRESSION ||
-      filter >= WEBP_FILTER_LAST ||
-      pre_processing > ALPHA_PREPROCESSED_LEVELS ||
+  if (dec->method_ < ALPHA_NO_COMPRESSION ||
+      dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
+      dec->filter_ >= WEBP_FILTER_LAST ||
+      dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
       rsrv != 0) {
     return 0;
   }
 
-  if (method == ALPHA_NO_COMPRESSION) {
-    ok = (data_size >= decoded_size);
-    decoded_data = (uint8_t*)data + ALPHA_HEADER_LEN;
+  if (dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t alpha_decoded_size = dec->width_ * dec->height_;  // int * int product, converted to size_t afterwards.
+    ok = (alpha_data_size >= alpha_decoded_size);  // Raw payload must hold one byte per pixel.
   } else {
-    decoded_data = (uint8_t*)malloc(decoded_size);
-    if (decoded_data == NULL) return 0;
-    ok = VP8LDecodeAlphaImageStream(width, height,
-                                    data + ALPHA_HEADER_LEN,
-                                    data_size - ALPHA_HEADER_LEN,
-                                    decoded_data);
+    assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
+    ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size, output);
   }
+  VP8FiltersInit();  // Runs whether or not 'ok' is set.
+  return ok;
+}
 
-  if (ok) {
-    WebPFilterFunc unfilter_func = WebPUnfilters[filter];
-    if (unfilter_func != NULL) {
-      unfiltered_data = (uint8_t*)malloc(decoded_size);
-      if (unfiltered_data == NULL) {
-        ok = 0;
-        goto Error;
-      }
-      // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
-      // and apply filter per image-row.
-      unfilter_func(decoded_data, width, height, 1, width, unfiltered_data);
-      // Construct raw_data (height x stride) from alpha data (height x width).
-      CopyPlane(unfiltered_data, width, output, stride, width, height);
-      free(unfiltered_data);
-    } else {
-      // Construct raw_data (height x stride) from alpha data (height x width).
-      CopyPlane(decoded_data, width, output, stride, width, height);
-    }
-    if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
-      ok = DequantizeLevels(decoded_data, width, height);
+// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
+// starting from row number 'row'. It assumes that rows up to (row - 1) have
+// already been decoded.
+// Returns false in case of bitstream error.
+static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {  // Returns 0 only when the lossless stream decode fails, 1 otherwise.
+  ALPHDecoder* const alph_dec = dec->alph_dec_;
+  const int width = alph_dec->width_;
+  const int height = alph_dec->height_;
+  WebPUnfilterFunc unfilter_func = WebPUnfilters[alph_dec->filter_];  // A NULL entry means no unfiltering is applied below.
+  uint8_t* const output = dec->alpha_plane_;
+  if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
+    const size_t offset = row * width;  // int product, converted to size_t afterwards.
+    const size_t num_pixels = num_rows * width;
+    assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN + offset + num_pixels);  // Assert only; builds without asserts rely on the size check made in ALPHInit().
+    memcpy(dec->alpha_plane_ + offset,
+           dec->alpha_data_ + ALPHA_HEADER_LEN + offset, num_pixels);  // Raw copy of the requested rows.
+  } else {  // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
+    assert(alph_dec->vp8l_dec_ != NULL);
+    if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {  // Second argument is the row index just past the requested range.
+      return 0;
     }
   }
 
- Error:
-  if (method != ALPHA_NO_COMPRESSION) {
-    free(decoded_data);
+  if (unfilter_func != NULL) {
+    unfilter_func(width, height, width, row, num_rows, output);  // 'width' is passed twice (2nd use presumably the stride — TODO confirm against WebPUnfilterFunc).
   }
-  return ok;
+
+  if (row + num_rows == dec->pic_hdr_.height_) {  // Last picture row reached.
+    dec->is_alpha_decoded_ = 1;  // Lets VP8DecompressAlphaRows() skip further decoding.
+  }
+  return 1;
 }
 
 //------------------------------------------------------------------------------
+// Main entry point.
 
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
                                       int row, int num_rows) {
-  const int stride = dec->pic_hdr_.width_;
+  const int width = dec->pic_hdr_.width_;  // Also the row pitch of alpha_plane_ (see the return statement).
+  const int height = dec->pic_hdr_.height_;
 
-  if (row < 0 || num_rows < 0 || row + num_rows > dec->pic_hdr_.height_) {
+  if (row < 0 || num_rows <= 0 || row + num_rows > height) {  // An empty request (num_rows == 0) is rejected as well.
     return NULL;    // sanity check.
   }
 
   if (row == 0) {
-    // Decode everything during the first call.
-    if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
-                     dec->pic_hdr_.width_, dec->pic_hdr_.height_, stride,
-                     dec->alpha_plane_)) {
-      return NULL;  // Error.
+    // Initialize decoding.
+    assert(dec->alpha_plane_ != NULL);
+    dec->alph_dec_ = ALPHNew();  // NOTE(review): runs on every row == 0 call and overwrites the stored pointer; a repeat call (in particular once is_alpha_decoded_ is set) would leave an instance unreleased — confirm callers never do that.
+    if (dec->alph_dec_ == NULL) return NULL;
+    if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
+                  width, height, dec->alpha_plane_)) {
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+      return NULL;
+    }
+    // if we allowed use of alpha dithering, check whether it's needed at all
+    if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
+      dec->alpha_dithering_ = 0;  // disable dithering
+    } else {
+      num_rows = height;          // decode everything in one pass
     }
   }
 
+  if (!dec->is_alpha_decoded_) {
+    int ok = 0;
+    assert(dec->alph_dec_ != NULL);
+    ok = ALPHDecode(dec, row, num_rows);
+    if (ok && dec->alpha_dithering_ > 0) {
+      ok = WebPDequantizeLevels(dec->alpha_plane_, width, height,
+                                dec->alpha_dithering_);  // Applied to the whole plane; dithering stays > 0 only on the path that set num_rows = height.
+    }
+    if (!ok || dec->is_alpha_decoded_) {  // Release the decoder on failure or once the last row is done.
+      ALPHDelete(dec->alph_dec_);
+      dec->alph_dec_ = NULL;
+    }
+    if (!ok) return NULL;  // Error.
+  }
+
   // Return a pointer to the current decoded row.
-  return dec->alpha_plane_ + row * stride;
+  return dec->alpha_plane_ + row * width;
 }
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/alphai.h b/drivers/webp/dec/alphai.h
new file mode 100644
index 0000000000..5fa230ca82
--- /dev/null
+++ b/drivers/webp/dec/alphai.h
@@ -0,0 +1,55 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha decoder: internal header.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_DEC_ALPHAI_H_
+#define WEBP_DEC_ALPHAI_H_
+
+#include "./webpi.h"
+#include "../utils/filters.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct VP8LDecoder;  // Defined in dec/vp8li.h.
+
+typedef struct ALPHDecoder ALPHDecoder;
+struct ALPHDecoder {  // State of one alpha-plane decode; filled in by ALPHInit() in dec/alpha.c.
+  int width_;   // Picture width, copied from the ALPHInit() argument.
+  int height_;  // Picture height, copied from the ALPHInit() argument.
+  int method_;  // Bits 0-1 of the first alpha header byte; checked against ALPHA_NO_COMPRESSION..ALPHA_LOSSLESS_COMPRESSION.
+  WEBP_FILTER_TYPE filter_;  // Bits 2-3 of the header byte; used as an index into WebPUnfilters[].
+  int pre_processing_;  // Bits 4-5 of the header byte; compared with ALPHA_PREPROCESSED_LEVELS.
+  struct VP8LDecoder* vp8l_dec_;  // Lossless decoder; released with VP8LDelete() in ALPHDelete().
+  VP8Io io_;  // Not touched in dec/alpha.c; presumably used by the lossless alpha path — TODO confirm.
+  int use_8b_decode;  // Although alpha channel requires only 1 byte per
+                      // pixel, sometimes VP8LDecoder may need to allocate
+                      // 4 bytes per pixel internally during decode.
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Allocates a new alpha decoder instance.
+ALPHDecoder* ALPHNew(void);
+
+// Clears and deallocates an alpha decoder instance.
+void ALPHDelete(ALPHDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_ALPHAI_H_ */
diff --git a/drivers/webp/dec/buffer.c b/drivers/webp/dec/buffer.c
index c159f6f248..9ed2b3fe1a 100644
--- a/drivers/webp/dec/buffer.c
+++ b/drivers/webp/dec/buffer.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Everything about WebPDecBuffer
@@ -15,10 +17,6 @@
 #include "./webpi.h"
 #include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // WebPDecBuffer
 
@@ -35,6 +33,11 @@ static int IsValidColorspace(int webp_csp_mode) {
   return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
 }
 
+// Smallest size holding HEIGHT rows STRIDE apart: strictly speaking, the very last (or first, if flipped) row
+// doesn't require padding. WIDTH is added as-is. NOTE(review): the RGB caller passes a pixel width while its stride is checked against width * kModeBpp[mode] — confirm units.
+#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE)       \
+    ((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
+
 static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
   int ok = 1;
   const WEBP_CSP_MODE mode = buffer->colorspace;
@@ -44,33 +47,41 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
     ok = 0;
   } else if (!WebPIsRGBMode(mode)) {   // YUV checks
     const WebPYUVABuffer* const buf = &buffer->u.YUVA;
-    const uint64_t y_size = (uint64_t)buf->y_stride * height;
-    const uint64_t u_size = (uint64_t)buf->u_stride * ((height + 1) / 2);
-    const uint64_t v_size = (uint64_t)buf->v_stride * ((height + 1) / 2);
-    const uint64_t a_size = (uint64_t)buf->a_stride * height;
+    const int uv_width  = (width  + 1) / 2;
+    const int uv_height = (height + 1) / 2;
+    const int y_stride = abs(buf->y_stride);
+    const int u_stride = abs(buf->u_stride);
+    const int v_stride = abs(buf->v_stride);
+    const int a_stride = abs(buf->a_stride);
+    const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride);
+    const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride);
+    const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride);
+    const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride);
     ok &= (y_size <= buf->y_size);
     ok &= (u_size <= buf->u_size);
     ok &= (v_size <= buf->v_size);
-    ok &= (buf->y_stride >= width);
-    ok &= (buf->u_stride >= (width + 1) / 2);
-    ok &= (buf->v_stride >= (width + 1) / 2);
+    ok &= (y_stride >= width);
+    ok &= (u_stride >= uv_width);
+    ok &= (v_stride >= uv_width);
     ok &= (buf->y != NULL);
     ok &= (buf->u != NULL);
     ok &= (buf->v != NULL);
     if (mode == MODE_YUVA) {
-      ok &= (buf->a_stride >= width);
+      ok &= (a_stride >= width);
       ok &= (a_size <= buf->a_size);
       ok &= (buf->a != NULL);
     }
   } else {    // RGB checks
     const WebPRGBABuffer* const buf = &buffer->u.RGBA;
-    const uint64_t size = (uint64_t)buf->stride * height;
+    const int stride = abs(buf->stride);
+    const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
     ok &= (size <= buf->size);
-    ok &= (buf->stride >= width * kModeBpp[mode]);
+    ok &= (stride >= width * kModeBpp[mode]);
     ok &= (buf->rgba != NULL);
   }
   return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
 }
+#undef MIN_BUFFER_SIZE
 
 static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
   const int w = buffer->width;
@@ -133,9 +144,35 @@ static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
   return CheckDecBuffer(buffer);
 }
 
+VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {  // Flips vertically in place: points each plane at its last row and negates the stride.
+  if (buffer == NULL) {
+    return VP8_STATUS_INVALID_PARAM;  // The only failure case.
+  }
+  if (WebPIsRGBMode(buffer->colorspace)) {
+    WebPRGBABuffer* const buf = &buffer->u.RGBA;
+    buf->rgba += (int64_t)(buffer->height - 1) * buf->stride;  // 64-bit product: an int multiply can overflow for large buffers.
+    buf->stride = -buf->stride;
+  } else {
+    WebPYUVABuffer* const buf = &buffer->u.YUVA;
+    const int64_t H = buffer->height;  // Widened so the row-offset products below are 64-bit.
+    buf->y += (H - 1) * buf->y_stride;
+    buf->y_stride = -buf->y_stride;
+    buf->u += ((H - 1) >> 1) * buf->u_stride;  // Chroma planes have (H + 1) / 2 rows, so the last row index is (H - 1) >> 1.
+    buf->u_stride = -buf->u_stride;
+    buf->v += ((H - 1) >> 1) * buf->v_stride;
+    buf->v_stride = -buf->v_stride;
+    if (buf->a != NULL) {  // The alpha plane is flipped only when present.
+      buf->a += (H - 1) * buf->a_stride;
+      buf->a_stride = -buf->a_stride;
+    }
+  }
+  return VP8_STATUS_OK;
+}
+
 VP8StatusCode WebPAllocateDecBuffer(int w, int h,
                                     const WebPDecoderOptions* const options,
                                     WebPDecBuffer* const out) {
+  VP8StatusCode status;
   if (out == NULL || w <= 0 || h <= 0) {
     return VP8_STATUS_INVALID_PARAM;
   }
@@ -152,18 +189,28 @@ VP8StatusCode WebPAllocateDecBuffer(int w, int h,
       h = ch;
     }
     if (options->use_scaling) {
-      if (options->scaled_width <= 0 || options->scaled_height <= 0) {
+      int scaled_width = options->scaled_width;
+      int scaled_height = options->scaled_height;
+      if (!WebPRescalerGetScaledDimensions(
+              w, h, &scaled_width, &scaled_height)) {
         return VP8_STATUS_INVALID_PARAM;
       }
-      w = options->scaled_width;
-      h = options->scaled_height;
+      w = scaled_width;
+      h = scaled_height;
     }
   }
   out->width = w;
   out->height = h;
 
-  // Then, allocate buffer for real
-  return AllocateBuffer(out);
+  // Then, allocate buffer for real.
+  status = AllocateBuffer(out);
+  if (status != VP8_STATUS_OK) return status;
+
+  // Use the stride trick if vertical flip is needed.
+  if (options != NULL && options->flip) {
+    status = WebPFlipBuffer(out);
+  }
+  return status;
 }
 
 //------------------------------------------------------------------------------
@@ -180,8 +227,9 @@ int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
 
 void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
   if (buffer != NULL) {
-    if (!buffer->is_external_memory)
-      free(buffer->private_memory);
+    if (!buffer->is_external_memory) {  // Memory flagged as external is left for its owner to release.
+      WebPSafeFree(buffer->private_memory);
+    }
     buffer->private_memory = NULL;
   }
 }
@@ -210,6 +258,3 @@ void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/common.h b/drivers/webp/dec/common.h
new file mode 100644
index 0000000000..6961e22470
--- /dev/null
+++ b/drivers/webp/dec/common.h
@@ -0,0 +1,54 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Definitions and macros common to encoding and decoding
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DEC_COMMON_H_
+#define WEBP_DEC_COMMON_H_
+
+// intra prediction modes
+enum { B_DC_PRED = 0,   // 4x4 modes
+       B_TM_PRED = 1,
+       B_VE_PRED = 2,
+       B_HE_PRED = 3,
+       B_RD_PRED = 4,
+       B_VR_PRED = 5,
+       B_LD_PRED = 6,
+       B_VL_PRED = 7,
+       B_HD_PRED = 8,
+       B_HU_PRED = 9,
+       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
+
+       // Luma16 or UV modes (aliases of the 4x4 values 0, 2, 3 and 1)
+       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
+       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
+       B_PRED = NUM_BMODES,   // refined I4x4 mode
+       NUM_PRED_MODES = 4,
+
+       // special modes: DC variants for blocks with no top and/or left neighbour; their values 4..6 coincide with B_RD_PRED..B_LD_PRED
+       B_DC_PRED_NOTOP = 4,
+       B_DC_PRED_NOLEFT = 5,
+       B_DC_PRED_NOTOPLEFT = 6,
+       NUM_B_DC_MODES = 7 };  // = DC_PRED..TM_PRED (4) plus the three special DC variants
+
+enum { MB_FEATURE_TREE_PROBS = 3,  // Count constants; names suggest segment, loop-filter and partition limits — TODO confirm at use sites.
+       NUM_MB_SEGMENTS = 4,
+       NUM_REF_LF_DELTAS = 4,
+       NUM_MODE_LF_DELTAS = 4,    // I4x4, ZERO, *, SPLIT
+       MAX_NUM_PARTITIONS = 8,
+       // Probabilities
+       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
+       NUM_BANDS = 8,
+       NUM_CTX = 3,
+       NUM_PROBAS = 11
+     };
+
+#endif    // WEBP_DEC_COMMON_H_
diff --git a/drivers/webp/dec/decode_vp8.h b/drivers/webp/dec/decode_vp8.h
index c26a9fc891..2bf1bdbbf5 100644
--- a/drivers/webp/dec/decode_vp8.h
+++ b/drivers/webp/dec/decode_vp8.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Low-level API for VP8 decoder
@@ -12,9 +14,9 @@
 #ifndef WEBP_WEBP_DECODE_VP8_H_
 #define WEBP_WEBP_DECODE_VP8_H_
 
-#include "../decode.h"
+#include "webp/decode.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -130,7 +132,8 @@ static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
   return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
 }
 
-// Start decoding a new picture. Returns true if ok.
+// Decode the VP8 frame header. Returns true if ok.
+// Note: 'io->data' must be pointing to the start of the VP8 frame header.
 int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
 
 // Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
@@ -175,7 +178,7 @@ WEBP_EXTERN(int) VP8LGetInfo(
     const uint8_t* data, size_t data_size,  // data available so far
     int* const width, int* const height, int* const has_alpha);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/dec/frame.c b/drivers/webp/dec/frame.c
index 9c91a48e17..b882133eab 100644
--- a/drivers/webp/dec/frame.c
+++ b/drivers/webp/dec/frame.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Frame-reconstruction function. Memory allocation.
@@ -13,11 +15,180 @@
 #include "./vp8i.h"
 #include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+//------------------------------------------------------------------------------
+// Main reconstruction function.
+
+static const int kScan[16] = {  // Offset (x + y * BPS) of each 4x4 sub-block of a 16x16 area, in row-major order.
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
+};
+
+static int CheckMode(int mb_x, int mb_y, int mode) {  // Swaps B_DC_PRED for its border variant on the first column/row; other modes pass through.
+  if (mode == B_DC_PRED) {
+    if (mb_x == 0) {  // Leftmost column: no left neighbour.
+      return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
+    } else {  // Left neighbour exists; only the top may be missing.
+      return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
+    }
+  }
+  return mode;
+}
 
-#define ALIGN_MASK (32 - 1)
+static void Copy32b(uint8_t* const dst, const uint8_t* const src) {  // Copies exactly 4 bytes from 'src' to 'dst'.
+  memcpy(dst, src, 4);  // memcpy keeps the copy valid for unaligned pointers.
+}
+
+static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
+                                    uint8_t* const dst) {  // Picks the inverse-transform variant from the top two bits of 'bits'.
+  switch (bits >> 30) {  // Only bits 31..30 are examined; callers shift 'bits' left by 2 per block.
+    case 3:
+      VP8Transform(src, dst, 0);
+      break;
+    case 2:
+      VP8TransformAC3(src, dst);
+      break;
+    case 1:
+      VP8TransformDC(src, dst);
+      break;
+    default:  // 0: no transform call, 'dst' is left as it is.
+      break;
+  }
+}
+
+static void DoUVTransform(uint32_t bits, const int16_t* const src,
+                          uint8_t* const dst) {  // Chroma counterpart of DoTransform(); looks only at the low 8 bits of 'bits'.
+  if (bits & 0xff) {    // any non-zero coeff at all?
+    if (bits & 0xaa) {  // any non-zero AC coefficient?
+      VP8TransformUV(src, dst);   // note we don't use the AC3 variant for U/V
+    } else {
+      VP8TransformDCUV(src, dst);
+    }
+  }
+}
+
+static void ReconstructRow(const VP8Decoder* const dec,
+                           const VP8ThreadContext* ctx) {  // Predicts and reconstructs each macroblock of row ctx->mb_y_ in yuv_b_, then copies it to the cache_y_/u_/v_ planes.
+  int j;
+  int mb_x;
+  const int mb_y = ctx->mb_y_;
+  const int cache_id = ctx->id_;  // Selects which 16-row (luma) / 8-row (chroma) slot of the cache planes is written.
+  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
+  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
+  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
+
+  // Initialize left-most block.
+  for (j = 0; j < 16; ++j) {
+    y_dst[j * BPS - 1] = 129;  // Column just left of the block (x == -1).
+  }
+  for (j = 0; j < 8; ++j) {
+    u_dst[j * BPS - 1] = 129;
+    v_dst[j * BPS - 1] = 129;
+  }
+
+  // Init top-left sample on left column too.
+  if (mb_y > 0) {
+    y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
+  } else {
+    // we only need to do this init once at block (0,0).
+    // Afterward, it remains valid for the whole topmost row.
+    memset(y_dst - BPS - 1, 127, 16 + 4 + 1);  // Row above the block: top-left corner + 16 top samples + 4 top-right samples.
+    memset(u_dst - BPS - 1, 127, 8 + 1);  // Top-left corner + 8 top samples.
+    memset(v_dst - BPS - 1, 127, 8 + 1);
+  }
+
+  // Reconstruct one row.
+  for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+    const VP8MBData* const block = ctx->mb_data_ + mb_x;
+
+    // Rotate in the left samples from previously decoded block. We move four
+    // pixels at a time for alignment reason, and because of in-loop filter.
+    if (mb_x > 0) {
+      for (j = -1; j < 16; ++j) {  // j == -1 also carries over the row above the block.
+        Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);  // Last 4 luma columns (12..15) become columns -4..-1.
+      }
+      for (j = -1; j < 8; ++j) {
+        Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);  // Last 4 chroma columns (4..7) become columns -4..-1.
+        Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
+      }
+    }
+    {
+      // bring top samples into the cache
+      VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
+      const int16_t* const coeffs = block->coeffs_;
+      uint32_t bits = block->non_zero_y_;  // Consumed two bits per 4x4 luma block, most significant first (see DoTransform).
+      int n;
+
+      if (mb_y > 0) {  // On the top row, the 127 fill written above is kept instead.
+        memcpy(y_dst - BPS, top_yuv[0].y, 16);
+        memcpy(u_dst - BPS, top_yuv[0].u, 8);
+        memcpy(v_dst - BPS, top_yuv[0].v, 8);
+      }
+
+      // predict and add residuals
+      if (block->is_i4x4_) {   // 4x4
+        uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);  // 4 samples right of the top row, accessed as one 32-bit word; assumes this address is suitably aligned — TODO confirm from Y_OFF/BPS.
+
+        if (mb_y > 0) {
+          if (mb_x >= dec->mb_w_ - 1) {    // on rightmost border
+            memset(top_right, top_yuv[0].y[15], sizeof(*top_right));  // No right neighbour: repeat this block's last top sample.
+          } else {
+            memcpy(top_right, top_yuv[1].y, sizeof(*top_right));  // First 4 top samples of the next macroblock.
+          }
+        }
+        // replicate the top-right pixels below
+        top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];  // Indexing is in 32-bit words, so each step of BPS is 4 * BPS bytes (4 rows).
+
+        // predict and add residuals for all 4x4 blocks in turn.
+        for (n = 0; n < 16; ++n, bits <<= 2) {
+          uint8_t* const dst = y_dst + kScan[n];
+          VP8PredLuma4[block->imodes_[n]](dst);
+          DoTransform(bits, coeffs + n * 16, dst);  // 16 coefficients per 4x4 block.
+        }
+      } else {    // 16x16
+        const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);  // Border-aware DC variant when needed.
+        VP8PredLuma16[pred_func](y_dst);
+        if (bits != 0) {  // Skip the residual loop when no luma block has coefficients.
+          for (n = 0; n < 16; ++n, bits <<= 2) {
+            DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
+          }
+        }
+      }
+      {
+        // Chroma
+        const uint32_t bits_uv = block->non_zero_uv_;
+        const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
+        VP8PredChroma8[pred_func](u_dst);
+        VP8PredChroma8[pred_func](v_dst);
+        DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);  // U coefficients follow the 16 luma blocks; low byte of bits_uv.
+        DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);  // V coefficients start 4 blocks later; next byte of bits_uv.
+      }
+
+      // stash away top samples for next block
+      if (mb_y < dec->mb_h_ - 1) {  // Not needed after the last macroblock row.
+        memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
+        memcpy(top_yuv[0].u, u_dst +  7 * BPS,  8);
+        memcpy(top_yuv[0].v, v_dst +  7 * BPS,  8);
+      }
+    }
+    // Transfer reconstructed samples from yuv_b_ cache to final destination.
+    {
+      const int y_offset = cache_id * 16 * dec->cache_y_stride_;
+      const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
+      uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
+      uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
+      uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
+      for (j = 0; j < 16; ++j) {
+        memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);  // One 16-sample luma row per iteration.
+      }
+      for (j = 0; j < 8; ++j) {
+        memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
+        memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
+      }
+    }
+  }
+}
 
 //------------------------------------------------------------------------------
 // Filtering
@@ -29,25 +200,18 @@ extern "C" {
 //                 U/V, so it's 8 samples total (because of the 2x upsampling).
 static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
 
-static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {
-  if (keyframe) {
-    return (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
-  } else {
-    return (level >= 40) ? 3 : (level >= 20) ? 2 : (level >= 15) ? 1 : 0;
-  }
-}
-
 static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
   const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int cache_id = ctx->id_;
   const int y_bps = dec->cache_y_stride_;
-  VP8FInfo* const f_info = ctx->f_info_ + mb_x;
-  uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;
-  const int level = f_info->f_level_;
+  const VP8FInfo* const f_info = ctx->f_info_ + mb_x;
+  uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;
   const int ilevel = f_info->f_ilevel_;
-  const int limit = 2 * level + ilevel;
-  if (level == 0) {
+  const int limit = f_info->f_limit_;
+  if (limit == 0) {
     return;
   }
+  assert(limit >= 3);
   if (dec->filter_type_ == 1) {   // simple
     if (mb_x > 0) {
       VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
@@ -63,10 +227,9 @@ static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
     }
   } else {    // complex
     const int uv_bps = dec->cache_uv_stride_;
-    uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
-    uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
-    const int hev_thresh =
-        hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
+    uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
+    uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
+    const int hev_thresh = f_info->hev_thresh_;
     if (mb_x > 0) {
       VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
       VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
@@ -97,53 +260,138 @@ static void FilterRow(const VP8Decoder* const dec) {
 }
 
 //------------------------------------------------------------------------------
+// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
 
-void VP8StoreBlock(VP8Decoder* const dec) {
+static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
   if (dec->filter_type_ > 0) {
-    VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
-    const int skip = dec->mb_info_[dec->mb_x_].skip_;
-    int level = dec->filter_levels_[dec->segment_];
-    if (dec->filter_hdr_.use_lf_delta_) {
-      // TODO(skal): only CURRENT is handled for now.
-      level += dec->filter_hdr_.ref_lf_delta_[0];
-      if (dec->is_i4x4_) {
-        level += dec->filter_hdr_.mode_lf_delta_[0];
+    int s;
+    const VP8FilterHeader* const hdr = &dec->filter_hdr_;
+    for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+      int i4x4;
+      // First, compute the initial level
+      int base_level;
+      if (dec->segment_hdr_.use_segment_) {
+        base_level = dec->segment_hdr_.filter_strength_[s];
+        if (!dec->segment_hdr_.absolute_delta_) {
+          base_level += hdr->level_;
+        }
+      } else {
+        base_level = hdr->level_;
+      }
+      for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
+        VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
+        int level = base_level;
+        if (hdr->use_lf_delta_) {
+          level += hdr->ref_lf_delta_[0];
+          if (i4x4) {
+            level += hdr->mode_lf_delta_[0];
+          }
+        }
+        level = (level < 0) ? 0 : (level > 63) ? 63 : level;
+        if (level > 0) {
+          int ilevel = level;
+          if (hdr->sharpness_ > 0) {
+            if (hdr->sharpness_ > 4) {
+              ilevel >>= 2;
+            } else {
+              ilevel >>= 1;
+            }
+            if (ilevel > 9 - hdr->sharpness_) {
+              ilevel = 9 - hdr->sharpness_;
+            }
+          }
+          if (ilevel < 1) ilevel = 1;
+          info->f_ilevel_ = ilevel;
+          info->f_limit_ = 2 * level + ilevel;
+          info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
+        } else {
+          info->f_limit_ = 0;  // no filtering
+        }
+        info->f_inner_ = i4x4;
       }
     }
-    level = (level < 0) ? 0 : (level > 63) ? 63 : level;
-    info->f_level_ = level;
+  }
+}
 
-    if (dec->filter_hdr_.sharpness_ > 0) {
-      if (dec->filter_hdr_.sharpness_ > 4) {
-        level >>= 2;
-      } else {
-        level >>= 1;
+//------------------------------------------------------------------------------
+// Dithering
+
+#define DITHER_AMP_TAB_SIZE 12
+static const int kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
+  // roughly, it's dqm->uv_mat_[1]
+  8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
+};
+
+void VP8InitDithering(const WebPDecoderOptions* const options,
+                      VP8Decoder* const dec) {  // 'options' may be NULL
+  assert(dec != NULL);
+  if (options != NULL) {
+    const int d = options->dithering_strength;  // clamped to [0, 100] below
+    const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
+    const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
+    if (f > 0) {  // non-zero strength requested
+      int s;
+      int all_amp = 0;
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+        VP8QuantMatrix* const dqm = &dec->dqm_[s];
+        if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
+          // TODO(skal): should we specially dither more for uv_quant_ < 0?
+          const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
+          dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
+        }  // NOTE(review): else dither_ keeps its earlier value; confirm
+        all_amp |= dqm->dither_;  // non-zero if any segment dithers
       }
-      if (level > 9 - dec->filter_hdr_.sharpness_) {
-        level = 9 - dec->filter_hdr_.sharpness_;
+      if (all_amp != 0) {  // init dithering_rg_ and enable dec->dither_
+        VP8InitRandom(&dec->dithering_rg_, 1.0f);
+        dec->dither_ = 1;
       }
     }
+    // potentially allow alpha dithering (strength clamped to [0, 100])
+    dec->alpha_dithering_ = options->alpha_dithering_strength;
+    if (dec->alpha_dithering_ > 100) {
+      dec->alpha_dithering_ = 100;
+    } else if (dec->alpha_dithering_ < 0) {
+      dec->alpha_dithering_ = 0;
+    }
+  }
+}
 
-    info->f_ilevel_ = (level < 1) ? 1 : level;
-    info->f_inner_ = (!skip || dec->is_i4x4_);
+// minimal amp that will provide a non-zero dithering effect
+#define MIN_DITHER_AMP 4
+#define DITHER_DESCALE 4
+#define DITHER_DESCALE_ROUNDER (1 << (DITHER_DESCALE - 1))
+#define DITHER_AMP_BITS 8
+#define DITHER_AMP_CENTER (1 << DITHER_AMP_BITS)
+
+static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
+  int i, j;
+  for (j = 0; j < 8; ++j) {
+    for (i = 0; i < 8; ++i) {
+      // TODO: could be made faster with SSE2
+      const int bits =
+          VP8RandomBits2(rg, DITHER_AMP_BITS + 1, amp) - DITHER_AMP_CENTER;
+      // Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
+      const int delta = (bits + DITHER_DESCALE_ROUNDER) >> DITHER_DESCALE;
+      const int v = (int)dst[i] + delta;
+      dst[i] = (v < 0) ? 0 : (v > 255) ? 255u : (uint8_t)v;
+    }
+    dst += bps;
   }
-  {
-    // Transfer samples to row cache
-    int y;
-    const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
-    const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
-    uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
-    uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
-    uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
-    for (y = 0; y < 16; ++y) {
-      memcpy(ydst + y * dec->cache_y_stride_,
-             dec->yuv_b_ + Y_OFF + y * BPS, 16);
-    }
-    for (y = 0; y < 8; ++y) {
-      memcpy(udst + y * dec->cache_uv_stride_,
-           dec->yuv_b_ + U_OFF + y * BPS, 8);
-      memcpy(vdst + y * dec->cache_uv_stride_,
-           dec->yuv_b_ + V_OFF + y * BPS, 8);
+}
+
+static void DitherRow(VP8Decoder* const dec) {  // dithers cached U/V rows
+  int mb_x;
+  assert(dec->dither_);  // only meant to run when dithering is enabled
+  for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
+    const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+    const VP8MBData* const data = ctx->mb_data_ + mb_x;
+    const int cache_id = ctx->id_;
+    const int uv_bps = dec->cache_uv_stride_;
+    if (data->dither_ >= MIN_DITHER_AMP) {  // smaller amps have no effect
+      uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
+      uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
+      Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
+      Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
     }
   }
 }
@@ -165,25 +413,35 @@ void VP8StoreBlock(VP8Decoder* const dec) {
 static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
   int ok = 1;
   const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int cache_id = ctx->id_;
   const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
   const int ysize = extra_y_rows * dec->cache_y_stride_;
   const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
-  const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;
-  const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;
+  const int y_offset = cache_id * 16 * dec->cache_y_stride_;
+  const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
   uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
   uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
   uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
-  const int first_row = (ctx->mb_y_ == 0);
-  const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1);
-  int y_start = MACROBLOCK_VPOS(ctx->mb_y_);
-  int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1);
+  const int mb_y = ctx->mb_y_;
+  const int is_first_row = (mb_y == 0);
+  const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
+
+  if (dec->mt_method_ == 2) {
+    ReconstructRow(dec, ctx);
+  }
 
   if (ctx->filter_row_) {
     FilterRow(dec);
   }
 
-  if (io->put) {
-    if (!first_row) {
+  if (dec->dither_) {
+    DitherRow(dec);
+  }
+
+  if (io->put != NULL) {
+    int y_start = MACROBLOCK_VPOS(mb_y);
+    int y_end = MACROBLOCK_VPOS(mb_y + 1);
+    if (!is_first_row) {
       y_start -= extra_y_rows;
       io->y = ydst;
       io->u = udst;
@@ -194,7 +452,7 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
       io->v = dec->cache_v_ + uv_offset;
     }
 
-    if (!last_row) {
+    if (!is_last_row) {
       y_end -= extra_y_rows;
     }
     if (y_end > io->crop_bottom) {
@@ -202,11 +460,8 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
     }
     io->a = NULL;
     if (dec->alpha_data_ != NULL && y_start < y_end) {
-      // TODO(skal): several things to correct here:
-      // * testing presence of alpha with dec->alpha_data_ is not a good idea
-      // * we're actually decompressing the full plane only once. It should be
-      //   more obvious from signature.
-      // * we could free alpha_data_ right after this call, but we don't own.
+      // TODO(skal): testing presence of alpha with dec->alpha_data_ is not a
+      // good idea.
       io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
       if (io->a == NULL) {
         return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
@@ -238,8 +493,8 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
     }
   }
   // rotate top samples if needed
-  if (ctx->id_ + 1 == dec->num_caches_) {
-    if (!last_row) {
+  if (cache_id + 1 == dec->num_caches_) {
+    if (!is_last_row) {
       memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
       memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
       memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
@@ -256,27 +511,40 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
   int ok = 1;
   VP8ThreadContext* const ctx = &dec->thread_ctx_;
-  if (!dec->use_threads_) {
+  const int filter_row =
+      (dec->filter_type_ > 0) &&
+      (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
+  if (dec->mt_method_ == 0) {
     // ctx->id_ and ctx->f_info_ are already set
     ctx->mb_y_ = dec->mb_y_;
-    ctx->filter_row_ = dec->filter_row_;
+    ctx->filter_row_ = filter_row;
+    ReconstructRow(dec, ctx);
     ok = FinishRow(dec, io);
   } else {
     WebPWorker* const worker = &dec->worker_;
     // Finish previous job *before* updating context
-    ok &= WebPWorkerSync(worker);
+    ok &= WebPGetWorkerInterface()->Sync(worker);
     assert(worker->status_ == OK);
     if (ok) {   // spawn a new deblocking/output job
       ctx->io_ = *io;
       ctx->id_ = dec->cache_id_;
       ctx->mb_y_ = dec->mb_y_;
-      ctx->filter_row_ = dec->filter_row_;
-      if (ctx->filter_row_) {    // just swap filter info
+      ctx->filter_row_ = filter_row;
+      if (dec->mt_method_ == 2) {  // swap macroblock data
+        VP8MBData* const tmp = ctx->mb_data_;
+        ctx->mb_data_ = dec->mb_data_;
+        dec->mb_data_ = tmp;
+      } else {
+        // perform reconstruction directly in main thread
+        ReconstructRow(dec, ctx);
+      }
+      if (filter_row) {            // swap filter info
         VP8FInfo* const tmp = ctx->f_info_;
         ctx->f_info_ = dec->f_info_;
         dec->f_info_ = tmp;
       }
-      WebPWorkerLaunch(worker);
+      // (reconstruct)+filter in parallel
+      WebPGetWorkerInterface()->Launch(worker);
       if (++dec->cache_id_ == dec->num_caches_) {
         dec->cache_id_ = 0;
       }
@@ -290,8 +558,8 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
 
 VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
   // Call setup() first. This may trigger additional decoding features on 'io'.
-  // Note: Afterward, we must call teardown() not matter what.
-  if (io->setup && !io->setup(io)) {
+  // Note: Afterward, we must call teardown() no matter what.
+  if (io->setup != NULL && !io->setup(io)) {
     VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
     return dec->status_;
   }
@@ -304,7 +572,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
 
   // Define the area where we can skip in-loop filtering, in case of cropping.
   //
-  // 'Simple' filter reads two luma samples outside of the macroblock and
+  // 'Simple' filter reads two luma samples outside of the macroblock
   // and filters one. It doesn't filter the chroma samples. Hence, we can
   // avoid doing the in-loop filtering before crop_top/crop_left position.
   // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
@@ -339,16 +607,17 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
       dec->br_mb_y_ = dec->mb_h_;
     }
   }
+  PrecomputeFilterStrengths(dec);
   return VP8_STATUS_OK;
 }
 
 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
   int ok = 1;
-  if (dec->use_threads_) {
-    ok = WebPWorkerSync(&dec->worker_);
+  if (dec->mt_method_ > 0) {
+    ok = WebPGetWorkerInterface()->Sync(&dec->worker_);
   }
 
-  if (io->teardown) {
+  if (io->teardown != NULL) {
     io->teardown(io);
   }
   return ok;
@@ -384,9 +653,9 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
 // Initialize multi/single-thread worker
 static int InitThreadContext(VP8Decoder* const dec) {
   dec->cache_id_ = 0;
-  if (dec->use_threads_) {
+  if (dec->mt_method_ > 0) {
     WebPWorker* const worker = &dec->worker_;
-    if (!WebPWorkerReset(worker)) {
+    if (!WebPGetWorkerInterface()->Reset(worker)) {
       return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
                          "thread initialization failed.");
     }
@@ -401,6 +670,28 @@ static int InitThreadContext(VP8Decoder* const dec) {
   return 1;
 }
 
+int VP8GetThreadMethod(const WebPDecoderOptions* const options,  // may be NULL
+                       const WebPHeaderStructure* const headers,
+                       int width, int height) {
+  if (options == NULL || options->use_threads == 0) {
+    return 0;  // multi-threading not requested
+  }
+  (void)headers;  // only used by the assert() below
+  (void)width;    // unused if WEBP_USE_THREAD is undefined
+  (void)height;   // only used by the disabled '#if 0' code
+  assert(headers == NULL || !headers->is_lossless);
+#if defined(WEBP_USE_THREAD)
+  if (width < MIN_WIDTH_FOR_THREADS) return 0;  // too narrow for threads
+  // TODO(skal): tune the heuristic further
+#if 0
+  if (height < 2 * width) return 2;
+#endif
+  return 2;  // presumably used as dec->mt_method_ -- confirm w/ caller
+#else   // !WEBP_USE_THREAD
+  return 0;  // built without thread support
+#endif
+}
+
 #undef MT_CACHE_LINES
 #undef ST_CACHE_LINES
 
@@ -412,14 +703,15 @@ static int AllocateMemory(VP8Decoder* const dec) {
   const int mb_w = dec->mb_w_;
   // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
   const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
-  const size_t top_size = (16 + 8 + 8) * mb_w;
+  const size_t top_size = sizeof(VP8TopSamples) * mb_w;
   const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
   const size_t f_info_size =
       (dec->filter_type_ > 0) ?
-          mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
+          mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
         : 0;
   const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
-  const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
+  const size_t mb_data_size =
+      (dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
   const size_t cache_height = (16 * num_caches
                             + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
   const size_t cache_size = top_size * cache_height;
@@ -428,13 +720,13 @@ static int AllocateMemory(VP8Decoder* const dec) {
       (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
   const uint64_t needed = (uint64_t)intra_pred_mode_size
                         + top_size + mb_info_size + f_info_size
-                        + yuv_size + coeffs_size
-                        + cache_size + alpha_size + ALIGN_MASK;
+                        + yuv_size + mb_data_size
+                        + cache_size + alpha_size + WEBP_ALIGN_CST;
   uint8_t* mem;
 
   if (needed != (size_t)needed) return 0;  // check for overflow
   if (needed > dec->mem_size_) {
-    free(dec->mem_);
+    WebPSafeFree(dec->mem_);
     dec->mem_size_ = 0;
     dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
     if (dec->mem_ == NULL) {
@@ -449,12 +741,8 @@ static int AllocateMemory(VP8Decoder* const dec) {
   dec->intra_t_ = (uint8_t*)mem;
   mem += intra_pred_mode_size;
 
-  dec->y_t_ = (uint8_t*)mem;
-  mem += 16 * mb_w;
-  dec->u_t_ = (uint8_t*)mem;
-  mem += 8 * mb_w;
-  dec->v_t_ = (uint8_t*)mem;
-  mem += 8 * mb_w;
+  dec->yuv_t_ = (VP8TopSamples*)mem;
+  mem += top_size;
 
   dec->mb_info_ = ((VP8MB*)mem) + 1;
   mem += mb_info_size;
@@ -463,20 +751,24 @@ static int AllocateMemory(VP8Decoder* const dec) {
   mem += f_info_size;
   dec->thread_ctx_.id_ = 0;
   dec->thread_ctx_.f_info_ = dec->f_info_;
-  if (dec->use_threads_) {
+  if (dec->mt_method_ > 0) {
     // secondary cache line. The deblocking process need to make use of the
     // filtering strength from previous macroblock row, while the new ones
     // are being decoded in parallel. We'll just swap the pointers.
     dec->thread_ctx_.f_info_ += mb_w;
   }
 
-  mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
-  assert((yuv_size & ALIGN_MASK) == 0);
+  mem = (uint8_t*)WEBP_ALIGN(mem);
+  assert((yuv_size & WEBP_ALIGN_CST) == 0);
   dec->yuv_b_ = (uint8_t*)mem;
   mem += yuv_size;
 
-  dec->coeffs_ = (int16_t*)mem;
-  mem += coeffs_size;
+  dec->mb_data_ = (VP8MBData*)mem;
+  dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
+  if (dec->mt_method_ == 2) {
+    dec->thread_ctx_.mb_data_ += mb_w;
+  }
+  mem += mb_data_size;
 
   dec->cache_y_stride_ = 16 * mb_w;
   dec->cache_uv_stride_ = 8 * mb_w;
@@ -496,9 +788,11 @@ static int AllocateMemory(VP8Decoder* const dec) {
   // alpha plane
   dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
   mem += alpha_size;
+  assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
 
-  // note: left-info is initialized once for all.
+  // note: left/top-info is initialized once for all.
   memset(dec->mb_info_ - 1, 0, mb_info_size);
+  VP8InitScanline(dec);   // initialize left too.
 
   // initialize top
   memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
@@ -517,7 +811,7 @@ static void InitIo(VP8Decoder* const dec, VP8Io* io) {
   io->a = NULL;
 }
 
-int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
   if (!InitThreadContext(dec)) return 0;  // call first. Sets dec->num_caches_.
   if (!AllocateMemory(dec)) return 0;
   InitIo(dec, io);
@@ -526,154 +820,3 @@ int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
 }
 
 //------------------------------------------------------------------------------
-// Main reconstruction function.
-
-static const int kScan[16] = {
-  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
-  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
-  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
-  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
-};
-
-static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) {
-  if (mode == B_DC_PRED) {
-    if (dec->mb_x_ == 0) {
-      return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
-    } else {
-      return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
-    }
-  }
-  return mode;
-}
-
-static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
-  *(uint32_t*)dst = *(uint32_t*)src;
-}
-
-void VP8ReconstructBlock(VP8Decoder* const dec) {
-  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
-  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
-  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
-
-  // Rotate in the left samples from previously decoded block. We move four
-  // pixels at a time for alignment reason, and because of in-loop filter.
-  if (dec->mb_x_ > 0) {
-    int j;
-    for (j = -1; j < 16; ++j) {
-      Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
-    }
-    for (j = -1; j < 8; ++j) {
-      Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
-      Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
-    }
-  } else {
-    int j;
-    for (j = 0; j < 16; ++j) {
-      y_dst[j * BPS - 1] = 129;
-    }
-    for (j = 0; j < 8; ++j) {
-      u_dst[j * BPS - 1] = 129;
-      v_dst[j * BPS - 1] = 129;
-    }
-    // Init top-left sample on left column too
-    if (dec->mb_y_ > 0) {
-      y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
-    }
-  }
-  {
-    // bring top samples into the cache
-    uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
-    uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
-    uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
-    const int16_t* coeffs = dec->coeffs_;
-    int n;
-
-    if (dec->mb_y_ > 0) {
-      memcpy(y_dst - BPS, top_y, 16);
-      memcpy(u_dst - BPS, top_u, 8);
-      memcpy(v_dst - BPS, top_v, 8);
-    } else if (dec->mb_x_ == 0) {
-      // we only need to do this init once at block (0,0).
-      // Afterward, it remains valid for the whole topmost row.
-      memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
-      memset(u_dst - BPS - 1, 127, 8 + 1);
-      memset(v_dst - BPS - 1, 127, 8 + 1);
-    }
-
-    // predict and add residuals
-
-    if (dec->is_i4x4_) {   // 4x4
-      uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
-
-      if (dec->mb_y_ > 0) {
-        if (dec->mb_x_ >= dec->mb_w_ - 1) {    // on rightmost border
-          top_right[0] = top_y[15] * 0x01010101u;
-        } else {
-          memcpy(top_right, top_y + 16, sizeof(*top_right));
-        }
-      }
-      // replicate the top-right pixels below
-      top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
-
-      // predict and add residues for all 4x4 blocks in turn.
-      for (n = 0; n < 16; n++) {
-        uint8_t* const dst = y_dst + kScan[n];
-        VP8PredLuma4[dec->imodes_[n]](dst);
-        if (dec->non_zero_ac_ & (1 << n)) {
-          VP8Transform(coeffs + n * 16, dst, 0);
-        } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
-          VP8TransformDC(coeffs + n * 16, dst);
-        }
-      }
-    } else {    // 16x16
-      const int pred_func = CheckMode(dec, dec->imodes_[0]);
-      VP8PredLuma16[pred_func](y_dst);
-      if (dec->non_zero_) {
-        for (n = 0; n < 16; n++) {
-          uint8_t* const dst = y_dst + kScan[n];
-          if (dec->non_zero_ac_ & (1 << n)) {
-            VP8Transform(coeffs + n * 16, dst, 0);
-          } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
-            VP8TransformDC(coeffs + n * 16, dst);
-          }
-        }
-      }
-    }
-    {
-      // Chroma
-      const int pred_func = CheckMode(dec, dec->uvmode_);
-      VP8PredChroma8[pred_func](u_dst);
-      VP8PredChroma8[pred_func](v_dst);
-
-      if (dec->non_zero_ & 0x0f0000) {   // chroma-U
-        const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
-        if (dec->non_zero_ac_ & 0x0f0000) {
-          VP8TransformUV(u_coeffs, u_dst);
-        } else {
-          VP8TransformDCUV(u_coeffs, u_dst);
-        }
-      }
-      if (dec->non_zero_ & 0xf00000) {   // chroma-V
-        const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
-        if (dec->non_zero_ac_ & 0xf00000) {
-          VP8TransformUV(v_coeffs, v_dst);
-        } else {
-          VP8TransformDCUV(v_coeffs, v_dst);
-        }
-      }
-
-      // stash away top samples for next block
-      if (dec->mb_y_ < dec->mb_h_ - 1) {
-        memcpy(top_y, y_dst + 15 * BPS, 16);
-        memcpy(top_u, u_dst +  7 * BPS,  8);
-        memcpy(top_v, v_dst +  7 * BPS,  8);
-      }
-    }
-  }
-}
-
-//------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/idec.c b/drivers/webp/dec/idec.c
index 7df790ced8..abafb9f3d1 100644
--- a/drivers/webp/dec/idec.c
+++ b/drivers/webp/dec/idec.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Incremental decoding
@@ -13,14 +15,11 @@
 #include <string.h>
 #include <stdlib.h>
 
+#include "./alphai.h"
 #include "./webpi.h"
 #include "./vp8i.h"
 #include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 // In append mode, buffer allocations increase as multiples of this value.
 // Needs to be a power of 2.
 #define CHUNK_SIZE 4096
@@ -29,11 +28,13 @@ extern "C" {
 //------------------------------------------------------------------------------
 // Data structures for memory and states
 
-// Decoding states. State normally flows like HEADER->PARTS0->DATA->DONE.
+// Decoding states. State normally flows as:
+// WEBP_HEADER->VP8_HEADER->VP8_PARTS0->VP8_DATA->DONE for a lossy image, and
+// WEBP_HEADER->VP8L_HEADER->VP8L_DATA->DONE for a lossless image.
 // If there is any error the decoder goes into state ERROR.
 typedef enum {
-  STATE_PRE_VP8,  // All data before that of the first VP8 chunk.
-  STATE_VP8_FRAME_HEADER,  // For VP8 Frame header (within VP8 chunk).
+  STATE_WEBP_HEADER,  // All the data before that of the VP8/VP8L chunk.
+  STATE_VP8_HEADER,   // The VP8 Frame header (within the VP8 chunk).
   STATE_VP8_PARTS0,
   STATE_VP8_DATA,
   STATE_VP8L_HEADER,
@@ -71,32 +72,41 @@ struct WebPIDecoder {
   MemBuffer mem_;          // input memory buffer.
   WebPDecBuffer output_;   // output buffer (when no external one is supplied)
   size_t chunk_size_;      // Compressed VP8/VP8L size extracted from Header.
+
+  int last_mb_y_;          // last row reached for intra-mode decoding
 };
 
 // MB context to restore in case VP8DecodeMB() fails
 typedef struct {
   VP8MB left_;
   VP8MB info_;
-  uint8_t intra_t_[4];
-  uint8_t intra_l_[4];
-  VP8BitReader br_;
   VP8BitReader token_br_;
 } MBContext;
 
 //------------------------------------------------------------------------------
 // MemBuffer: incoming data handling
 
-static void RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
-  if (br->buf_ != NULL) {
-    br->buf_ += offset;
-    br->buf_end_ += offset;
-  }
-}
-
 static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
   return (mem->end_ - mem->start_);
 }
 
+// Returns non-zero if the compressed alpha data must be preserved: the image
+// is lossy, its alpha_data_ is set and is_alpha_decoded_ is still zero.
+static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
+  if (idec->state_ == STATE_WEBP_HEADER) {
+    // We haven't parsed the headers yet, so we don't know whether the image is
+    // lossy or lossless. This also means that we haven't parsed the ALPH chunk.
+    return 0;
+  }
+  if (idec->is_lossless_) {
+    return 0;  // ALPH chunk is not present for lossless images.
+  } else {
+    const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+    assert(dec != NULL);  // Must be true as idec->state_ != STATE_WEBP_HEADER.
+    return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
+  }
+}
+
 static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
   MemBuffer* const mem = &idec->mem_;
   const uint8_t* const new_base = mem->buf_ + mem->start_;
@@ -112,16 +122,36 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
       if (offset != 0) {
         int p;
         for (p = 0; p <= last_part; ++p) {
-          RemapBitReader(dec->parts_ + p, offset);
+          VP8RemapBitReader(dec->parts_ + p, offset);
         }
         // Remap partition #0 data pointer to new offset, but only in MAP
         // mode (in APPEND mode, partition #0 is copied into a fixed memory).
         if (mem->mode_ == MEM_MODE_MAP) {
-          RemapBitReader(&dec->br_, offset);
+          VP8RemapBitReader(&dec->br_, offset);
+        }
+      }
+      assert(last_part >= 0);  // check the index before parts_[] is accessed
+      {  // make the last partition extend to the current end of the buffer
+        const uint8_t* const last_start = dec->parts_[last_part].buf_;
+        VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
+                              mem->buf_ + mem->end_ - last_start);
+      }
+      if (NeedCompressedAlpha(idec)) {
+        ALPHDecoder* const alph_dec = dec->alph_dec_;
+        dec->alpha_data_ += offset;
+        if (alph_dec != NULL) {
+          if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
+            VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
+            assert(alph_vp8l_dec != NULL);
+            assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
+            VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
+                                   dec->alpha_data_ + ALPHA_HEADER_LEN,
+                                   dec->alpha_data_size_ - ALPHA_HEADER_LEN);
+          } else {  // alph_dec->method_ == ALPHA_NO_COMPRESSION
+            // Nothing special to do in this case.
+          }
         }
       }
-      assert(last_part >= 0);
-      dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
     } else {    // Resize lossless bitreader
       VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
       VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
@@ -133,8 +163,12 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
 // size if required and also updates VP8BitReader's if new memory is allocated.
 static int AppendToMemBuffer(WebPIDecoder* const idec,
                              const uint8_t* const data, size_t data_size) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
   MemBuffer* const mem = &idec->mem_;
-  const uint8_t* const old_base = mem->buf_ + mem->start_;
+  const int need_compressed_alpha = NeedCompressedAlpha(idec);
+  const uint8_t* const old_start = mem->buf_ + mem->start_;
+  const uint8_t* const old_base =
+      need_compressed_alpha ? dec->alpha_data_ : old_start;
   assert(mem->mode_ == MEM_MODE_APPEND);
   if (data_size > MAX_CHUNK_PAYLOAD) {
     // security safeguard: trying to allocate more than what the format
@@ -143,17 +177,18 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
   }
 
   if (mem->end_ + data_size > mem->buf_size_) {  // Need some free memory
-    const size_t current_size = MemDataSize(mem);
+    const size_t new_mem_start = old_start - old_base;
+    const size_t current_size = MemDataSize(mem) + new_mem_start;
     const uint64_t new_size = (uint64_t)current_size + data_size;
     const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
     uint8_t* const new_buf =
         (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
     if (new_buf == NULL) return 0;
     memcpy(new_buf, old_base, current_size);
-    free(mem->buf_);
+    WebPSafeFree(mem->buf_);
     mem->buf_ = new_buf;
     mem->buf_size_ = (size_t)extra_size;
-    mem->start_ = 0;
+    mem->start_ = new_mem_start;
     mem->end_ = current_size;
   }
 
@@ -161,14 +196,15 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
   mem->end_ += data_size;
   assert(mem->end_ <= mem->buf_size_);
 
-  DoRemap(idec, mem->buf_ + mem->start_ - old_base);
+  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
   return 1;
 }
 
 static int RemapMemBuffer(WebPIDecoder* const idec,
                           const uint8_t* const data, size_t data_size) {
   MemBuffer* const mem = &idec->mem_;
-  const uint8_t* const old_base = mem->buf_ + mem->start_;
+  const uint8_t* const old_buf = mem->buf_;
+  const uint8_t* const old_start = old_buf + mem->start_;
   assert(mem->mode_ == MEM_MODE_MAP);
 
   if (data_size < mem->buf_size_) return 0;  // can't remap to a shorter buffer!
@@ -176,7 +212,7 @@ static int RemapMemBuffer(WebPIDecoder* const idec,
   mem->buf_ = (uint8_t*)data;
   mem->end_ = mem->buf_size_ = data_size;
 
-  DoRemap(idec, mem->buf_ + mem->start_ - old_base);
+  DoRemap(idec, mem->buf_ + mem->start_ - old_start);
   return 1;
 }
 
@@ -191,8 +227,8 @@ static void InitMemBuffer(MemBuffer* const mem) {
 static void ClearMemBuffer(MemBuffer* const mem) {
   assert(mem);
   if (mem->mode_ == MEM_MODE_APPEND) {
-    free(mem->buf_);
-    free((void*)mem->part0_buf_);
+    WebPSafeFree(mem->buf_);
+    WebPSafeFree((void*)mem->part0_buf_);
   }
 }
 
@@ -206,35 +242,34 @@ static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
   return 1;
 }
 
+// To be called last.
+static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
+  const WebPDecoderOptions* const options = idec->params_.options;
+  WebPDecBuffer* const output = idec->params_.output;
+
+  idec->state_ = STATE_DONE;
+  if (options != NULL && options->flip) {
+    return WebPFlipBuffer(output);
+  } else {
+    return VP8_STATUS_OK;
+  }
+}
+
 //------------------------------------------------------------------------------
 // Macroblock-decoding contexts
 
 static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
                         MBContext* const context) {
-  const VP8BitReader* const br = &dec->br_;
-  const VP8MB* const left = dec->mb_info_ - 1;
-  const VP8MB* const info = dec->mb_info_ + dec->mb_x_;
-
-  context->left_ = *left;
-  context->info_ = *info;
-  context->br_ = *br;
+  context->left_ = dec->mb_info_[-1];
+  context->info_ = dec->mb_info_[dec->mb_x_];
   context->token_br_ = *token_br;
-  memcpy(context->intra_t_, dec->intra_t_ + 4 * dec->mb_x_, 4);
-  memcpy(context->intra_l_, dec->intra_l_, 4);
 }
 
 static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
                            VP8BitReader* const token_br) {
-  VP8BitReader* const br = &dec->br_;
-  VP8MB* const left = dec->mb_info_ - 1;
-  VP8MB* const info = dec->mb_info_ + dec->mb_x_;
-
-  *left = context->left_;
-  *info = context->info_;
-  *br = context->br_;
+  dec->mb_info_[-1] = context->left_;
+  dec->mb_info_[dec->mb_x_] = context->info_;
   *token_br = context->token_br_;
-  memcpy(dec->intra_t_ + 4 * dec->mb_x_, context->intra_t_, 4);
-  memcpy(dec->intra_l_, context->intra_l_, 4);
 }
 
 //------------------------------------------------------------------------------
@@ -242,7 +277,7 @@ static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
 static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
   if (idec->state_ == STATE_VP8_DATA) {
     VP8Io* const io = &idec->io_;
-    if (io->teardown) {
+    if (io->teardown != NULL) {
       io->teardown(io);
     }
   }
@@ -270,6 +305,7 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
 
   headers.data = data;
   headers.data_size = curr_size;
+  headers.have_all_data = 0;
   status = WebPParseHeaders(&headers);
   if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
     return VP8_STATUS_SUSPENDED;  // We haven't found a VP8 chunk yet.
@@ -285,15 +321,9 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
       return VP8_STATUS_OUT_OF_MEMORY;
     }
     idec->dec_ = dec;
-#ifdef WEBP_USE_THREAD
-    dec->use_threads_ = (idec->params_.options != NULL) &&
-                        (idec->params_.options->use_threads > 0);
-#else
-    dec->use_threads_ = 0;
-#endif
     dec->alpha_data_ = headers.alpha_data;
     dec->alpha_data_size_ = headers.alpha_data_size;
-    ChangeState(idec, STATE_VP8_FRAME_HEADER, headers.offset);
+    ChangeState(idec, STATE_VP8_HEADER, headers.offset);
   } else {
     VP8LDecoder* const dec = VP8LNew();
     if (dec == NULL) {
@@ -308,13 +338,14 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
 static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
   const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
   const size_t curr_size = MemDataSize(&idec->mem_);
+  int width, height;
   uint32_t bits;
 
   if (curr_size < VP8_FRAME_HEADER_SIZE) {
     // Not enough data bytes to extract VP8 Frame Header.
     return VP8_STATUS_SUSPENDED;
   }
-  if (!VP8GetInfo(data, curr_size, idec->chunk_size_, NULL, NULL)) {
+  if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
     return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
   }
 
@@ -328,30 +359,32 @@ static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
 }
 
 // Partition #0
-static int CopyParts0Data(WebPIDecoder* const idec) {
+static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
   VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
   VP8BitReader* const br = &dec->br_;
-  const size_t psize = br->buf_end_ - br->buf_;
+  const size_t part_size = br->buf_end_ - br->buf_;
   MemBuffer* const mem = &idec->mem_;
   assert(!idec->is_lossless_);
   assert(mem->part0_buf_ == NULL);
-  assert(psize > 0);
-  assert(psize <= mem->part0_size_);  // Format limit: no need for runtime check
+  // the following is a format limitation, no need for runtime check:
+  assert(part_size <= mem->part0_size_);
+  if (part_size == 0) {   // can't have zero-size partition #0
+    return VP8_STATUS_BITSTREAM_ERROR;
+  }
   if (mem->mode_ == MEM_MODE_APPEND) {
     // We copy and grab ownership of the partition #0 data.
-    uint8_t* const part0_buf = (uint8_t*)malloc(psize);
+    uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
     if (part0_buf == NULL) {
-      return 0;
+      return VP8_STATUS_OUT_OF_MEMORY;
     }
-    memcpy(part0_buf, br->buf_, psize);
+    memcpy(part0_buf, br->buf_, part_size);
     mem->part0_buf_ = part0_buf;
-    br->buf_ = part0_buf;
-    br->buf_end_ = part0_buf + psize;
+    VP8BitReaderSetBuffer(br, part0_buf, part_size);
   } else {
     // Else: just keep pointers to the partition #0's data in dec_->br_.
   }
-  mem->start_ += psize;
-  return 1;
+  mem->start_ += part_size;
+  return VP8_STATUS_OK;
 }
 
 static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
@@ -381,9 +414,14 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
   if (dec->status_ != VP8_STATUS_OK) {
     return IDecError(idec, dec->status_);
   }
+  // This change must be done before calling VP8InitFrame()
+  dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
+                                       io->width, io->height);
+  VP8InitDithering(params->options, dec);
 
-  if (!CopyParts0Data(idec)) {
-    return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
+  dec->status_ = CopyParts0Data(idec);
+  if (dec->status_ != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
   }
 
   // Finish setting up the decoding parameters. Will call io->setup().
@@ -407,50 +445,52 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
   VP8Io* const io = &idec->io_;
 
   assert(dec->ready_);
-
   for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
-    VP8BitReader* token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
-    if (dec->mb_x_ == 0) {
-      VP8InitScanline(dec);
+    if (idec->last_mb_y_ != dec->mb_y_) {
+      if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+        // note: normally, error shouldn't occur since we already have the whole
+        // partition0 available here in DecodeRemaining(). Reaching EOF while
+        // reading intra modes really means a BITSTREAM_ERROR.
+        return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+      }
+      idec->last_mb_y_ = dec->mb_y_;
     }
-    for (; dec->mb_x_ < dec->mb_w_;  dec->mb_x_++) {
+    for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+      VP8BitReader* const token_br =
+          &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
       MBContext context;
       SaveContext(dec, token_br, &context);
-
       if (!VP8DecodeMB(dec, token_br)) {
-        RestoreContext(&context, dec, token_br);
         // We shouldn't fail when MAX_MB data was available
         if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
           return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
         }
+        RestoreContext(&context, dec, token_br);
         return VP8_STATUS_SUSPENDED;
       }
-      VP8ReconstructBlock(dec);
-      // Store data and save block's filtering params
-      VP8StoreBlock(dec);
-
       // Release buffer only if there is only one partition
       if (dec->num_parts_ == 1) {
         idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
         assert(idec->mem_.start_ <= idec->mem_.end_);
       }
     }
+    VP8InitScanline(dec);   // Prepare for next scanline
+
+    // Reconstruct, filter and emit the row.
     if (!VP8ProcessRow(dec, io)) {
       return IDecError(idec, VP8_STATUS_USER_ABORT);
     }
-    dec->mb_x_ = 0;
   }
   // Synchronize the thread and check for errors.
   if (!VP8ExitCritical(dec, io)) {
     return IDecError(idec, VP8_STATUS_USER_ABORT);
   }
   dec->ready_ = 0;
-  idec->state_ = STATE_DONE;
-
-  return VP8_STATUS_OK;
+  return FinishDecoding(idec);
 }
 
-static int ErrorStatusLossless(WebPIDecoder* const idec, VP8StatusCode status) {
+static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
+                                         VP8StatusCode status) {
   if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) {
     return VP8_STATUS_SUSPENDED;
   }
@@ -467,9 +507,15 @@ static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
 
   // Wait until there's enough data for decoding header.
   if (curr_size < (idec->chunk_size_ >> 3)) {
-    return VP8_STATUS_SUSPENDED;
+    dec->status_ = VP8_STATUS_SUSPENDED;
+    return ErrorStatusLossless(idec, dec->status_);
   }
+
   if (!VP8LDecodeHeader(dec, io)) {
+    if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR &&
+        curr_size < idec->chunk_size_) {
+      dec->status_ = VP8_STATUS_SUSPENDED;
+    }
     return ErrorStatusLossless(idec, dec->status_);
   }
   // Allocate/verify output buffer now.
@@ -488,33 +534,29 @@ static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
   const size_t curr_size = MemDataSize(&idec->mem_);
   assert(idec->is_lossless_);
 
-  // At present Lossless decoder can't decode image incrementally. So wait till
-  // all the image data is aggregated before image can be decoded.
-  if (curr_size < idec->chunk_size_) {
-    return VP8_STATUS_SUSPENDED;
-  }
+  // Switch to incremental decoding if we don't have all the bytes available.
+  dec->incremental_ = (curr_size < idec->chunk_size_);
 
   if (!VP8LDecodeImage(dec)) {
     return ErrorStatusLossless(idec, dec->status_);
   }
-
-  idec->state_ = STATE_DONE;
-
-  return VP8_STATUS_OK;
+  assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED);
+  return (dec->status_ == VP8_STATUS_SUSPENDED) ? dec->status_
+                                                : FinishDecoding(idec);
 }
 
   // Main decoding loop
 static VP8StatusCode IDecode(WebPIDecoder* idec) {
   VP8StatusCode status = VP8_STATUS_SUSPENDED;
 
-  if (idec->state_ == STATE_PRE_VP8) {
+  if (idec->state_ == STATE_WEBP_HEADER) {
     status = DecodeWebPHeaders(idec);
   } else {
     if (idec->dec_ == NULL) {
       return VP8_STATUS_SUSPENDED;    // can't continue if we have no decoder.
     }
   }
-  if (idec->state_ == STATE_VP8_FRAME_HEADER) {
+  if (idec->state_ == STATE_VP8_HEADER) {
     status = DecodeVP8FrameHeader(idec);
   }
   if (idec->state_ == STATE_VP8_PARTS0) {
@@ -536,20 +578,23 @@ static VP8StatusCode IDecode(WebPIDecoder* idec) {
 // Public functions
 
 WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
-  WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(*idec));
+  WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec));
   if (idec == NULL) {
     return NULL;
   }
 
-  idec->state_ = STATE_PRE_VP8;
+  idec->state_ = STATE_WEBP_HEADER;
   idec->chunk_size_ = 0;
 
+  idec->last_mb_y_ = -1;
+
   InitMemBuffer(&idec->mem_);
   WebPInitDecBuffer(&idec->output_);
   VP8InitIo(&idec->io_);
 
   WebPResetDecParams(&idec->params_);
-  idec->params_.output = output_buffer ? output_buffer : &idec->output_;
+  idec->params_.output = (output_buffer != NULL) ? output_buffer
+                                                 : &idec->output_;
   WebPInitCustomIo(&idec->params_, &idec->io_);  // Plug the I/O functions.
 
   return idec;
@@ -581,14 +626,18 @@ void WebPIDelete(WebPIDecoder* idec) {
   if (idec == NULL) return;
   if (idec->dec_ != NULL) {
     if (!idec->is_lossless_) {
-      VP8Delete(idec->dec_);
+      if (idec->state_ == STATE_VP8_DATA) {
+        // Synchronize the thread, clean-up and check for errors.
+        VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
+      }
+      VP8Delete((VP8Decoder*)idec->dec_);
     } else {
-      VP8LDelete(idec->dec_);
+      VP8LDelete((VP8LDecoder*)idec->dec_);
     }
   }
   ClearMemBuffer(&idec->mem_);
   WebPFreeDecBuffer(&idec->output_);
-  free(idec);
+  WebPSafeFree(idec);
 }
 
 //------------------------------------------------------------------------------
@@ -596,12 +645,22 @@ void WebPIDelete(WebPIDecoder* idec) {
 
 WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
                           size_t output_buffer_size, int output_stride) {
+  const int is_external_memory = (output_buffer != NULL);
   WebPIDecoder* idec;
+
   if (mode >= MODE_YUV) return NULL;
+  if (!is_external_memory) {    // Overwrite parameters to sane values.
+    output_buffer_size = 0;
+    output_stride = 0;
+  } else {  // A buffer was passed. Validate the other params.
+    if (output_stride == 0 || output_buffer_size == 0) {
+      return NULL;   // invalid parameter.
+    }
+  }
   idec = WebPINewDecoder(NULL);
   if (idec == NULL) return NULL;
   idec->output_.colorspace = mode;
-  idec->output_.is_external_memory = 1;
+  idec->output_.is_external_memory = is_external_memory;
   idec->output_.u.RGBA.rgba = output_buffer;
   idec->output_.u.RGBA.stride = output_stride;
   idec->output_.u.RGBA.size = output_buffer_size;
@@ -612,10 +671,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
                            uint8_t* u, size_t u_size, int u_stride,
                            uint8_t* v, size_t v_size, int v_stride,
                            uint8_t* a, size_t a_size, int a_stride) {
-  WebPIDecoder* const idec = WebPINewDecoder(NULL);
+  const int is_external_memory = (luma != NULL);
+  WebPIDecoder* idec;
+  WEBP_CSP_MODE colorspace;
+
+  if (!is_external_memory) {    // Overwrite parameters to sane values.
+    luma_size = u_size = v_size = a_size = 0;
+    luma_stride = u_stride = v_stride = a_stride = 0;
+    u = v = a = NULL;
+    colorspace = MODE_YUVA;
+  } else {  // A luma buffer was passed. Validate the other parameters.
+    if (u == NULL || v == NULL) return NULL;
+    if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
+    if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
+    if (a != NULL) {
+      if (a_size == 0 || a_stride == 0) return NULL;
+    }
+    colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
+  }
+
+  idec = WebPINewDecoder(NULL);
   if (idec == NULL) return NULL;
-  idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
-  idec->output_.is_external_memory = 1;
+
+  idec->output_.colorspace = colorspace;
+  idec->output_.is_external_memory = is_external_memory;
   idec->output_.u.YUVA.y = luma;
   idec->output_.u.YUVA.y_stride = luma_stride;
   idec->output_.u.YUVA.y_size = luma_size;
@@ -768,7 +847,7 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
                     VP8IoSetupHook setup,
                     VP8IoTeardownHook teardown,
                     void* user_data) {
-  if (idec == NULL || idec->state_ > STATE_PRE_VP8) {
+  if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) {
     return 0;
   }
 
@@ -779,7 +858,3 @@ int WebPISetIOHooks(WebPIDecoder* const idec,
 
   return 1;
 }
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/io.c b/drivers/webp/dec/io.c
index 594804c2e6..13e469ab73 100644
--- a/drivers/webp/dec/io.c
+++ b/drivers/webp/dec/io.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // functions for sample output.
@@ -15,10 +17,7 @@
 #include "./webpi.h"
 #include "../dsp/dsp.h"
 #include "../dsp/yuv.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Main YUV<->RGB conversion functions
@@ -46,57 +45,17 @@ static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
 
 // Point-sampling U/V sampler.
 static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
-  WebPDecBuffer* output = p->output;
-  const WebPRGBABuffer* const buf = &output->u.RGBA;
-  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
-  const uint8_t* y_src = io->y;
-  const uint8_t* u_src = io->u;
-  const uint8_t* v_src = io->v;
-  const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace];
-  const int mb_w = io->mb_w;
-  const int last = io->mb_h - 1;
-  int j;
-  for (j = 0; j < last; j += 2) {
-    sample(y_src, y_src + io->y_stride, u_src, v_src,
-           dst, dst + buf->stride, mb_w);
-    y_src += 2 * io->y_stride;
-    u_src += io->uv_stride;
-    v_src += io->uv_stride;
-    dst += 2 * buf->stride;
-  }
-  if (j == last) {  // Just do the last line twice
-    sample(y_src, y_src, u_src, v_src, dst, dst, mb_w);
-  }
+  WebPDecBuffer* const output = p->output;
+  WebPRGBABuffer* const buf = &output->u.RGBA;
+  uint8_t* const dst = buf->rgba + io->mb_y * buf->stride;
+  WebPSamplerProcessPlane(io->y, io->y_stride,
+                          io->u, io->v, io->uv_stride,
+                          dst, buf->stride, io->mb_w, io->mb_h,
+                          WebPSamplers[output->colorspace]);
   return io->mb_h;
 }
 
 //------------------------------------------------------------------------------
-// YUV444 -> RGB conversion
-
-#if 0   // TODO(skal): this is for future rescaling.
-static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) {
-  WebPDecBuffer* output = p->output;
-  const WebPRGBABuffer* const buf = &output->u.RGBA;
-  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
-  const uint8_t* y_src = io->y;
-  const uint8_t* u_src = io->u;
-  const uint8_t* v_src = io->v;
-  const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace];
-  const int mb_w = io->mb_w;
-  const int last = io->mb_h;
-  int j;
-  for (j = 0; j < last; ++j) {
-    convert(y_src, u_src, v_src, dst, mb_w);
-    y_src += io->y_stride;
-    u_src += io->uv_stride;
-    v_src += io->uv_stride;
-    dst += buf->stride;
-  }
-  return io->mb_h;
-}
-#endif
-
-//------------------------------------------------------------------------------
 // Fancy upsampling
 
 #ifdef FANCY_UPSAMPLING
@@ -117,7 +76,7 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
 
   if (y == 0) {
     // First line is special cased. We mirror the u/v samples at boundary.
-    upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w);
+    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
   } else {
     // We can finish the left-over line from previous call.
     upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
@@ -160,14 +119,16 @@ static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
 
 //------------------------------------------------------------------------------
 
-static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
+static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
+                        int expected_num_lines_out) {
   const uint8_t* alpha = io->a;
   const WebPYUVABuffer* const buf = &p->output->u.YUVA;
   const int mb_w = io->mb_w;
   const int mb_h = io->mb_h;
   uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
   int j;
-
+  (void)expected_num_lines_out;
+  assert(expected_num_lines_out == mb_h);
   if (alpha != NULL) {
     for (j = 0; j < mb_h; ++j) {
       memcpy(dst, alpha, mb_w * sizeof(*dst));
@@ -210,7 +171,8 @@ static int GetAlphaSourceRow(const VP8Io* const io,
   return start_y;
 }
 
-static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
+static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
+                        int expected_num_lines_out) {
   const uint8_t* alpha = io->a;
   if (alpha != NULL) {
     const int mb_w = io->mb_w;
@@ -221,21 +183,13 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
     int num_rows;
     const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
     uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
-    uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
-    uint32_t alpha_mask = 0xff;
-    int i, j;
-
-    for (j = 0; j < num_rows; ++j) {
-      for (i = 0; i < mb_w; ++i) {
-        const uint32_t alpha_value = alpha[i];
-        dst[4 * i] = alpha_value;
-        alpha_mask &= alpha_value;
-      }
-      alpha += io->width;
-      dst += buf->stride;
-    }
-    // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
-    if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
+    uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
+    const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
+                                            num_rows, dst, buf->stride);
+    (void)expected_num_lines_out;
+    assert(expected_num_lines_out == num_rows);
+    // has_alpha is true if there's non-trivial alpha to premultiply with.
+    if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
       WebPApplyAlphaMultiply(base_rgba, alpha_first,
                              mb_w, num_rows, buf->stride);
     }
@@ -243,7 +197,8 @@ static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
   return 0;
 }
 
-static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
+static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
+                             int expected_num_lines_out) {
   const uint8_t* alpha = io->a;
   if (alpha != NULL) {
     const int mb_w = io->mb_w;
@@ -252,10 +207,13 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
     int num_rows;
     const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
     uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+#ifdef WEBP_SWAP_16BIT_CSP
+    uint8_t* alpha_dst = base_rgba;
+#else
     uint8_t* alpha_dst = base_rgba + 1;
+#endif
     uint32_t alpha_mask = 0x0f;
     int i, j;
-
     for (j = 0; j < num_rows; ++j) {
       for (i = 0; i < mb_w; ++i) {
         // Fill in the alpha value (converted to 4 bits).
@@ -266,6 +224,8 @@ static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {
       alpha += io->width;
       alpha_dst += buf->stride;
     }
+    (void)expected_num_lines_out;
+    assert(expected_num_lines_out == num_rows);
     if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {
       WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
     }
@@ -291,15 +251,35 @@ static int Rescale(const uint8_t* src, int src_stride,
 static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
   const int mb_h = io->mb_h;
   const int uv_mb_h = (mb_h + 1) >> 1;
-  const int num_lines_out = Rescale(io->y, io->y_stride, mb_h, &p->scaler_y);
+  WebPRescaler* const scaler = &p->scaler_y;
+  int num_lines_out = 0;
+  if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
+    // Before rescaling, we premultiply the luma directly into the io->y
+    // internal buffer. This is OK since these samples are not used for
+    // intra-prediction (the top samples are saved in cache_y_/u_/v_).
+    // But we need to cast the const away, though.
+    WebPMultRows((uint8_t*)io->y, io->y_stride,
+                 io->a, io->width, io->mb_w, mb_h, 0);
+  }
+  num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
   Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u);
   Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v);
   return num_lines_out;
 }
 
-static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {
+static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
+                                int expected_num_lines_out) {
   if (io->a != NULL) {
-    Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
+    const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+    uint8_t* dst_y = buf->y + p->last_y * buf->y_stride;
+    const uint8_t* src_a = buf->a + p->last_y * buf->a_stride;
+    const int num_lines_out = Rescale(io->a, io->width, io->mb_h, &p->scaler_a);
+    (void)expected_num_lines_out;
+    assert(expected_num_lines_out == num_lines_out);
+    if (num_lines_out > 0) {   // unmultiply the Y
+      WebPMultRows(dst_y, buf->y_stride, src_a, buf->a_stride,
+                   p->scaler_a.dst_width, num_lines_out, 1);
+    }
   }
   return 0;
 }
@@ -316,39 +296,34 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
   const size_t work_size = 2 * out_width;   // scratch memory for luma rescaler
   const size_t uv_work_size = 2 * uv_out_width;  // and for each u/v ones
   size_t tmp_size;
-  int32_t* work;
+  rescaler_t* work;
 
-  tmp_size = work_size + 2 * uv_work_size;
+  tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
   if (has_alpha) {
-    tmp_size += work_size;
+    tmp_size += work_size * sizeof(*work);
   }
-  p->memory = calloc(1, tmp_size * sizeof(*work));
+  p->memory = WebPSafeMalloc(1ULL, tmp_size);
   if (p->memory == NULL) {
     return 0;   // memory error
   }
-  work = (int32_t*)p->memory;
+  work = (rescaler_t*)p->memory;
   WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
                    buf->y, out_width, out_height, buf->y_stride, 1,
-                   io->mb_w, out_width, io->mb_h, out_height,
                    work);
   WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
                    buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
-                   uv_in_width, uv_out_width,
-                   uv_in_height, uv_out_height,
                    work + work_size);
   WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
                    buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
-                   uv_in_width, uv_out_width,
-                   uv_in_height, uv_out_height,
                    work + work_size + uv_work_size);
   p->emit = EmitRescaledYUV;
 
   if (has_alpha) {
     WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
                      buf->a, out_width, out_height, buf->a_stride, 1,
-                     io->mb_w, out_width, io->mb_h, out_height,
                      work + work_size + 2 * uv_work_size);
     p->emit_alpha = EmitRescaledAlphaYUV;
+    WebPInitAlphaProcessing();
   }
   return 1;
 }
@@ -360,13 +335,13 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) {
   const WebPYUV444Converter convert =
       WebPYUV444Converters[p->output->colorspace];
   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;
+  uint8_t* dst = buf->rgba + y_pos * buf->stride;
   int num_lines_out = 0;
   // For RGB rescaling, because of the YUV420, current scan position
   // U/V can be +1/-1 line from the Y one.  Hence the double test.
   while (WebPRescalerHasPendingOutput(&p->scaler_y) &&
          WebPRescalerHasPendingOutput(&p->scaler_u)) {
-    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    assert(y_pos + num_lines_out < p->output->height);
     assert(p->scaler_u.y_accum == p->scaler_v.y_accum);
     WebPRescalerExportRow(&p->scaler_y);
     WebPRescalerExportRow(&p->scaler_u);
@@ -388,65 +363,69 @@ static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
     const int y_lines_in =
         WebPRescalerImport(&p->scaler_y, mb_h - j,
                            io->y + j * io->y_stride, io->y_stride);
-    const int u_lines_in =
-        WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
-                           io->u + uv_j * io->uv_stride, io->uv_stride);
-    const int v_lines_in =
-        WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
-                           io->v + uv_j * io->uv_stride, io->uv_stride);
-    (void)v_lines_in;   // remove a gcc warning
-    assert(u_lines_in == v_lines_in);
     j += y_lines_in;
-    uv_j += u_lines_in;
-    num_lines_out += ExportRGB(p, num_lines_out);
+    if (WebPRescaleNeededLines(&p->scaler_u, uv_mb_h - uv_j)) {
+      const int u_lines_in =
+          WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
+                             io->u + uv_j * io->uv_stride, io->uv_stride);
+      const int v_lines_in =
+          WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
+                             io->v + uv_j * io->uv_stride, io->uv_stride);
+      (void)v_lines_in;   // remove a gcc warning
+      assert(u_lines_in == v_lines_in);
+      uv_j += u_lines_in;
+    }
+    num_lines_out += ExportRGB(p, p->last_y + num_lines_out);
   }
   return num_lines_out;
 }
 
-static int ExportAlpha(WebPDecParams* const p, int y_pos) {
+static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
+  uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
   const WEBP_CSP_MODE colorspace = p->output->colorspace;
   const int alpha_first =
       (colorspace == MODE_ARGB || colorspace == MODE_Argb);
   uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
   int num_lines_out = 0;
   const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
-  uint32_t alpha_mask = 0xff;
+  uint32_t non_opaque = 0;
   const int width = p->scaler_a.dst_width;
 
-  while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
-    int i;
-    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+  while (WebPRescalerHasPendingOutput(&p->scaler_a) &&
+         num_lines_out < max_lines_out) {
+    assert(y_pos + num_lines_out < p->output->height);
     WebPRescalerExportRow(&p->scaler_a);
-    for (i = 0; i < width; ++i) {
-      const uint32_t alpha_value = p->scaler_a.dst[i];
-      dst[4 * i] = alpha_value;
-      alpha_mask &= alpha_value;
-    }
+    non_opaque |= WebPDispatchAlpha(p->scaler_a.dst, 0, width, 1, dst, 0);
     dst += buf->stride;
     ++num_lines_out;
   }
-  if (is_premult_alpha && alpha_mask != 0xff) {
+  if (is_premult_alpha && non_opaque) {
     WebPApplyAlphaMultiply(base_rgba, alpha_first,
                            width, num_lines_out, buf->stride);
   }
   return num_lines_out;
 }
 
-static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
+static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
+                               int max_lines_out) {
   const WebPRGBABuffer* const buf = &p->output->u.RGBA;
-  uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;
+  uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
+#ifdef WEBP_SWAP_16BIT_CSP
+  uint8_t* alpha_dst = base_rgba;
+#else
   uint8_t* alpha_dst = base_rgba + 1;
+#endif
   int num_lines_out = 0;
   const WEBP_CSP_MODE colorspace = p->output->colorspace;
   const int width = p->scaler_a.dst_width;
   const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
   uint32_t alpha_mask = 0x0f;
 
-  while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
+  while (WebPRescalerHasPendingOutput(&p->scaler_a) &&
+         num_lines_out < max_lines_out) {
     int i;
-    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    assert(y_pos + num_lines_out < p->output->height);
     WebPRescalerExportRow(&p->scaler_a);
     for (i = 0; i < width; ++i) {
       // Fill in the alpha value (converted to 4 bits).
@@ -463,15 +442,17 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {
   return num_lines_out;
 }
 
-static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {
+static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
+                                int expected_num_out_lines) {
   if (io->a != NULL) {
     WebPRescaler* const scaler = &p->scaler_a;
-    int j = 0;
-    int pos = 0;
-    while (j < io->mb_h) {
-      j += WebPRescalerImport(scaler, io->mb_h - j,
-                              io->a + j * io->width, io->width);
-      pos += p->emit_alpha_row(p, pos);
+    int lines_left = expected_num_out_lines;
+    const int y_end = p->last_y + lines_left;
+    while (lines_left > 0) {
+      const int row_offset = scaler->src_y - io->mb_y;
+      WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y,
+                         io->a + row_offset * io->width, io->width);
+      lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left);
     }
   }
   return 0;
@@ -484,9 +465,9 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
   const int uv_in_width  = (io->mb_w + 1) >> 1;
   const int uv_in_height = (io->mb_h + 1) >> 1;
   const size_t work_size = 2 * out_width;   // scratch memory for one rescaler
-  int32_t* work;  // rescalers work area
+  rescaler_t* work;  // rescalers work area
   uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
-  size_t tmp_size1, tmp_size2;
+  size_t tmp_size1, tmp_size2, total_size;
 
   tmp_size1 = 3 * work_size;
   tmp_size2 = 3 * out_width;
@@ -494,30 +475,28 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
     tmp_size1 += work_size;
     tmp_size2 += out_width;
   }
-  p->memory = calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp));
+  total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
+  p->memory = WebPSafeMalloc(1ULL, total_size);
   if (p->memory == NULL) {
     return 0;   // memory error
   }
-  work = (int32_t*)p->memory;
+  work = (rescaler_t*)p->memory;
   tmp = (uint8_t*)(work + tmp_size1);
   WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
                    tmp + 0 * out_width, out_width, out_height, 0, 1,
-                   io->mb_w, out_width, io->mb_h, out_height,
                    work + 0 * work_size);
   WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
                    tmp + 1 * out_width, out_width, out_height, 0, 1,
-                   io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
                    work + 1 * work_size);
   WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
                    tmp + 2 * out_width, out_width, out_height, 0, 1,
-                   io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
                    work + 2 * work_size);
   p->emit = EmitRescaledRGB;
+  WebPInitYUV444Converters();
 
   if (has_alpha) {
     WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
                      tmp + 3 * out_width, out_width, out_height, 0, 1,
-                     io->mb_w, out_width, io->mb_h, out_height,
                      work + 3 * work_size);
     p->emit_alpha = EmitRescaledAlphaRGB;
     if (p->output->colorspace == MODE_RGBA_4444 ||
@@ -526,6 +505,7 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
     } else {
       p->emit_alpha_row = ExportAlpha;
     }
+    WebPInitAlphaProcessing();
   }
   return 1;
 }
@@ -546,7 +526,9 @@ static int CustomSetup(VP8Io* io) {
   if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) {
     return 0;
   }
-
+  if (is_alpha && WebPIsPremultipliedMode(colorspace)) {
+    WebPInitUpsamplers();
+  }
   if (io->use_scaling) {
     const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
     if (!ok) {
@@ -554,11 +536,12 @@ static int CustomSetup(VP8Io* io) {
     }
   } else {
     if (is_rgb) {
+      WebPInitSamplers();
       p->emit = EmitSampledRGB;   // default
-#ifdef FANCY_UPSAMPLING
       if (io->fancy_upsampling) {
+#ifdef FANCY_UPSAMPLING
         const int uv_width = (io->mb_w + 1) >> 1;
-        p->memory = malloc(io->mb_w + 2 * uv_width);
+        p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width));
         if (p->memory == NULL) {
           return 0;   // memory error.
         }
@@ -567,18 +550,20 @@ static int CustomSetup(VP8Io* io) {
         p->tmp_v = p->tmp_u + uv_width;
         p->emit = EmitFancyRGB;
         WebPInitUpsamplers();
-      }
 #endif
+      }
     } else {
       p->emit = EmitYUV;
     }
     if (is_alpha) {  // need transparency output
-      if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply();
       p->emit_alpha =
           (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
               EmitAlphaRGBA4444
           : is_rgb ? EmitAlphaRGB
           : EmitAlphaYUV;
+      if (is_rgb) {
+        WebPInitAlphaProcessing();
+      }
     }
   }
 
@@ -601,8 +586,8 @@ static int CustomPut(const VP8Io* io) {
     return 0;
   }
   num_lines_out = p->emit(io, p);
-  if (p->emit_alpha) {
-    p->emit_alpha(io, p);
+  if (p->emit_alpha != NULL) {
+    p->emit_alpha(io, p, num_lines_out);
   }
   p->last_y += num_lines_out;
   return 1;
@@ -612,7 +597,7 @@ static int CustomPut(const VP8Io* io) {
 
 static void CustomTeardown(const VP8Io* io) {
   WebPDecParams* const p = (WebPDecParams*)io->opaque;
-  free(p->memory);
+  WebPSafeFree(p->memory);
   p->memory = NULL;
 }
 
@@ -627,7 +612,3 @@ void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/quant.c b/drivers/webp/dec/quant.c
index d54097af0d..5b648f942c 100644
--- a/drivers/webp/dec/quant.c
+++ b/drivers/webp/dec/quant.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Quantizer initialization
@@ -11,10 +13,6 @@
 
 #include "./vp8i.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 static WEBP_INLINE int clip(int v, int M) {
   return v < 0 ? 0 : v > M ? M : v;
 }
@@ -102,12 +100,11 @@ void VP8ParseQuant(VP8Decoder* const dec) {
 
       m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
       m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
+
+      m->uv_quant_ = q + dquv_ac;   // for dithering strength evaluation
     }
   }
 }
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/tree.c b/drivers/webp/dec/tree.c
index 82484e4c55..c2007ea733 100644
--- a/drivers/webp/dec/tree.c
+++ b/drivers/webp/dec/tree.c
@@ -1,22 +1,21 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Coding trees and probas
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "vp8i.h"
+#include "./vp8i.h"
+#include "../utils/bit_reader_inl.h"
 
 #define USE_GENERIC_TREE
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 #ifdef USE_GENERIC_TREE
 static const int8_t kYModesIntra4[18] = {
   -B_DC_PRED, 1,
@@ -31,61 +30,12 @@ static const int8_t kYModesIntra4[18] = {
 };
 #endif
 
-#ifndef ONLY_KEYFRAME_CODE
-
-// inter prediction modes
-enum {
-  LEFT4 = 0, ABOVE4 = 1, ZERO4 = 2, NEW4 = 3,
-  NEARESTMV, NEARMV, ZEROMV, NEWMV, SPLITMV };
-
-static const int8_t kYModesInter[8] = {
-  -DC_PRED, 1,
-    2, 3,
-      -V_PRED, -H_PRED,
-      -TM_PRED, -B_PRED
-};
-
-static const int8_t kMBSplit[6] = {
-  -3, 1,
-    -2, 2,
-      -0, -1
-};
-
-static const int8_t kMVRef[8] = {
-  -ZEROMV, 1,
-    -NEARESTMV, 2,
-      -NEARMV, 3,
-        -NEWMV, -SPLITMV
-};
-
-static const int8_t kMVRef4[6] = {
-  -LEFT4, 1,
-    -ABOVE4, 2,
-      -ZERO4, -NEW4
-};
-#endif
-
 //------------------------------------------------------------------------------
 // Default probabilities
 
-// Inter
-#ifndef ONLY_KEYFRAME_CODE
-static const uint8_t kYModeProbaInter0[4] = { 112, 86, 140, 37 };
-static const uint8_t kUVModeProbaInter0[3] = { 162, 101, 204 };
-static const uint8_t kMVProba0[2][NUM_MV_PROBAS] = {
-  { 162, 128, 225, 146, 172, 147, 214,  39,
-    156, 128, 129, 132,  75, 145, 178, 206,
-    239, 254, 254 },
-  { 164, 128, 204, 170, 119, 235, 140, 230,
-    228, 128, 130, 130,  74, 148, 180, 203,
-    236, 254, 254 }
-};
-#endif
-
 // Paragraph 13.5
 static const uint8_t
   CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
-  // genereated using vp8_default_coef_probs() in entropy.c:129
   { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
       { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
       { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
@@ -326,28 +276,38 @@ static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
 
 void VP8ResetProba(VP8Proba* const proba) {
   memset(proba->segments_, 255u, sizeof(proba->segments_));
-  memcpy(proba->coeffs_, CoeffsProba0, sizeof(CoeffsProba0));
-#ifndef ONLY_KEYFRAME_CODE
-  memcpy(proba->mv_, kMVProba0, sizeof(kMVProba0));
-  memcpy(proba->ymode_, kYModeProbaInter0, sizeof(kYModeProbaInter0));
-  memcpy(proba->uvmode_, kUVModeProbaInter0, sizeof(kUVModeProbaInter0));
-#endif
+  // proba->bands_[][] is initialized later
 }
 
-void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec) {
-  uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_;
+static void ParseIntraMode(VP8BitReader* const br,
+                           VP8Decoder* const dec, int mb_x) {
+  uint8_t* const top = dec->intra_t_ + 4 * mb_x;
   uint8_t* const left = dec->intra_l_;
-  // Hardcoded 16x16 intra-mode decision tree.
-  dec->is_i4x4_ = !VP8GetBit(br, 145);   // decide for B_PRED first
-  if (!dec->is_i4x4_) {
+  VP8MBData* const block = dec->mb_data_ + mb_x;
+
+  // Note: we don't save segment map (yet), as we don't expect
+  // to decode more than 1 keyframe.
+  if (dec->segment_hdr_.update_map_) {
+    // Hardcoded tree parsing
+    block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0])
+                    ? VP8GetBit(br, dec->proba_.segments_[1])
+                    : 2 + VP8GetBit(br, dec->proba_.segments_[2]);
+  } else {
+    block->segment_ = 0;  // default for intra
+  }
+  if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_);
+
+  block->is_i4x4_ = !VP8GetBit(br, 145);   // decide for B_PRED first
+  if (!block->is_i4x4_) {
+    // Hardcoded 16x16 intra-mode decision tree.
     const int ymode =
         VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
                            : (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
-    dec->imodes_[0] = ymode;
-    memset(top, ymode, 4 * sizeof(top[0]));
-    memset(left, ymode, 4 * sizeof(left[0]));
+    block->imodes_[0] = ymode;
+    memset(top, ymode, 4 * sizeof(*top));
+    memset(left, ymode, 4 * sizeof(*left));
   } else {
-    uint8_t* modes = dec->imodes_;
+    uint8_t* modes = block->imodes_;
     int y;
     for (y = 0; y < 4; ++y) {
       int ymode = left[y];
@@ -356,10 +316,10 @@ void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec) {
         const uint8_t* const prob = kBModesProba[top[x]][ymode];
 #ifdef USE_GENERIC_TREE
         // Generic tree-parsing
-        int i = 0;
-        do {
+        int i = kYModesIntra4[VP8GetBit(br, prob[0])];
+        while (i > 0) {
           i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])];
-        } while (i > 0);
+        }
         ymode = -i;
 #else
         // Hardcoded tree parsing
@@ -374,15 +334,24 @@ void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec) {
                             (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
 #endif    // USE_GENERIC_TREE
         top[x] = ymode;
-        *modes++ = ymode;
       }
+      memcpy(modes, top, 4 * sizeof(*top));
+      modes += 4;
       left[y] = ymode;
     }
   }
   // Hardcoded UVMode decision tree
-  dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
-               : !VP8GetBit(br, 114) ? V_PRED
-               : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
+  block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
+                 : !VP8GetBit(br, 114) ? V_PRED
+                 : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
+}
+
+int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
+  int mb_x;
+  for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
+    ParseIntraMode(br, dec, mb_x);
+  }
+  return !dec->br_.eof_;
 }
 
 //------------------------------------------------------------------------------
@@ -524,18 +493,13 @@ static const uint8_t
   }
 };
 
-#ifndef ONLY_KEYFRAME_CODE
-static const uint8_t MVUpdateProba[2][NUM_MV_PROBAS] = {
-  { 237, 246, 253, 253, 254, 254, 254, 254,
-    254, 254, 254, 254, 254, 254, 250, 250,
-    252, 254, 254 },
-  { 231, 243, 245, 253, 254, 254, 254, 254,
-    254, 254, 254, 254, 254, 254, 251, 251,
-    254, 254, 254 }
+// Paragraph 9.9
+
+static const int kBands[16 + 1] = {
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  // extra entry as sentinel
 };
-#endif
 
-// Paragraph 9.9
 void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
   VP8Proba* const proba = &dec->proba_;
   int t, b, c, p;
@@ -543,47 +507,19 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
     for (b = 0; b < NUM_BANDS; ++b) {
       for (c = 0; c < NUM_CTX; ++c) {
         for (p = 0; p < NUM_PROBAS; ++p) {
-          if (VP8GetBit(br, CoeffsUpdateProba[t][b][c][p])) {
-            proba->coeffs_[t][b][c][p] = VP8GetValue(br, 8);
-          }
+          const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ?
+                        VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p];
+          proba->bands_[t][b].probas_[c][p] = v;
         }
       }
     }
+    for (b = 0; b < 16 + 1; ++b) {
+      proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
+    }
   }
   dec->use_skip_proba_ = VP8Get(br);
   if (dec->use_skip_proba_) {
     dec->skip_p_ = VP8GetValue(br, 8);
   }
-#ifndef ONLY_KEYFRAME_CODE
-  if (!dec->frm_hdr_.key_frame_) {
-    int i;
-    dec->intra_p_ = VP8GetValue(br, 8);
-    dec->last_p_ = VP8GetValue(br, 8);
-    dec->golden_p_ = VP8GetValue(br, 8);
-    if (VP8Get(br)) {   // update y-mode
-      for (i = 0; i < 4; ++i) {
-        proba->ymode_[i] = VP8GetValue(br, 8);
-      }
-    }
-    if (VP8Get(br)) {   // update uv-mode
-      for (i = 0; i < 3; ++i) {
-        proba->uvmode_[i] = VP8GetValue(br, 8);
-      }
-    }
-    // update MV
-    for (i = 0; i < 2; ++i) {
-      int k;
-      for (k = 0; k < NUM_MV_PROBAS; ++k) {
-        if (VP8GetBit(br, MVUpdateProba[i][k])) {
-          const int v = VP8GetValue(br, 7);
-          proba->mv_[i][k] = v ? v << 1 : 1;
-        }
-      }
-    }
-  }
-#endif
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/vp8.c b/drivers/webp/dec/vp8.c
index b0ccfa2a06..d89eb1c59e 100644
--- a/drivers/webp/dec/vp8.c
+++ b/drivers/webp/dec/vp8.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // main entry for the decoder
@@ -11,14 +13,12 @@
 
 #include <stdlib.h>
 
+#include "./alphai.h"
 #include "./vp8i.h"
 #include "./vp8li.h"
 #include "./webpi.h"
-#include "../utils/bit_reader.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/bit_reader_inl.h"
+#include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
 
@@ -45,10 +45,10 @@ int VP8InitIoInternal(VP8Io* const io, int version) {
 }
 
 VP8Decoder* VP8New(void) {
-  VP8Decoder* const dec = (VP8Decoder*)calloc(1, sizeof(*dec));
+  VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
   if (dec != NULL) {
     SetOk(dec);
-    WebPWorkerInit(&dec->worker_);
+    WebPGetWorkerInterface()->Init(&dec->worker_);
     dec->ready_ = 0;
     dec->num_parts_ = 1;
   }
@@ -69,16 +69,13 @@ const char* VP8StatusMessage(VP8Decoder* const dec) {
 void VP8Delete(VP8Decoder* const dec) {
   if (dec != NULL) {
     VP8Clear(dec);
-    free(dec);
+    WebPSafeFree(dec);
   }
 }
 
 int VP8SetError(VP8Decoder* const dec,
                 VP8StatusCode error, const char* const msg) {
-  // TODO This check would be unnecessary if alpha decompression was separated
-  // from VP8ProcessRow/FinishRow. This avoids setting 'dec->status_' to
-  // something other than VP8_STATUS_BITSTREAM_ERROR on alpha decompression
-  // failure.
+  // The oldest error reported takes precedence over the new one.
   if (dec->status_ == VP8_STATUS_OK) {
     dec->status_ = error;
     dec->error_msg_ = msg;
@@ -121,6 +118,9 @@ int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
     if (((bits >> 5)) >= chunk_size) {  // partition_length
       return 0;         // inconsistent size information.
     }
+    if (w == 0 || h == 0) {
+      return 0;         // A zero width or a zero height is not supported.
+    }
 
     if (width) {
       *width = w;
@@ -190,25 +190,27 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
   const uint8_t* sz = buf;
   const uint8_t* buf_end = buf + size;
   const uint8_t* part_start;
-  int last_part;
-  int p;
+  size_t size_left = size;
+  size_t last_part;
+  size_t p;
 
   dec->num_parts_ = 1 << VP8GetValue(br, 2);
   last_part = dec->num_parts_ - 1;
-  part_start = buf + last_part * 3;
-  if (buf_end < part_start) {
+  if (size < 3 * last_part) {
     // we can't even read the sizes with sz[]! That's a failure.
     return VP8_STATUS_NOT_ENOUGH_DATA;
   }
+  part_start = buf + last_part * 3;
+  size_left -= last_part * 3;
   for (p = 0; p < last_part; ++p) {
-    const uint32_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
-    const uint8_t* part_end = part_start + psize;
-    if (part_end > buf_end) part_end = buf_end;
-    VP8InitBitReader(dec->parts_ + p, part_start, part_end);
-    part_start = part_end;
+    size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
+    if (psize > size_left) psize = size_left;
+    VP8InitBitReader(dec->parts_ + p, part_start, psize);
+    part_start += psize;
+    size_left -= psize;
     sz += 3;
   }
-  VP8InitBitReader(dec->parts_ + last_part, part_start, buf_end);
+  VP8InitBitReader(dec->parts_ + last_part, part_start, size_left);
   return (part_start < buf_end) ? VP8_STATUS_OK :
            VP8_STATUS_SUSPENDED;   // Init is ok, but there's not enough data
 }
@@ -236,20 +238,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
     }
   }
   dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
-  if (dec->filter_type_ > 0) {    // precompute filter levels per segment
-    if (dec->segment_hdr_.use_segment_) {
-      int s;
-      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        int strength = dec->segment_hdr_.filter_strength_[s];
-        if (!dec->segment_hdr_.absolute_delta_) {
-          strength += hdr->level_;
-        }
-        dec->filter_levels_[s] = strength;
-      }
-    } else {
-      dec->filter_levels_[0] = hdr->level_;
-    }
-  }
   return !br->eof_;
 }
 
@@ -261,7 +249,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
   VP8PictureHeader* pic_hdr;
   VP8BitReader* br;
   VP8StatusCode status;
-  WebPHeaderStructure headers;
 
   if (dec == NULL) {
     return 0;
@@ -271,33 +258,8 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
     return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
                        "null VP8Io passed to VP8GetHeaders()");
   }
-
-  // Process Pre-VP8 chunks.
-  headers.data = io->data;
-  headers.data_size = io->data_size;
-  status = WebPParseHeaders(&headers);
-  if (status != VP8_STATUS_OK) {
-    return VP8SetError(dec, status, "Incorrect/incomplete header.");
-  }
-  if (headers.is_lossless) {
-    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
-                       "Unexpected lossless format encountered.");
-  }
-
-  if (dec->alpha_data_ == NULL) {
-    assert(dec->alpha_data_size_ == 0);
-    // We have NOT set alpha data yet. Set it now.
-    // (This is to ensure that dec->alpha_data_ is NOT reset to NULL if
-    // WebPParseHeaders() is called more than once, as in incremental decoding
-    // case.)
-    dec->alpha_data_ = headers.alpha_data;
-    dec->alpha_data_size_ = headers.alpha_data_size;
-  }
-
-  // Process the VP8 frame header.
-  buf = headers.data + headers.offset;
-  buf_size = headers.data_size - headers.offset;
-  assert(headers.data_size >= headers.offset);  // WebPParseHeaders' guarantee
+  buf = io->data;
+  buf_size = io->data_size;
   if (buf_size < 4) {
     return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                        "Truncated header.");
@@ -355,7 +317,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 
     VP8ResetProba(&dec->proba_);
     ResetSegmentHeader(&dec->segment_hdr_);
-    dec->segment_ = 0;    // default for intra
   }
 
   // Check if we have all the partition #0 available, and initialize dec->br_
@@ -366,7 +327,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
   }
 
   br = &dec->br_;
-  VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_);
+  VP8InitBitReader(br, buf, frm_hdr->partition_length_);
   buf += frm_hdr->partition_length_;
   buf_size -= frm_hdr->partition_length_;
 
@@ -393,63 +354,14 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 
   // Frame buffer marking
   if (!frm_hdr->key_frame_) {
-    // Paragraph 9.7
-#ifndef ONLY_KEYFRAME_CODE
-    dec->buffer_flags_ = VP8Get(br) << 0;   // update golden
-    dec->buffer_flags_ |= VP8Get(br) << 1;  // update alt ref
-    if (!(dec->buffer_flags_ & 1)) {
-      dec->buffer_flags_ |= VP8GetValue(br, 2) << 2;
-    }
-    if (!(dec->buffer_flags_ & 2)) {
-      dec->buffer_flags_ |= VP8GetValue(br, 2) << 4;
-    }
-    dec->buffer_flags_ |= VP8Get(br) << 6;    // sign bias golden
-    dec->buffer_flags_ |= VP8Get(br) << 7;    // sign bias alt ref
-#else
     return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
                        "Not a key frame.");
-#endif
-  } else {
-    dec->buffer_flags_ = 0x003 | 0x100;
   }
 
-  // Paragraph 9.8
-#ifndef ONLY_KEYFRAME_CODE
-  dec->update_proba_ = VP8Get(br);
-  if (!dec->update_proba_) {    // save for later restore
-    dec->proba_saved_ = dec->proba_;
-  }
-  dec->buffer_flags_ &= 1 << 8;
-  dec->buffer_flags_ |=
-      (frm_hdr->key_frame_ || VP8Get(br)) << 8;    // refresh last frame
-#else
-  VP8Get(br);   // just ignore the value of update_proba_
-#endif
+  VP8Get(br);   // ignore the value of update_proba_
 
   VP8ParseProba(br, dec);
 
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  // Extensions
-  if (dec->pic_hdr_.colorspace_) {
-    const size_t kTrailerSize = 8;
-    const uint8_t kTrailerMarker = 0x01;
-    const uint8_t* ext_buf = buf - kTrailerSize;
-    size_t size;
-
-    if (frm_hdr->partition_length_ < kTrailerSize ||
-        ext_buf[kTrailerSize - 1] != kTrailerMarker) {
-      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
-                         "RIFF: Inconsistent extra information.");
-    }
-
-    // Layer
-    size = (ext_buf[0] << 0) | (ext_buf[1] << 8) | (ext_buf[2] << 16);
-    dec->layer_data_size_ = size;
-    dec->layer_data_ = NULL;  // will be set later
-    dec->layer_colorspace_ = ext_buf[3];
-  }
-#endif
-
   // sanitized state
   dec->ready_ = 1;
   return 1;
@@ -458,11 +370,6 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
 //------------------------------------------------------------------------------
 // Residual decoding (Paragraph 13.2 / 13.3)
 
-static const uint8_t kBands[16 + 1] = {
-  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
-  0  // extra entry as sentinel
-};
-
 static const uint8_t kCat3[] = { 173, 148, 140, 0 };
 static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
 static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
@@ -473,253 +380,226 @@ static const uint8_t kZigzag[16] = {
   0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
 };
 
-typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS];  // for const-casting
+// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
+static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
+  int v;
+  if (!VP8GetBit(br, p[3])) {
+    if (!VP8GetBit(br, p[4])) {
+      v = 2;
+    } else {
+      v = 3 + VP8GetBit(br, p[5]);
+    }
+  } else {
+    if (!VP8GetBit(br, p[6])) {
+      if (!VP8GetBit(br, p[7])) {
+        v = 5 + VP8GetBit(br, 159);
+      } else {
+        v = 7 + 2 * VP8GetBit(br, 165);
+        v += VP8GetBit(br, 145);
+      }
+    } else {
+      const uint8_t* tab;
+      const int bit1 = VP8GetBit(br, p[8]);
+      const int bit0 = VP8GetBit(br, p[9 + bit1]);
+      const int cat = 2 * bit1 + bit0;
+      v = 0;
+      for (tab = kCat3456[cat]; *tab; ++tab) {
+        v += v + VP8GetBit(br, *tab);
+      }
+      v += 3 + (8 << cat);
+    }
+  }
+  return v;
+}
 
 // Returns the position of the last non-zero coeff plus one
-// (and 0 if there's no coeff at all)
-static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
+static int GetCoeffs(VP8BitReader* const br, const VP8BandProbas* const prob[],
                      int ctx, const quant_t dq, int n, int16_t* out) {
-  // n is either 0 or 1 here. kBands[n] is not necessary for extracting '*p'.
-  const uint8_t* p = prob[n][ctx];
-  if (!VP8GetBit(br, p[0])) {   // first EOB is more a 'CBP' bit.
-    return 0;
-  }
-  while (1) {
-    ++n;
-    if (!VP8GetBit(br, p[1])) {
-      p = prob[kBands[n]][0];
-    } else {  // non zero coeff
-      int v, j;
+  const uint8_t* p = prob[n]->probas_[ctx];
+  for (; n < 16; ++n) {
+    if (!VP8GetBit(br, p[0])) {
+      return n;  // previous coeff was last non-zero coeff
+    }
+    while (!VP8GetBit(br, p[1])) {       // sequence of zero coeffs
+      p = prob[++n]->probas_[0];
+      if (n == 16) return 16;
+    }
+    {        // non zero coeff
+      const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
+      int v;
       if (!VP8GetBit(br, p[2])) {
-        p = prob[kBands[n]][1];
         v = 1;
+        p = p_ctx[1];
       } else {
-        if (!VP8GetBit(br, p[3])) {
-          if (!VP8GetBit(br, p[4])) {
-            v = 2;
-          } else {
-            v = 3 + VP8GetBit(br, p[5]);
-          }
-        } else {
-          if (!VP8GetBit(br, p[6])) {
-            if (!VP8GetBit(br, p[7])) {
-              v = 5 + VP8GetBit(br, 159);
-            } else {
-              v = 7 + 2 * VP8GetBit(br, 165);
-              v += VP8GetBit(br, 145);
-            }
-          } else {
-            const uint8_t* tab;
-            const int bit1 = VP8GetBit(br, p[8]);
-            const int bit0 = VP8GetBit(br, p[9 + bit1]);
-            const int cat = 2 * bit1 + bit0;
-            v = 0;
-            for (tab = kCat3456[cat]; *tab; ++tab) {
-              v += v + VP8GetBit(br, *tab);
-            }
-            v += 3 + (8 << cat);
-          }
-        }
-        p = prob[kBands[n]][2];
+        v = GetLargeValue(br, p);
+        p = p_ctx[2];
       }
-      j = kZigzag[n - 1];
-      out[j] = VP8GetSigned(br, v) * dq[j > 0];
-      if (n == 16 || !VP8GetBit(br, p[0])) {   // EOB
-        return n;
-      }
-    }
-    if (n == 16) {
-      return 16;
+      out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
     }
   }
+  return 16;
 }
 
-// Alias-safe way of converting 4bytes to 32bits.
-typedef union {
-  uint8_t  i8[4];
-  uint32_t i32;
-} PackedNz;
-
-// Table to unpack four bits into four bytes
-static const PackedNz kUnpackTab[16] = {
-  {{0, 0, 0, 0}},  {{1, 0, 0, 0}},  {{0, 1, 0, 0}},  {{1, 1, 0, 0}},
-  {{0, 0, 1, 0}},  {{1, 0, 1, 0}},  {{0, 1, 1, 0}},  {{1, 1, 1, 0}},
-  {{0, 0, 0, 1}},  {{1, 0, 0, 1}},  {{0, 1, 0, 1}},  {{1, 1, 0, 1}},
-  {{0, 0, 1, 1}},  {{1, 0, 1, 1}},  {{0, 1, 1, 1}},  {{1, 1, 1, 1}} };
-
-// Macro to pack four LSB of four bytes into four bits.
-#if defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) || \
-    defined(__BIG_ENDIAN__)
-#define PACK_CST 0x08040201U
-#else
-#define PACK_CST 0x01020408U
-#endif
-#define PACK(X, S) ((((X).i32 * PACK_CST) & 0xff000000) >> (S))
-
-static void ParseResiduals(VP8Decoder* const dec,
-                           VP8MB* const mb, VP8BitReader* const token_br) {
-  int out_t_nz, out_l_nz, first;
-  ProbaArray ac_prob;
-  const VP8QuantMatrix* q = &dec->dqm_[dec->segment_];
-  int16_t* dst = dec->coeffs_;
+static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
+  nz_coeffs <<= 2;
+  nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
+  return nz_coeffs;
+}
+
+static int ParseResiduals(VP8Decoder* const dec,
+                          VP8MB* const mb, VP8BitReader* const token_br) {
+  const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_;
+  const VP8BandProbas* const * ac_proba;
+  VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
+  const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
+  int16_t* dst = block->coeffs_;
   VP8MB* const left_mb = dec->mb_info_ - 1;
-  PackedNz nz_ac, nz_dc;
-  PackedNz tnz, lnz;
-  uint32_t non_zero_ac = 0;
-  uint32_t non_zero_dc = 0;
+  uint8_t tnz, lnz;
+  uint32_t non_zero_y = 0;
+  uint32_t non_zero_uv = 0;
   int x, y, ch;
+  uint32_t out_t_nz, out_l_nz;
+  int first;
 
-  nz_dc.i32 = nz_ac.i32 = 0;
   memset(dst, 0, 384 * sizeof(*dst));
-  if (!dec->is_i4x4_) {    // parse DC
+  if (!block->is_i4x4_) {    // parse DC
     int16_t dc[16] = { 0 };
-    const int ctx = mb->dc_nz_ + left_mb->dc_nz_;
-    mb->dc_nz_ = left_mb->dc_nz_ =
-        (GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[1],
-                   ctx, q->y2_mat_, 0, dc) > 0);
+    const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
+    const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc);
+    mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
+    if (nz > 1) {   // more than just the DC -> perform the full transform
+      VP8TransformWHT(dc, dst);
+    } else {        // only DC is non-zero -> inlined simplified transform
+      int i;
+      const int dc0 = (dc[0] + 3) >> 3;
+      for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0;
+    }
     first = 1;
-    ac_prob = (ProbaArray)dec->proba_.coeffs_[0];
-    VP8TransformWHT(dc, dst);
+    ac_proba = bands[0];
   } else {
     first = 0;
-    ac_prob = (ProbaArray)dec->proba_.coeffs_[3];
+    ac_proba = bands[3];
   }
 
-  tnz = kUnpackTab[mb->nz_ & 0xf];
-  lnz = kUnpackTab[left_mb->nz_ & 0xf];
+  tnz = mb->nz_ & 0x0f;
+  lnz = left_mb->nz_ & 0x0f;
   for (y = 0; y < 4; ++y) {
-    int l = lnz.i8[y];
+    int l = lnz & 1;
+    uint32_t nz_coeffs = 0;
     for (x = 0; x < 4; ++x) {
-      const int ctx = l + tnz.i8[x];
-      const int nz = GetCoeffs(token_br, ac_prob, ctx,
-                               q->y1_mat_, first, dst);
-      tnz.i8[x] = l = (nz > 0);
-      nz_dc.i8[x] = (dst[0] != 0);
-      nz_ac.i8[x] = (nz > 1);
+      const int ctx = l + (tnz & 1);
+      const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
+      l = (nz > first);
+      tnz = (tnz >> 1) | (l << 7);
+      nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
       dst += 16;
     }
-    lnz.i8[y] = l;
-    non_zero_dc |= PACK(nz_dc, 24 - y * 4);
-    non_zero_ac |= PACK(nz_ac, 24 - y * 4);
+    tnz >>= 4;
+    lnz = (lnz >> 1) | (l << 7);
+    non_zero_y = (non_zero_y << 8) | nz_coeffs;
   }
-  out_t_nz = PACK(tnz, 24);
-  out_l_nz = PACK(lnz, 24);
+  out_t_nz = tnz;
+  out_l_nz = lnz >> 4;
 
-  tnz = kUnpackTab[mb->nz_ >> 4];
-  lnz = kUnpackTab[left_mb->nz_ >> 4];
   for (ch = 0; ch < 4; ch += 2) {
+    uint32_t nz_coeffs = 0;
+    tnz = mb->nz_ >> (4 + ch);
+    lnz = left_mb->nz_ >> (4 + ch);
     for (y = 0; y < 2; ++y) {
-      int l = lnz.i8[ch + y];
+      int l = lnz & 1;
       for (x = 0; x < 2; ++x) {
-        const int ctx = l + tnz.i8[ch + x];
-        const int nz =
-            GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[2],
-                      ctx, q->uv_mat_, 0, dst);
-        tnz.i8[ch + x] = l = (nz > 0);
-        nz_dc.i8[y * 2 + x] = (dst[0] != 0);
-        nz_ac.i8[y * 2 + x] = (nz > 1);
+        const int ctx = l + (tnz & 1);
+        const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
+        l = (nz > 0);
+        tnz = (tnz >> 1) | (l << 3);
+        nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
         dst += 16;
       }
-      lnz.i8[ch + y] = l;
+      tnz >>= 2;
+      lnz = (lnz >> 1) | (l << 5);
     }
-    non_zero_dc |= PACK(nz_dc, 8 - ch * 2);
-    non_zero_ac |= PACK(nz_ac, 8 - ch * 2);
+    // Note: we don't really need the per-4x4 details for U/V blocks.
+    non_zero_uv |= nz_coeffs << (4 * ch);
+    out_t_nz |= (tnz << 4) << ch;
+    out_l_nz |= (lnz & 0xf0) << ch;
   }
-  out_t_nz |= PACK(tnz, 20);
-  out_l_nz |= PACK(lnz, 20);
   mb->nz_ = out_t_nz;
   left_mb->nz_ = out_l_nz;
 
-  dec->non_zero_ac_ = non_zero_ac;
-  dec->non_zero_ = non_zero_ac | non_zero_dc;
-  mb->skip_ = !dec->non_zero_;
+  block->non_zero_y_ = non_zero_y;
+  block->non_zero_uv_ = non_zero_uv;
+
+  // We look at the mode-code of each block and check if some blocks have less
+  // than three non-zero coeffs (code < 2). This is to avoid dithering flat and
+  // empty blocks.
+  block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
+
+  return !(non_zero_y | non_zero_uv);  // will be used for further optimization
 }
-#undef PACK
 
 //------------------------------------------------------------------------------
 // Main loop
 
 int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
-  VP8BitReader* const br = &dec->br_;
   VP8MB* const left = dec->mb_info_ - 1;
-  VP8MB* const info = dec->mb_info_ + dec->mb_x_;
-
-  // Note: we don't save segment map (yet), as we don't expect
-  // to decode more than 1 keyframe.
-  if (dec->segment_hdr_.update_map_) {
-    // Hardcoded tree parsing
-    dec->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) ?
-        VP8GetBit(br, dec->proba_.segments_[1]) :
-        2 + VP8GetBit(br, dec->proba_.segments_[2]);
-  }
-  info->skip_ = dec->use_skip_proba_ ? VP8GetBit(br, dec->skip_p_) : 0;
+  VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
+  VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
+  int skip = dec->use_skip_proba_ ? block->skip_ : 0;
 
-  VP8ParseIntraMode(br, dec);
-  if (br->eof_) {
-    return 0;
-  }
-
-  if (!info->skip_) {
-    ParseResiduals(dec, info, token_br);
+  if (!skip) {
+    skip = ParseResiduals(dec, mb, token_br);
   } else {
-    left->nz_ = info->nz_ = 0;
-    if (!dec->is_i4x4_) {
-      left->dc_nz_ = info->dc_nz_ = 0;
+    left->nz_ = mb->nz_ = 0;
+    if (!block->is_i4x4_) {
+      left->nz_dc_ = mb->nz_dc_ = 0;
     }
-    dec->non_zero_ = 0;
-    dec->non_zero_ac_ = 0;
+    block->non_zero_y_ = 0;
+    block->non_zero_uv_ = 0;
+    block->dither_ = 0;
+  }
+
+  if (dec->filter_type_ > 0) {  // store filter info
+    VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
+    *finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
+    finfo->f_inner_ |= !skip;
   }
 
-  return (!token_br->eof_);
+  return !token_br->eof_;
 }
 
 void VP8InitScanline(VP8Decoder* const dec) {
   VP8MB* const left = dec->mb_info_ - 1;
   left->nz_ = 0;
-  left->dc_nz_ = 0;
+  left->nz_dc_ = 0;
   memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
-  dec->filter_row_ =
-    (dec->filter_type_ > 0) &&
-    (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
+  dec->mb_x_ = 0;
 }
 
 static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
   for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
+    // Parse bitstream for this row.
     VP8BitReader* const token_br =
         &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
-    VP8InitScanline(dec);
-    for (dec->mb_x_ = 0; dec->mb_x_ < dec->mb_w_;  dec->mb_x_++) {
+    if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
+      return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                         "Premature end-of-partition0 encountered.");
+    }
+    for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
       if (!VP8DecodeMB(dec, token_br)) {
         return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
                            "Premature end-of-file encountered.");
       }
-      VP8ReconstructBlock(dec);
-
-      // Store data and save block's filtering params
-      VP8StoreBlock(dec);
     }
+    VP8InitScanline(dec);   // Prepare for next scanline
+
+    // Reconstruct, filter and emit the row.
     if (!VP8ProcessRow(dec, io)) {
       return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
     }
   }
-  if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) {
-    return 0;
-  }
-
-  // Finish
-#ifndef ONLY_KEYFRAME_CODE
-  if (!dec->update_proba_) {
-    dec->proba_ = dec->proba_saved_;
+  if (dec->mt_method_ > 0) {
+    if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
   }
-#endif
-
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  if (dec->layer_data_size_ > 0) {
-    if (!VP8DecodeLayer(dec)) {
-      return 0;
-    }
-  }
-#endif
 
   return 1;
 }
@@ -768,12 +648,10 @@ void VP8Clear(VP8Decoder* const dec) {
   if (dec == NULL) {
     return;
   }
-  if (dec->use_threads_) {
-    WebPWorkerEnd(&dec->worker_);
-  }
-  if (dec->mem_) {
-    free(dec->mem_);
-  }
+  WebPGetWorkerInterface()->End(&dec->worker_);
+  ALPHDelete(dec->alph_dec_);
+  dec->alph_dec_ = NULL;
+  WebPSafeFree(dec->mem_);
   dec->mem_ = NULL;
   dec->mem_size_ = 0;
   memset(&dec->br_, 0, sizeof(dec->br_));
@@ -782,6 +660,3 @@ void VP8Clear(VP8Decoder* const dec) {
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/vp8i.h b/drivers/webp/dec/vp8i.h
index 4382edfd8e..b5f2b23009 100644
--- a/drivers/webp/dec/vp8i.h
+++ b/drivers/webp/dec/vp8i.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // VP8 decoder: internal header.
@@ -13,12 +15,14 @@
 #define WEBP_DEC_VP8I_H_
 
 #include <string.h>     // for memcpy()
+#include "./common.h"
 #include "./vp8li.h"
 #include "../utils/bit_reader.h"
+#include "../utils/random.h"
 #include "../utils/thread.h"
 #include "../dsp/dsp.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -27,48 +31,10 @@ extern "C" {
 
 // version numbers
 #define DEC_MAJ_VERSION 0
-#define DEC_MIN_VERSION 2
-#define DEC_REV_VERSION 0
-
-#define ONLY_KEYFRAME_CODE      // to remove any code related to P-Frames
-
-// intra prediction modes
-enum { B_DC_PRED = 0,   // 4x4 modes
-       B_TM_PRED,
-       B_VE_PRED,
-       B_HE_PRED,
-       B_RD_PRED,
-       B_VR_PRED,
-       B_LD_PRED,
-       B_VL_PRED,
-       B_HD_PRED,
-       B_HU_PRED,
-       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
-
-       // Luma16 or UV modes
-       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
-       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
-       B_PRED = NUM_BMODES,   // refined I4x4 mode
-
-       // special modes
-       B_DC_PRED_NOTOP = 4,
-       B_DC_PRED_NOLEFT = 5,
-       B_DC_PRED_NOTOPLEFT = 6,
-       NUM_B_DC_MODES = 7 };
-
-enum { MB_FEATURE_TREE_PROBS = 3,
-       NUM_MB_SEGMENTS = 4,
-       NUM_REF_LF_DELTAS = 4,
-       NUM_MODE_LF_DELTAS = 4,    // I4x4, ZERO, *, SPLIT
-       MAX_NUM_PARTITIONS = 8,
-       // Probabilities
-       NUM_TYPES = 4,
-       NUM_BANDS = 8,
-       NUM_CTX = 3,
-       NUM_PROBAS = 11,
-       NUM_MV_PROBAS = 19 };
-
-// YUV-cache parameters.
+#define DEC_MIN_VERSION 4
+#define DEC_REV_VERSION 4
+
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
 // Constraints are: We need to store one 16x16 block of luma samples (y),
 // and two 8x8 chroma blocks (u/v). These are better be 16-bytes aligned,
 // in order to be SIMD-friendly. We also need to store the top, left and
@@ -90,14 +56,15 @@ enum { MB_FEATURE_TREE_PROBS = 3,
 //  'y' = y-samples   'u' = u-samples     'v' = u-samples
 //  '|' = left sample,   '-' = top sample,    '+' = top-left sample
 //  't' = extra top-right sample for 4x4 modes
-// With this layout, BPS (=Bytes Per Scan-line) is one cacheline size.
-#define BPS       32    // this is the common stride used by yuv[]
 #define YUV_SIZE (BPS * 17 + BPS * 9)
 #define Y_SIZE   (BPS * 17)
 #define Y_OFF    (BPS * 1 + 8)
 #define U_OFF    (Y_OFF + BPS * 16 + BPS)
 #define V_OFF    (U_OFF + 16)
 
+// minimal width under which lossy multi-threading is always disabled
+#define MIN_WIDTH_FOR_THREADS 512
+
 //------------------------------------------------------------------------------
 // Headers
 
@@ -126,15 +93,19 @@ typedef struct {
   int8_t filter_strength_[NUM_MB_SEGMENTS];  // filter strength for segments
 } VP8SegmentHeader;
 
+// probas associated to one of the contexts
+typedef uint8_t VP8ProbaArray[NUM_PROBAS];
+
+typedef struct {   // all the probas associated to one band
+  VP8ProbaArray probas_[NUM_CTX];
+} VP8BandProbas;
+
 // Struct collecting all frame-persistent probabilities.
 typedef struct {
   uint8_t segments_[MB_FEATURE_TREE_PROBS];
   // Type: 0:Intra16-AC  1:Intra16-DC   2:Chroma   3:Intra4
-  uint8_t coeffs_[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
-#ifndef ONLY_KEYFRAME_CODE
-  uint8_t ymode_[4], uvmode_[3];
-  uint8_t mv_[2][NUM_MV_PROBAS];
-#endif
+  VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
+  const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1];
 } VP8Proba;
 
 // Filter parameters
@@ -151,32 +122,61 @@ typedef struct {
 // Informations about the macroblocks.
 
 typedef struct {  // filter specs
-  unsigned int f_level_:6;      // filter strength: 0..63
-  unsigned int f_ilevel_:6;     // inner limit: 1..63
-  unsigned int f_inner_:1;      // do inner filtering?
+  uint8_t f_limit_;      // filter limit in [3..189], or 0 if no filtering
+  uint8_t f_ilevel_;     // inner limit in [1..63]
+  uint8_t f_inner_;      // do inner filtering?
+  uint8_t hev_thresh_;   // high edge variance threshold in [0..2]
 } VP8FInfo;
 
-typedef struct {  // used for syntax-parsing
-  unsigned int nz_;          // non-zero AC/DC coeffs
-  unsigned int dc_nz_:1;     // non-zero DC coeffs
-  unsigned int skip_:1;      // block type
+typedef struct {  // Top/Left Contexts used for syntax-parsing
+  uint8_t nz_;        // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
+  uint8_t nz_dc_;     // non-zero DC coeff (1bit)
 } VP8MB;
 
 // Dequantization matrices
 typedef int quant_t[2];      // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
 typedef struct {
   quant_t y1_mat_, y2_mat_, uv_mat_;
+
+  int uv_quant_;   // U/V quantizer value
+  int dither_;     // dithering amplitude (0 = off, max=255)
 } VP8QuantMatrix;
 
+// Data needed to reconstruct a macroblock
+typedef struct {
+  int16_t coeffs_[384];   // 384 coeffs = (16+4+4) * 4*4
+  uint8_t is_i4x4_;       // true if intra4x4
+  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
+  uint8_t uvmode_;        // chroma prediction mode
+  // bit-wise info about the content of each sub-4x4 blocks (in decoding order).
+  // Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
+  //   code=0 -> no coefficient
+  //   code=1 -> only DC
+  //   code=2 -> first three coefficients are non-zero
+  //   code=3 -> more than three coefficients are non-zero
+  // This allows to call specialized transform functions.
+  uint32_t non_zero_y_;
+  uint32_t non_zero_uv_;
+  uint8_t dither_;      // local dithering strength (deduced from non_zero_*)
+  uint8_t skip_;
+  uint8_t segment_;
+} VP8MBData;
+
 // Persistent information needed by the parallel processing
 typedef struct {
-  int id_;            // cache row to process (in [0..2])
-  int mb_y_;          // macroblock position of the row
-  int filter_row_;    // true if row-filtering is needed
-  VP8FInfo* f_info_;  // filter strengths
-  VP8Io io_;          // copy of the VP8Io to pass to put()
+  int id_;              // cache row to process (in [0..2])
+  int mb_y_;            // macroblock position of the row
+  int filter_row_;      // true if row-filtering is needed
+  VP8FInfo* f_info_;    // filter strengths (swapped with dec->f_info_)
+  VP8MBData* mb_data_;  // reconstruction data (swapped with dec->mb_data_)
+  VP8Io io_;            // copy of the VP8Io to pass to put()
 } VP8ThreadContext;
 
+// Saved top samples, per macroblock. Fits into a cache-line.
+typedef struct {
+  uint8_t y[16], u[8], v[8];
+} VP8TopSamples;
+
 //------------------------------------------------------------------------------
 // VP8Decoder: the main opaque structure handed over to user
 
@@ -196,7 +196,8 @@ struct VP8Decoder {
 
   // Worker
   WebPWorker worker_;
-  int use_threads_;    // use multi-thread
+  int mt_method_;      // multi-thread method: 0=off, 1=[parse+recon][filter]
+                       // 2=[parse][recon+filter]
   int cache_id_;       // current cache row
   int num_caches_;     // number of cached rows of 16 pixels (1, 2 or 3)
   VP8ThreadContext thread_ctx_;  // Thread context
@@ -213,12 +214,9 @@ struct VP8Decoder {
   // per-partition boolean decoders.
   VP8BitReader parts_[MAX_NUM_PARTITIONS];
 
-  // buffer refresh flags
-  //   bit 0: refresh Gold, bit 1: refresh Alt
-  //   bit 2-3: copy to Gold, bit 4-5: copy to Alt
-  //   bit 6: Gold sign bias, bit 7: Alt sign bias
-  //   bit 8: refresh last frame
-  uint32_t buffer_flags_;
+  // Dithering strength, deduced from decoding options
+  int dither_;                // whether to use dithering or not
+  VP8Random dithering_rg_;    // random generator for dithering
 
   // dequantization (one set of DC/AC dequant factor per segment)
   VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
@@ -227,24 +225,18 @@ struct VP8Decoder {
   VP8Proba proba_;
   int use_skip_proba_;
   uint8_t skip_p_;
-#ifndef ONLY_KEYFRAME_CODE
-  uint8_t intra_p_, last_p_, golden_p_;
-  VP8Proba proba_saved_;
-  int update_proba_;
-#endif
 
   // Boundary data cache and persistent buffers.
-  uint8_t* intra_t_;     // top intra modes values: 4 * mb_w_
-  uint8_t  intra_l_[4];  // left intra modes values
-  uint8_t* y_t_;         // top luma samples: 16 * mb_w_
-  uint8_t* u_t_, *v_t_;  // top u/v samples: 8 * mb_w_ each
+  uint8_t* intra_t_;      // top intra modes values: 4 * mb_w_
+  uint8_t  intra_l_[4];   // left intra modes values
 
-  VP8MB* mb_info_;       // contextual macroblock info (mb_w_ + 1)
-  VP8FInfo* f_info_;     // filter strength info
-  uint8_t* yuv_b_;       // main block for Y/U/V (size = YUV_SIZE)
-  int16_t* coeffs_;      // 384 coeffs = (16+8+8) * 4*4
+  VP8TopSamples* yuv_t_;  // top y/u/v samples
 
-  uint8_t* cache_y_;     // macroblock row for storing unfiltered samples
+  VP8MB* mb_info_;        // contextual macroblock info (mb_w_ + 1)
+  VP8FInfo* f_info_;      // filter strength info
+  uint8_t* yuv_b_;        // main block for Y/U/V (size = YUV_SIZE)
+
+  uint8_t* cache_y_;      // macroblock row for storing unfiltered samples
   uint8_t* cache_u_;
   uint8_t* cache_v_;
   int cache_y_stride_;
@@ -256,31 +248,19 @@ struct VP8Decoder {
 
   // Per macroblock non-persistent infos.
   int mb_x_, mb_y_;       // current position, in macroblock units
-  uint8_t is_i4x4_;       // true if intra4x4
-  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
-  uint8_t uvmode_;        // chroma prediction mode
-  uint8_t segment_;       // block's segment
-
-  // bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
-  // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
-  // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
-  // If the bit is set, the 4x4 block contains some non-zero coefficients.
-  uint32_t non_zero_;
-  uint32_t non_zero_ac_;
+  VP8MBData* mb_data_;    // parsed reconstruction data
 
   // Filtering side-info
-  int filter_type_;                         // 0=off, 1=simple, 2=complex
-  int filter_row_;                          // per-row flag
-  uint8_t filter_levels_[NUM_MB_SEGMENTS];  // precalculated per-segment
+  int filter_type_;                          // 0=off, 1=simple, 2=complex
+  VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2];  // precalculated per-segment/type
 
-  // extensions
-  const uint8_t* alpha_data_;   // compressed alpha data (if present)
+  // Alpha
+  struct ALPHDecoder* alph_dec_;  // alpha-plane decoder object
+  const uint8_t* alpha_data_;     // compressed alpha data (if present)
   size_t alpha_data_size_;
-  uint8_t* alpha_plane_;        // output. Persistent, contains the whole data.
-
-  int layer_colorspace_;
-  const uint8_t* layer_data_;   // compressed layer data (if present)
-  size_t layer_data_size_;
+  int is_alpha_decoded_;  // true if alpha_data_ is decoded in alpha_plane_
+  uint8_t* alpha_plane_;  // output. Persistent, contains the whole data.
+  int alpha_dithering_;   // derived from decoding options (0=off, 100=full).
 };
 
 //------------------------------------------------------------------------------
@@ -293,15 +273,14 @@ int VP8SetError(VP8Decoder* const dec,
 // in tree.c
 void VP8ResetProba(VP8Proba* const proba);
 void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec);
-void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec);
+// parses one row of intra mode data in partition 0, returns !eof
+int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec);
 
 // in quant.c
 void VP8ParseQuant(VP8Decoder* const dec);
 
 // in frame.c
-int VP8InitFrame(VP8Decoder* const dec, VP8Io* io);
-// Predict a block and add residual
-void VP8ReconstructBlock(VP8Decoder* const dec);
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io);
 // Call io->setup() and finish setting up scan parameters.
 // After this call returns, one must always call VP8ExitCritical() with the
 // same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
@@ -310,10 +289,16 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
 // Must always be called in pair with VP8EnterCritical().
 // Returns false in case of error.
 int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
-// Process the last decoded row (filtering + output)
+// Return the multi-threading method to use (0=off), depending
+// on options and bitstream size. Only for lossy decoding.
+int VP8GetThreadMethod(const WebPDecoderOptions* const options,
+                       const WebPHeaderStructure* const headers,
+                       int width, int height);
+// Initialize dithering post-process if needed.
+void VP8InitDithering(const WebPDecoderOptions* const options,
+                      VP8Decoder* const dec);
+// Process the last decoded row (filtering + output).
 int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
-// Store a block, along with filtering params
-void VP8StoreBlock(VP8Decoder* const dec);
 // To be called at the start of a new scanline, to initialize predictors.
 void VP8InitScanline(VP8Decoder* const dec);
 // Decode one macroblock. Returns false if there is not enough data.
@@ -323,12 +308,9 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
 const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
                                       int row, int num_rows);
 
-// in layer.c
-int VP8DecodeLayer(VP8Decoder* const dec);
-
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/dec/vp8l.c b/drivers/webp/dec/vp8l.c
index 897e4395c7..19665a007d 100644
--- a/drivers/webp/dec/vp8l.c
+++ b/drivers/webp/dec/vp8l.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // main entry for the decoder
@@ -10,18 +12,17 @@
 // Authors: Vikas Arora (vikaas.arora@gmail.com)
 //          Jyrki Alakuijala (jyrki@google.com)
 
-#include <stdio.h>
 #include <stdlib.h>
+
+#include "./alphai.h"
 #include "./vp8li.h"
+#include "../dsp/dsp.h"
 #include "../dsp/lossless.h"
 #include "../dsp/yuv.h"
+#include "../utils/endian_inl.h"
 #include "../utils/huffman.h"
 #include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 #define NUM_ARGB_CACHE_ROWS          16
 
 static const int kCodeLengthLiterals = 16;
@@ -50,6 +51,9 @@ static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
   NUM_DISTANCE_CODES
 };
 
+static const uint8_t kLiteralMap[HUFFMAN_CODES_PER_META_CODE] = {
+  0, 1, 1, 1, 0
+};
 
 #define NUM_CODE_LENGTH_CODES       19
 static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
@@ -57,19 +61,43 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
 };
 
 #define CODE_TO_PLANE_CODES        120
-static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
-   0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
-   0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
-   0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
-   0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
-   0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
-   0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
-   0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
-   0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
-   0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
-   0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
-   0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
-   0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
+static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
+  0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
+  0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
+  0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
+  0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
+  0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
+  0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+  0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
+  0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
+  0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+  0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
+  0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
+  0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
+};
+
+// Memory needed for lookup tables of one Huffman tree group. Red, blue, alpha
+// and distance alphabets are constant (256 for red, blue and alpha, 40 for
+// distance) and lookup table sizes for them in worst case are 630 and 410
+// respectively. Size of green alphabet depends on color cache size and is equal
+// to 256 (green component values) + 24 (length prefix values)
+// + color_cache_size (between 0 and 2048).
+// All values computed for 8-bit first level lookup with Mark Adler's tool:
+// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
+#define FIXED_TABLE_SIZE (630 * 3 + 410)
+static const int kTableSize[12] = {
+  FIXED_TABLE_SIZE + 654,
+  FIXED_TABLE_SIZE + 656,
+  FIXED_TABLE_SIZE + 658,
+  FIXED_TABLE_SIZE + 662,
+  FIXED_TABLE_SIZE + 670,
+  FIXED_TABLE_SIZE + 686,
+  FIXED_TABLE_SIZE + 718,
+  FIXED_TABLE_SIZE + 782,
+  FIXED_TABLE_SIZE + 912,
+  FIXED_TABLE_SIZE + 1168,
+  FIXED_TABLE_SIZE + 1680,
+  FIXED_TABLE_SIZE + 2704
 };
 
 static int DecodeImageStream(int xsize, int ysize,
@@ -80,27 +108,28 @@ static int DecodeImageStream(int xsize, int ysize,
 //------------------------------------------------------------------------------
 
 int VP8LCheckSignature(const uint8_t* const data, size_t size) {
-  return (size >= 1) && (data[0] == VP8L_MAGIC_BYTE);
+  return (size >= VP8L_FRAME_HEADER_SIZE &&
+          data[0] == VP8L_MAGIC_BYTE &&
+          (data[4] >> 5) == 0);  // version
 }
 
 static int ReadImageInfo(VP8LBitReader* const br,
                          int* const width, int* const height,
                          int* const has_alpha) {
-  const uint8_t signature = VP8LReadBits(br, 8);
-  if (!VP8LCheckSignature(&signature, 1)) {
-    return 0;
-  }
+  if (VP8LReadBits(br, 8) != VP8L_MAGIC_BYTE) return 0;
   *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
   *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
   *has_alpha = VP8LReadBits(br, 1);
-  VP8LReadBits(br, VP8L_VERSION_BITS);  // Read/ignore the version number.
-  return 1;
+  if (VP8LReadBits(br, VP8L_VERSION_BITS) != 0) return 0;
+  return !br->eos_;
 }
 
 int VP8LGetInfo(const uint8_t* data, size_t data_size,
                 int* const width, int* const height, int* const has_alpha) {
   if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
     return 0;         // not enough data
+  } else if (!VP8LCheckSignature(data, data_size)) {
+    return 0;         // bad signature
   } else {
     int w, h, a;
     VP8LBitReader br;
@@ -138,39 +167,80 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
   if (plane_code > CODE_TO_PLANE_CODES) {
     return plane_code - CODE_TO_PLANE_CODES;
   } else {
-    const int dist_code = code_to_plane_lut[plane_code - 1];
+    const int dist_code = kCodeToPlane[plane_code - 1];
     const int yoffset = dist_code >> 4;
     const int xoffset = 8 - (dist_code & 0xf);
     const int dist = yoffset * xsize + xoffset;
-    return (dist >= 1) ? dist : 1;
+    return (dist >= 1) ? dist : 1;  // dist<1 can happen if xsize is very small
   }
 }
 
 //------------------------------------------------------------------------------
 // Decodes the next Huffman code from bit-stream.
 // FillBitWindow(br) needs to be called at minimum every second call
-// to ReadSymbolUnsafe.
-static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
-  const HuffmanTreeNode* node = tree->root_;
-  assert(node != NULL);
-  while (!HuffmanTreeNodeIsLeaf(node)) {
-    node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
-  }
-  return node->symbol_;
+// to ReadSymbol, in order to pre-fetch enough bits.
+static WEBP_INLINE int ReadSymbol(const HuffmanCode* table,
+                                  VP8LBitReader* const br) {
+  int nbits;
+  uint32_t val = VP8LPrefetchBits(br);
+  table += val & HUFFMAN_TABLE_MASK;
+  nbits = table->bits - HUFFMAN_TABLE_BITS;
+  if (nbits > 0) {
+    VP8LSetBitPos(br, br->bit_pos_ + HUFFMAN_TABLE_BITS);
+    val = VP8LPrefetchBits(br);
+    table += table->value;
+    table += val & ((1 << nbits) - 1);
+  }
+  VP8LSetBitPos(br, br->bit_pos_ + table->bits);
+  return table->value;
 }
 
-static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
-                                  VP8LBitReader* const br) {
-  const int read_safe = (br->pos_ + 8 > br->len_);
-  if (!read_safe) {
-    return ReadSymbolUnsafe(tree, br);
+// Reads packed symbol depending on GREEN channel
+#define BITS_SPECIAL_MARKER 0x100  // something large enough (and a bit-mask)
+#define PACKED_NON_LITERAL_CODE 0  // must be < NUM_LITERAL_CODES
+static WEBP_INLINE int ReadPackedSymbols(const HTreeGroup* group,
+                                         VP8LBitReader* const br,
+                                         uint32_t* const dst) {
+  const uint32_t val = VP8LPrefetchBits(br) & (HUFFMAN_PACKED_TABLE_SIZE - 1);
+  const HuffmanCode32 code = group->packed_table[val];
+  assert(group->use_packed_table);
+  if (code.bits < BITS_SPECIAL_MARKER) {
+    VP8LSetBitPos(br, br->bit_pos_ + code.bits);
+    *dst = code.value;
+    return PACKED_NON_LITERAL_CODE;
   } else {
-    const HuffmanTreeNode* node = tree->root_;
-    assert(node != NULL);
-    while (!HuffmanTreeNodeIsLeaf(node)) {
-      node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
+    VP8LSetBitPos(br, br->bit_pos_ + code.bits - BITS_SPECIAL_MARKER);
+    assert(code.value >= NUM_LITERAL_CODES);
+    return code.value;
+  }
+}
+
+static int AccumulateHCode(HuffmanCode hcode, int shift,
+                           HuffmanCode32* const huff) {
+  huff->bits += hcode.bits;
+  huff->value |= (uint32_t)hcode.value << shift;
+  assert(huff->bits <= HUFFMAN_TABLE_BITS);
+  return hcode.bits;
+}
+
+static void BuildPackedTable(HTreeGroup* const htree_group) {
+  uint32_t code;
+  for (code = 0; code < HUFFMAN_PACKED_TABLE_SIZE; ++code) {
+    uint32_t bits = code;
+    HuffmanCode32* const huff = &htree_group->packed_table[bits];
+    HuffmanCode hcode = htree_group->htrees[GREEN][bits];
+    if (hcode.value >= NUM_LITERAL_CODES) {
+      huff->bits = hcode.bits + BITS_SPECIAL_MARKER;
+      huff->value = hcode.value;
+    } else {
+      huff->bits = 0;
+      huff->value = 0;
+      bits >>= AccumulateHCode(hcode, 8, huff);
+      bits >>= AccumulateHCode(htree_group->htrees[RED][bits], 16, huff);
+      bits >>= AccumulateHCode(htree_group->htrees[BLUE][bits], 0, huff);
+      bits >>= AccumulateHCode(htree_group->htrees[ALPHA][bits], 24, huff);
+      (void)bits;
     }
-    return node->symbol_;
   }
 }
 
@@ -182,19 +252,18 @@ static int ReadHuffmanCodeLengths(
   int symbol;
   int max_symbol;
   int prev_code_len = DEFAULT_CODE_LENGTH;
-  HuffmanTree tree;
+  HuffmanCode table[1 << LENGTHS_TABLE_BITS];
 
-  if (!HuffmanTreeBuildImplicit(&tree, code_length_code_lengths,
-                                NUM_CODE_LENGTH_CODES)) {
-    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
-    return 0;
+  if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS,
+                             code_length_code_lengths,
+                             NUM_CODE_LENGTH_CODES)) {
+    goto End;
   }
 
   if (VP8LReadBits(br, 1)) {    // use length
     const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
     max_symbol = 2 + VP8LReadBits(br, length_nbits);
     if (max_symbol > num_symbols) {
-      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
       goto End;
     }
   } else {
@@ -203,10 +272,13 @@ static int ReadHuffmanCodeLengths(
 
   symbol = 0;
   while (symbol < num_symbols) {
+    const HuffmanCode* p;
     int code_len;
     if (max_symbol-- == 0) break;
     VP8LFillBitWindow(br);
-    code_len = ReadSymbol(&tree, br);
+    p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK];
+    VP8LSetBitPos(br, br->bit_pos_ + p->bits);
+    code_len = p->value;
     if (code_len < kCodeLengthLiterals) {
       code_lengths[symbol++] = code_len;
       if (code_len != 0) prev_code_len = code_len;
@@ -217,7 +289,6 @@ static int ReadHuffmanCodeLengths(
       const int repeat_offset = kCodeLengthRepeatOffsets[slot];
       int repeat = VP8LReadBits(br, extra_bits) + repeat_offset;
       if (symbol + repeat > num_symbols) {
-        dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
         goto End;
       } else {
         const int length = use_prev ? prev_code_len : 0;
@@ -228,36 +299,34 @@ static int ReadHuffmanCodeLengths(
   ok = 1;
 
  End:
-  HuffmanTreeRelease(&tree);
+  if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
   return ok;
 }
 
+// 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman
+// tree.
 static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
-                           HuffmanTree* const tree) {
+                           int* const code_lengths, HuffmanCode* const table) {
   int ok = 0;
+  int size = 0;
   VP8LBitReader* const br = &dec->br_;
   const int simple_code = VP8LReadBits(br, 1);
 
+  memset(code_lengths, 0, alphabet_size * sizeof(*code_lengths));
+
   if (simple_code) {  // Read symbols, codes & code lengths directly.
-    int symbols[2];
-    int codes[2];
-    int code_lengths[2];
     const int num_symbols = VP8LReadBits(br, 1) + 1;
     const int first_symbol_len_code = VP8LReadBits(br, 1);
     // The first code is either 1 bit or 8 bit code.
-    symbols[0] = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
-    codes[0] = 0;
-    code_lengths[0] = num_symbols - 1;
+    int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
+    code_lengths[symbol] = 1;
     // The second code (if present), is always 8 bit long.
     if (num_symbols == 2) {
-      symbols[1] = VP8LReadBits(br, 8);
-      codes[1] = 1;
-      code_lengths[1] = num_symbols - 1;
+      symbol = VP8LReadBits(br, 8);
+      code_lengths[symbol] = 1;
     }
-    ok = HuffmanTreeBuildExplicit(tree, code_lengths, codes, symbols,
-                                  alphabet_size, num_symbols);
+    ok = 1;
   } else {  // Decode Huffman-coded code lengths.
-    int* code_lengths = NULL;
     int i;
     int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
     const int num_codes = VP8LReadBits(br, 4) + 4;
@@ -266,42 +335,23 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
       return 0;
     }
 
-    code_lengths =
-        (int*)WebPSafeCalloc((uint64_t)alphabet_size, sizeof(*code_lengths));
-    if (code_lengths == NULL) {
-      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
-      return 0;
-    }
-
     for (i = 0; i < num_codes; ++i) {
       code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
     }
     ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size,
                                 code_lengths);
-    if (ok) {
-      ok = HuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size);
-    }
-    free(code_lengths);
   }
-  ok = ok && !br->error_;
-  if (!ok) {
+
+  ok = ok && !br->eos_;
+  if (ok) {
+    size = VP8LBuildHuffmanTable(table, HUFFMAN_TABLE_BITS,
+                                 code_lengths, alphabet_size);
+  }
+  if (!ok || size == 0) {
     dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
     return 0;
   }
-  return 1;
-}
-
-static void DeleteHtreeGroups(HTreeGroup* htree_groups, int num_htree_groups) {
-  if (htree_groups != NULL) {
-    int i, j;
-    for (i = 0; i < num_htree_groups; ++i) {
-      HuffmanTree* const htrees = htree_groups[i].htrees_;
-      for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
-        HuffmanTreeRelease(&htrees[j]);
-      }
-    }
-    free(htree_groups);
-  }
+  return size;
 }
 
 static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
@@ -311,7 +361,12 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
   VP8LMetadata* const hdr = &dec->hdr_;
   uint32_t* huffman_image = NULL;
   HTreeGroup* htree_groups = NULL;
+  HuffmanCode* huffman_tables = NULL;
+  HuffmanCode* next = NULL;
   int num_htree_groups = 1;
+  int max_alphabet_size = 0;
+  int* code_lengths = NULL;
+  const int table_size = kTableSize[color_cache_bits];
 
   if (allow_recursion && VP8LReadBits(br, 1)) {
     // use meta Huffman codes.
@@ -321,51 +376,108 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
     const int huffman_pixs = huffman_xsize * huffman_ysize;
     if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec,
                            &huffman_image)) {
-      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
       goto Error;
     }
     hdr->huffman_subsample_bits_ = huffman_precision;
     for (i = 0; i < huffman_pixs; ++i) {
       // The huffman data is stored in red and green bytes.
-      const int index = (huffman_image[i] >> 8) & 0xffff;
-      huffman_image[i] = index;
-      if (index >= num_htree_groups) {
-        num_htree_groups = index + 1;
+      const int group = (huffman_image[i] >> 8) & 0xffff;
+      huffman_image[i] = group;
+      if (group >= num_htree_groups) {
+        num_htree_groups = group + 1;
       }
     }
   }
 
-  if (br->error_) goto Error;
+  if (br->eos_) goto Error;
+
+  // Find maximum alphabet size for the htree group.
+  for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+    int alphabet_size = kAlphabetSize[j];
+    if (j == 0 && color_cache_bits > 0) {
+      alphabet_size += 1 << color_cache_bits;
+    }
+    if (max_alphabet_size < alphabet_size) {
+      max_alphabet_size = alphabet_size;
+    }
+  }
+
+  huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size,
+                                                sizeof(*huffman_tables));
+  htree_groups = VP8LHtreeGroupsNew(num_htree_groups);
+  code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size,
+                                      sizeof(*code_lengths));
 
-  assert(num_htree_groups <= 0x10000);
-  htree_groups =
-      (HTreeGroup*)WebPSafeCalloc((uint64_t)num_htree_groups,
-                                  sizeof(*htree_groups));
-  if (htree_groups == NULL) {
+  if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) {
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     goto Error;
   }
 
+  next = huffman_tables;
   for (i = 0; i < num_htree_groups; ++i) {
-    HuffmanTree* const htrees = htree_groups[i].htrees_;
+    HTreeGroup* const htree_group = &htree_groups[i];
+    HuffmanCode** const htrees = htree_group->htrees;
+    int size;
+    int total_size = 0;
+    int is_trivial_literal = 1;
+    int max_bits = 0;
     for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
       int alphabet_size = kAlphabetSize[j];
+      htrees[j] = next;
       if (j == 0 && color_cache_bits > 0) {
         alphabet_size += 1 << color_cache_bits;
       }
-      if (!ReadHuffmanCode(alphabet_size, dec, htrees + j)) goto Error;
+      size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
+      if (is_trivial_literal && kLiteralMap[j] == 1) {
+        is_trivial_literal = (next->bits == 0);
+      }
+      total_size += next->bits;
+      next += size;
+      if (size == 0) {
+        goto Error;
+      }
+      if (j <= ALPHA) {
+        int local_max_bits = code_lengths[0];
+        int k;
+        for (k = 1; k < alphabet_size; ++k) {
+          if (code_lengths[k] > local_max_bits) {
+            local_max_bits = code_lengths[k];
+          }
+        }
+        max_bits += local_max_bits;
+      }
     }
+    htree_group->is_trivial_literal = is_trivial_literal;
+    htree_group->is_trivial_code = 0;
+    if (is_trivial_literal) {
+      const int red = htrees[RED][0].value;
+      const int blue = htrees[BLUE][0].value;
+      const int alpha = htrees[ALPHA][0].value;
+      htree_group->literal_arb =
+          ((uint32_t)alpha << 24) | (red << 16) | blue;
+      if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
+        htree_group->is_trivial_code = 1;
+        htree_group->literal_arb |= htrees[GREEN][0].value << 8;
+      }
+    }
+    htree_group->use_packed_table = !htree_group->is_trivial_code &&
+                                    (max_bits < HUFFMAN_PACKED_BITS);
+    if (htree_group->use_packed_table) BuildPackedTable(htree_group);
   }
+  WebPSafeFree(code_lengths);
 
   // All OK. Finalize pointers and return.
   hdr->huffman_image_ = huffman_image;
   hdr->num_htree_groups_ = num_htree_groups;
   hdr->htree_groups_ = htree_groups;
+  hdr->huffman_tables_ = huffman_tables;
   return 1;
 
  Error:
-  free(huffman_image);
-  DeleteHtreeGroups(htree_groups, num_htree_groups);
+  WebPSafeFree(code_lengths);
+  WebPSafeFree(huffman_image);
+  WebPSafeFree(huffman_tables);
+  VP8LHtreeGroupsFree(htree_groups);
   return 0;
 }
 
@@ -379,13 +491,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
   const int in_height = io->mb_h;
   const int out_height = io->scaled_height;
   const uint64_t work_size = 2 * num_channels * (uint64_t)out_width;
-  int32_t* work;        // Rescaler work area.
-  const uint64_t scaled_data_size = num_channels * (uint64_t)out_width;
+  rescaler_t* work;        // Rescaler work area.
+  const uint64_t scaled_data_size = (uint64_t)out_width;
   uint32_t* scaled_data;  // Temporary storage for scaled BGRA data.
   const uint64_t memory_size = sizeof(*dec->rescaler) +
                                work_size * sizeof(*work) +
                                scaled_data_size * sizeof(*scaled_data);
-  uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory));
+  uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory));
   if (memory == NULL) {
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     return 0;
@@ -395,13 +507,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
 
   dec->rescaler = (WebPRescaler*)memory;
   memory += sizeof(*dec->rescaler);
-  work = (int32_t*)memory;
+  work = (rescaler_t*)memory;
   memory += work_size * sizeof(*work);
   scaled_data = (uint32_t*)memory;
 
   WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data,
-                   out_width, out_height, 0, num_channels,
-                   in_width, out_width, in_height, out_height, work);
+                   out_width, out_height, 0, num_channels, work);
   return 1;
 }
 
@@ -411,12 +522,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
 // We have special "export" function since we need to convert from BGRA
 static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
                   int rgba_stride, uint8_t* const rgba) {
-  const uint32_t* const src = (const uint32_t*)rescaler->dst;
+  uint32_t* const src = (uint32_t*)rescaler->dst;
   const int dst_width = rescaler->dst_width;
   int num_lines_out = 0;
   while (WebPRescalerHasPendingOutput(rescaler)) {
     uint8_t* const dst = rgba + num_lines_out * rgba_stride;
     WebPRescalerExportRow(rescaler);
+    WebPMultARGBRow(src, dst_width, 1);
     VP8LConvertFromBGRA(src, dst_width, colorspace, dst);
     ++num_lines_out;
   }
@@ -424,18 +536,22 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
 }
 
 // Emit scaled rows.
-static int EmitRescaledRows(const VP8LDecoder* const dec,
-                            const uint32_t* const data, int in_stride, int mb_h,
-                            uint8_t* const out, int out_stride) {
+static int EmitRescaledRowsRGBA(const VP8LDecoder* const dec,
+                                uint8_t* in, int in_stride, int mb_h,
+                                uint8_t* const out, int out_stride) {
   const WEBP_CSP_MODE colorspace = dec->output_->colorspace;
-  const uint8_t* const in = (const uint8_t*)data;
   int num_lines_in = 0;
   int num_lines_out = 0;
   while (num_lines_in < mb_h) {
-    const uint8_t* const row_in = in + num_lines_in * in_stride;
+    uint8_t* const row_in = in + num_lines_in * in_stride;
     uint8_t* const row_out = out + num_lines_out * out_stride;
-    num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in,
-                                       row_in, in_stride);
+    const int lines_left = mb_h - num_lines_in;
+    const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
+    assert(needed_lines > 0 && needed_lines <= lines_left);
+    WebPMultARGBRows(row_in, in_stride,
+                     dec->rescaler->src_width, needed_lines, 0);
+    WebPRescalerImport(dec->rescaler, lines_left, row_in, in_stride);
+    num_lines_in += needed_lines;
     num_lines_out += Export(dec->rescaler, colorspace, out_stride, row_out);
   }
   return num_lines_out;
@@ -443,11 +559,10 @@ static int EmitRescaledRows(const VP8LDecoder* const dec,
 
 // Emit rows without any scaling.
 static int EmitRows(WEBP_CSP_MODE colorspace,
-                    const uint32_t* const data, int in_stride,
+                    const uint8_t* row_in, int in_stride,
                     int mb_w, int mb_h,
                     uint8_t* const out, int out_stride) {
   int lines = mb_h;
-  const uint8_t* row_in = (const uint8_t*)data;
   uint8_t* row_out = out;
   while (lines-- > 0) {
     VP8LConvertFromBGRA((const uint32_t*)row_in, mb_w, colorspace, row_out);
@@ -463,72 +578,37 @@ static int EmitRows(WEBP_CSP_MODE colorspace,
 static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos,
                           const WebPDecBuffer* const output) {
   const WebPYUVABuffer* const buf = &output->u.YUVA;
+
   // first, the luma plane
-  {
-    int i;
-    uint8_t* const y = buf->y + y_pos * buf->y_stride;
-    for (i = 0; i < width; ++i) {
-      const uint32_t p = src[i];
-      y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff);
-    }
-  }
+  WebPConvertARGBToY(src, buf->y + y_pos * buf->y_stride, width);
 
   // then U/V planes
   {
     uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride;
     uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride;
-    const int uv_width = width >> 1;
-    int i;
-    for (i = 0; i < uv_width; ++i) {
-      const uint32_t v0 = src[2 * i + 0];
-      const uint32_t v1 = src[2 * i + 1];
-      // VP8RGBToU/V expects four accumulated pixels. Hence we need to
-      // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
-      const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
-      const int g = ((v0 >>  7) & 0x1fe) + ((v1 >>  7) & 0x1fe);
-      const int b = ((v0 <<  1) & 0x1fe) + ((v1 <<  1) & 0x1fe);
-      if (!(y_pos & 1)) {  // even lines: store values
-        u[i] = VP8RGBToU(r, g, b);
-        v[i] = VP8RGBToV(r, g, b);
-      } else {             // odd lines: average with previous values
-        const int tmp_u = VP8RGBToU(r, g, b);
-        const int tmp_v = VP8RGBToV(r, g, b);
-        // Approximated average-of-four. But it's an acceptable diff.
-        u[i] = (u[i] + tmp_u + 1) >> 1;
-        v[i] = (v[i] + tmp_v + 1) >> 1;
-      }
-    }
-    if (width & 1) {       // last pixel
-      const uint32_t v0 = src[2 * i + 0];
-      const int r = (v0 >> 14) & 0x3fc;
-      const int g = (v0 >>  6) & 0x3fc;
-      const int b = (v0 <<  2) & 0x3fc;
-      if (!(y_pos & 1)) {  // even lines
-        u[i] = VP8RGBToU(r, g, b);
-        v[i] = VP8RGBToV(r, g, b);
-      } else {             // odd lines (note: we could just skip this)
-        const int tmp_u = VP8RGBToU(r, g, b);
-        const int tmp_v = VP8RGBToV(r, g, b);
-        u[i] = (u[i] + tmp_u + 1) >> 1;
-        v[i] = (v[i] + tmp_v + 1) >> 1;
-      }
-    }
+    // even lines: store values
+    // odd lines: average with previous values
+    WebPConvertARGBToUV(src, u, v, width, !(y_pos & 1));
   }
   // Lastly, store alpha if needed.
   if (buf->a != NULL) {
-    int i;
     uint8_t* const a = buf->a + y_pos * buf->a_stride;
-    for (i = 0; i < width; ++i) a[i] = (src[i] >> 24);
+#if defined(WORDS_BIGENDIAN)
+    WebPExtractAlpha((uint8_t*)src + 0, 0, width, 1, a, 0);
+#else
+    WebPExtractAlpha((uint8_t*)src + 3, 0, width, 1, a, 0);
+#endif
   }
 }
 
 static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
   WebPRescaler* const rescaler = dec->rescaler;
-  const uint32_t* const src = (const uint32_t*)rescaler->dst;
+  uint32_t* const src = (uint32_t*)rescaler->dst;
   const int dst_width = rescaler->dst_width;
   int num_lines_out = 0;
   while (WebPRescalerHasPendingOutput(rescaler)) {
     WebPRescalerExportRow(rescaler);
+    WebPMultARGBRow(src, dst_width, 1);
     ConvertToYUVA(src, dst_width, y_pos, dec->output_);
     ++y_pos;
     ++num_lines_out;
@@ -537,28 +617,28 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
 }
 
 static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
-                                const uint32_t* const data,
-                                int in_stride, int mb_h) {
-  const uint8_t* const in = (const uint8_t*)data;
+                                uint8_t* in, int in_stride, int mb_h) {
   int num_lines_in = 0;
   int y_pos = dec->last_out_row_;
   while (num_lines_in < mb_h) {
-    const uint8_t* const row_in = in + num_lines_in * in_stride;
-    num_lines_in += WebPRescalerImport(dec->rescaler, mb_h - num_lines_in,
-                                       row_in, in_stride);
+    const int lines_left = mb_h - num_lines_in;
+    const int needed_lines = WebPRescaleNeededLines(dec->rescaler, lines_left);
+    WebPMultARGBRows(in, in_stride, dec->rescaler->src_width, needed_lines, 0);
+    WebPRescalerImport(dec->rescaler, lines_left, in, in_stride);
+    num_lines_in += needed_lines;
+    in += needed_lines * in_stride;
     y_pos += ExportYUVA(dec, y_pos);
   }
   return y_pos;
 }
 
 static int EmitRowsYUVA(const VP8LDecoder* const dec,
-                        const uint32_t* const data, int in_stride,
+                        const uint8_t* in, int in_stride,
                         int mb_w, int num_rows) {
   int y_pos = dec->last_out_row_;
-  const uint8_t* row_in = (const uint8_t*)data;
   while (num_rows-- > 0) {
-    ConvertToYUVA((const uint32_t*)row_in, mb_w, y_pos, dec->output_);
-    row_in += in_stride;
+    ConvertToYUVA((const uint32_t*)in, mb_w, y_pos, dec->output_);
+    in += in_stride;
     ++y_pos;
   }
   return y_pos;
@@ -569,11 +649,11 @@ static int EmitRowsYUVA(const VP8LDecoder* const dec,
 
 // Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and
 // crop options. Also updates the input data pointer, so that it points to the
-// start of the cropped window.
-// Note that 'pixel_stride' is in units of 'uint32_t' (and not 'bytes).
+// start of the cropped window. Note that pixels are in ARGB format even if
+// 'in_data' is uint8_t*.
 // Returns true if the crop window is not empty.
 static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
-                         const uint32_t** const in_data, int pixel_stride) {
+                         uint8_t** const in_data, int pixel_stride) {
   assert(y_start < y_end);
   assert(io->crop_left < io->crop_right);
   if (y_end > io->crop_bottom) {
@@ -582,11 +662,11 @@ static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
   if (y_start < io->crop_top) {
     const int delta = io->crop_top - y_start;
     y_start = io->crop_top;
-    *in_data += pixel_stride * delta;
+    *in_data += delta * pixel_stride;
   }
   if (y_start >= y_end) return 0;  // Crop window is empty.
 
-  *in_data += io->crop_left;
+  *in_data += io->crop_left * sizeof(uint32_t);
 
   io->mb_y = y_start - io->crop_top;
   io->mb_w = io->crop_right - io->crop_left;
@@ -634,10 +714,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
   }
 }
 
+// Special method for paletted alpha data.
+static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
+                                        const uint8_t* const rows) {
+  const int start_row = dec->last_row_;
+  const int end_row = start_row + num_rows;
+  const uint8_t* rows_in = rows;
+  uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row;
+  VP8LTransform* const transform = &dec->transforms_[0];
+  assert(dec->next_transform_ == 1);
+  assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
+  VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in,
+                                      rows_out);
+}
+
 // Processes (transforms, scales & color-converts) the rows decoded after the
 // last call.
 static void ProcessRows(VP8LDecoder* const dec, int row) {
-  const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_;
+  const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
   const int num_rows = row - dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
@@ -646,18 +740,18 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   // Emit output.
   {
     VP8Io* const io = dec->io_;
-    const uint32_t* rows_data = dec->argb_cache_;
-    if (!SetCropWindow(io, dec->last_row_, row, &rows_data, io->width)) {
+    uint8_t* rows_data = (uint8_t*)dec->argb_cache_;
+    const int in_stride = io->width * sizeof(uint32_t);  // in unit of RGBA
+    if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) {
       // Nothing to output (this time).
     } else {
       const WebPDecBuffer* const output = dec->output_;
-      const int in_stride = io->width * sizeof(*rows_data);
-      if (output->colorspace < MODE_YUV) {  // convert to RGBA
+      if (WebPIsRGBMode(output->colorspace)) {  // convert to RGBA
         const WebPRGBABuffer* const buf = &output->u.RGBA;
         uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
         const int num_rows_out = io->use_scaling ?
-            EmitRescaledRows(dec, rows_data, in_stride, io->mb_h,
-                             rgba, buf->stride) :
+            EmitRescaledRowsRGBA(dec, rows_data, in_stride, io->mb_h,
+                                 rgba, buf->stride) :
             EmitRows(output->colorspace, rows_data, in_stride,
                      io->mb_w, io->mb_h, rgba, buf->stride);
         // Update 'last_out_row_'.
@@ -676,50 +770,317 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
   assert(dec->last_row_ <= dec->height_);
 }
 
-static int DecodeImageData(VP8LDecoder* const dec,
-                           uint32_t* const data, int width, int height,
-                           ProcessRowsFunc process_func) {
+// Row-processing for the special case when alpha data contains only one
+// transform (color indexing), and trivial non-green literals.
+static int Is8bOptimizable(const VP8LMetadata* const hdr) {  // 1 if eligible, else 0
+  int i;
+  if (hdr->color_cache_size_ > 0) return 0;  // a color cache rules the 8b path out
+  // When the Huffman tree contains only one symbol, we can skip the
+  // call to ReadSymbol() for red/blue/alpha channels.
+  for (i = 0; i < hdr->num_htree_groups_; ++i) {  // every group must qualify
+    HuffmanCode** const htrees = hdr->htree_groups_[i].htrees;
+    if (htrees[RED][0].bits > 0) return 0;  // presumably bits == 0 marks a one-symbol tree
+    if (htrees[BLUE][0].bits > 0) return 0;
+    if (htrees[ALPHA][0].bits > 0) return 0;
+  }
+  return 1;
+}
+
+static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {  // handles rows [last_row_, row)
+  const int num_rows = row - dec->last_row_;  // rows decoded since the last call
+  const uint8_t* const in =
+      (uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;  // byte offset: 1 byte per pixel
+  if (num_rows > 0) {
+    ApplyInverseTransformsAlpha(dec, num_rows, in);
+  }
+  dec->last_row_ = dec->last_out_row_ = row;  // both markers move even if num_rows <= 0
+}
+
+//------------------------------------------------------------------------------
+// Helper functions for fast pattern copy (8b and 32b)
+
+// Cyclic rotation of the 32-bit pattern word 'V' by 8 bits (one byte).
+static WEBP_INLINE uint32_t Rotate8b(uint32_t V) {
+#if defined(WORDS_BIGENDIAN)
+  return ((V & 0xff000000u) >> 24) | (V << 8);  // top byte wraps to the bottom
+#else
+  return ((V & 0xffu) << 24) | (V >> 8);  // bottom byte wraps to the top
+#endif
+}
+
+// Copy a 1, 2 or 4-bytes pattern: writes 'length' bytes at 'dst'; 'pattern' is the 32-bit fill word.
+static WEBP_INLINE void CopySmallPattern8b(const uint8_t* src, uint8_t* dst,
+                                           int length, uint32_t pattern) {
+  int i;  // counts 32-bit words first, then is reused as a byte index
+  // align 'dst' to 4-bytes boundary. Adjust the pattern along the way.
+  while ((uintptr_t)dst & 3) {  // 'length' is not re-checked; CopyBlock8b only calls with length >= 8
+    *dst++ = *src++;
+    pattern = Rotate8b(pattern);
+    --length;
+  }
+  // Copy the pattern 4 bytes at a time.
+  for (i = 0; i < (length >> 2); ++i) {
+    ((uint32_t*)dst)[i] = pattern;
+  }
+  // Finish with left-overs. 'pattern' is still correctly positioned,
+  // so no Rotate8b() call is needed.
+  for (i <<= 2; i < length; ++i) {  // 'i <<= 2' turns the word count into a byte offset
+    dst[i] = src[i];
+  }
+}
+
+static WEBP_INLINE void CopyBlock8b(uint8_t* const dst, int dist, int length) {
+  const uint8_t* src = dst - dist;  // source starts 'dist' bytes before 'dst'
+  if (length >= 8) {  // the pattern-fill path is only tried for runs of 8+ bytes
+    uint32_t pattern = 0;
+    switch (dist) {
+      case 1:
+        pattern = src[0];  // a single byte repeated over the whole run
+#if defined(__arm__) || defined(_M_ARM)   // arm doesn't like multiply that much
+        pattern |= pattern << 8;
+        pattern |= pattern << 16;
+#elif defined(WEBP_USE_MIPS_DSP_R2)
+        __asm__ volatile ("replv.qb %0, %0" : "+r"(pattern));
+#else
+        pattern = 0x01010101u * pattern;  // replicate the byte into all four byte lanes
+#endif
+        break;
+      case 2:
+        memcpy(&pattern, src, sizeof(uint16_t));  // memcpy avoids an unaligned 16-bit load
+#if defined(__arm__) || defined(_M_ARM)
+        pattern |= pattern << 16;
+#elif defined(WEBP_USE_MIPS_DSP_R2)
+        __asm__ volatile ("replv.ph %0, %0" : "+r"(pattern));
+#else
+        pattern = 0x00010001u * pattern;  // duplicates the low half; assumes bytes landed there - TODO confirm on big-endian
+#endif
+        break;
+      case 4:
+        memcpy(&pattern, src, sizeof(uint32_t));  // the 4 source bytes are the pattern as-is
+        break;
+      default:
+        goto Copy;  // any other distance falls back to the plain copy below
+        break;
+    }
+    CopySmallPattern8b(src, dst, length, pattern);
+    return;
+  }
+ Copy:
+  if (dist >= length) {  // no overlap -> use memcpy()
+    memcpy(dst, src, length * sizeof(*dst));
+  } else {
+    int i;
+    for (i = 0; i < length; ++i) dst[i] = src[i];  // overlap: forward copy re-reads bytes just written
+  }
+}
+
+// Copy a pattern of 1 or 2 uint32_t's: writes 'length' words at 'dst'; 'pattern' is the 64-bit fill value.
+static WEBP_INLINE void CopySmallPattern32b(const uint32_t* src,
+                                            uint32_t* dst,
+                                            int length, uint64_t pattern) {
+  int i;
+  if ((uintptr_t)dst & 4) {           // Align 'dst' to 8-bytes boundary.
+    *dst++ = *src++;
+    pattern = (pattern >> 32) | (pattern << 32);  // swap the two 32-bit halves
+    --length;
+  }
+  assert(0 == ((uintptr_t)dst & 7));  // holds because CopyBlock32b checks 4-byte alignment first
+  for (i = 0; i < (length >> 1); ++i) {
+    ((uint64_t*)dst)[i] = pattern;    // Copy the pattern 8 bytes at a time.
+  }
+  if (length & 1) {                   // Finish with left-over.
+    dst[i << 1] = src[i << 1];        // 'i << 1' is the index of the last, unpaired word
+  }
+}
+
+static WEBP_INLINE void CopyBlock32b(uint32_t* const dst,
+                                     int dist, int length) {  // 'dist' and 'length' count uint32_t words
+  const uint32_t* const src = dst - dist;
+  if (dist <= 2 && length >= 4 && ((uintptr_t)dst & 3) == 0) {  // short period, 4-byte aligned 'dst'
+    uint64_t pattern;
+    if (dist == 1) {
+      pattern = (uint64_t)src[0];
+      pattern |= pattern << 32;  // same word in both halves
+    } else {
+      memcpy(&pattern, src, sizeof(pattern));  // dist == 2: the two source words form the pattern
+    }
+    CopySmallPattern32b(src, dst, length, pattern);
+  } else if (dist >= length) {  // no overlap
+    memcpy(dst, src, length * sizeof(*dst));
+  } else {
+    int i;
+    for (i = 0; i < length; ++i) dst[i] = src[i];  // overlap: forward copy re-reads words just written
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static int DecodeAlphaData(VP8LDecoder* const dec, uint8_t* const data,
+                           int width, int height, int last_row) {  // returns 1 on success, else 0
   int ok = 1;
-  int col = 0, row = 0;
+  int row = dec->last_pixel_ / width;  // start from the position stored in last_pixel_
+  int col = dec->last_pixel_ % width;
   VP8LBitReader* const br = &dec->br_;
   VP8LMetadata* const hdr = &dec->hdr_;
-  HTreeGroup* htree_group = hdr->htree_groups_;
-  uint32_t* src = data;
-  uint32_t* last_cached = data;
-  uint32_t* const src_end = data + width * height;
+  const HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
+  int pos = dec->last_pixel_;         // current position
+  const int end = width * height;     // End of data
+  const int last = width * last_row;  // Last pixel to decode
   const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
-  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
-  VP8LColorCache* const color_cache =
-      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
   const int mask = hdr->huffman_mask_;
-
   assert(htree_group != NULL);
+  assert(pos < end);
+  assert(last_row <= height);
+  assert(Is8bOptimizable(hdr));  // this path only reads GREEN and DIST symbols
 
-  while (!br->eos_ && src < src_end) {
+  while (!br->eos_ && pos < last) {
     int code;
-    // Only update when changing tile. Note we could use the following test:
-    //   if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
-    // but that's actually slower and requires storing the previous col/row
+    // Only update when changing tile.
     if ((col & mask) == 0) {
       htree_group = GetHtreeGroupForPos(hdr, col, row);
     }
     VP8LFillBitWindow(br);
-    code = ReadSymbol(&htree_group->htrees_[GREEN], br);
-    if (code < NUM_LITERAL_CODES) {   // Literal.
-      int red, green, blue, alpha;
-      red = ReadSymbol(&htree_group->htrees_[RED], br);
-      green = code;
+    code = ReadSymbol(htree_group->htrees[GREEN], br);
+    if (code < NUM_LITERAL_CODES) {  // Literal
+      data[pos] = code;  // one byte per pixel: only the GREEN symbol is stored
+      ++pos;
+      ++col;
+      if (col >= width) {
+        col = 0;
+        ++row;
+        if (row % NUM_ARGB_CACHE_ROWS == 0) {  // flush once per block of NUM_ARGB_CACHE_ROWS rows
+          ExtractPalettedAlphaRows(dec, row);
+        }
+      }
+    } else if (code < len_code_limit) {  // Backward reference
+      int dist_code, dist;
+      const int length_sym = code - NUM_LITERAL_CODES;
+      const int length = GetCopyLength(length_sym, br);
+      const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br);
       VP8LFillBitWindow(br);
-      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
-      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
-      *src = (alpha << 24) + (red << 16) + (green << 8) + blue;
- AdvanceByOne:
+      dist_code = GetCopyDistance(dist_symbol, br);
+      dist = PlaneCodeToDistance(width, dist_code);
+      if (pos >= dist && end - pos >= length) {  // source and destination stay within [0, end)
+        CopyBlock8b(data + pos, dist, length);
+      } else {
+        ok = 0;
+        goto End;
+      }
+      pos += length;
+      col += length;
+      while (col >= width) {  // a copy may span several rows
+        col -= width;
+        ++row;
+        if (row % NUM_ARGB_CACHE_ROWS == 0) {
+          ExtractPalettedAlphaRows(dec, row);
+        }
+      }
+      if (pos < last && (col & mask)) {  // the (col & mask) == 0 case is handled at the loop top
+        htree_group = GetHtreeGroupForPos(hdr, col, row);
+      }
+    } else {  // Not reached
+      ok = 0;
+      goto End;
+    }
+    assert(br->eos_ == VP8LIsEndOfStream(br));
+  }
+  // Process the remaining rows corresponding to last row-block.
+  ExtractPalettedAlphaRows(dec, row);
+
+ End:
+  if (!ok || (br->eos_ && pos < end)) {  // decode error, or input ran out before the last pixel
+    ok = 0;
+    dec->status_ = br->eos_ ? VP8_STATUS_SUSPENDED
+                            : VP8_STATUS_BITSTREAM_ERROR;
+  } else {
+    dec->last_pixel_ = pos;  // progress is recorded only on success
+  }
+  return ok;
+}
+
+static void SaveState(VP8LDecoder* const dec, int last_pixel) {  // check-point read back by RestoreState()
+  assert(dec->incremental_);
+  dec->saved_br_ = dec->br_;  // struct copy of the bit-reader
+  dec->saved_last_pixel_ = last_pixel;
+  if (dec->hdr_.color_cache_size_ > 0) {  // nothing to snapshot when no color cache is in use
+    VP8LColorCacheCopy(&dec->hdr_.color_cache_, &dec->hdr_.saved_color_cache_);
+  }
+}
+
+static void RestoreState(VP8LDecoder* const dec) {  // rolls back to the values stored by SaveState()
+  assert(dec->br_.eos_);  // expected to be called only once the bit-reader hit end-of-stream
+  dec->status_ = VP8_STATUS_SUSPENDED;
+  dec->br_ = dec->saved_br_;
+  dec->last_pixel_ = dec->saved_last_pixel_;
+  if (dec->hdr_.color_cache_size_ > 0) {
+    VP8LColorCacheCopy(&dec->hdr_.saved_color_cache_, &dec->hdr_.color_cache_);  // saved -> live
+  }
+}
+
+#define SYNC_EVERY_N_ROWS 8  // minimum number of rows between check-points
+static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
+                           int width, int height, int last_row,
+                           ProcessRowsFunc process_func) {
+  int row = dec->last_pixel_ / width;
+  int col = dec->last_pixel_ % width;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  HTreeGroup* htree_group = GetHtreeGroupForPos(hdr, col, row);
+  uint32_t* src = data + dec->last_pixel_;
+  uint32_t* last_cached = src;
+  uint32_t* const src_end = data + width * height;     // End of data
+  uint32_t* const src_last = data + width * last_row;  // Last pixel to decode
+  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
+  int next_sync_row = dec->incremental_ ? row : 1 << 24;
+  VP8LColorCache* const color_cache =
+      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
+  const int mask = hdr->huffman_mask_;
+  assert(htree_group != NULL);
+  assert(src < src_end);
+  assert(src_last <= src_end);
+
+  while (src < src_last) {
+    int code;
+    if (row >= next_sync_row) {
+      SaveState(dec, (int)(src - data));
+      next_sync_row = row + SYNC_EVERY_N_ROWS;
+    }
+    // Only update when changing tile. Note we could use this test:
+    // if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
+    // but that's actually slower and needs storing the previous col/row.
+    if ((col & mask) == 0) htree_group = GetHtreeGroupForPos(hdr, col, row);
+    if (htree_group->is_trivial_code) {
+      *src = htree_group->literal_arb;
+      goto AdvanceByOne;
+    }
+    VP8LFillBitWindow(br);
+    if (htree_group->use_packed_table) {
+      code = ReadPackedSymbols(htree_group, br, src);
+      if (code == PACKED_NON_LITERAL_CODE) goto AdvanceByOne;
+    } else {
+      code = ReadSymbol(htree_group->htrees[GREEN], br);
+    }
+    if (br->eos_) break;  // early out
+    if (code < NUM_LITERAL_CODES) {  // Literal
+      if (htree_group->is_trivial_literal) {
+        *src = htree_group->literal_arb | (code << 8);
+      } else {
+        int red, blue, alpha;
+        red = ReadSymbol(htree_group->htrees[RED], br);
+        VP8LFillBitWindow(br);
+        blue = ReadSymbol(htree_group->htrees[BLUE], br);
+        alpha = ReadSymbol(htree_group->htrees[ALPHA], br);
+        if (br->eos_) break;
+        *src = ((uint32_t)alpha << 24) | (red << 16) | (code << 8) | blue;
+      }
+    AdvanceByOne:
       ++src;
       ++col;
       if (col >= width) {
         col = 0;
         ++row;
-        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+        if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
           process_func(dec, row);
         }
         if (color_cache != NULL) {
@@ -728,40 +1089,39 @@ static int DecodeImageData(VP8LDecoder* const dec,
           }
         }
       }
-    } else if (code < len_code_limit) {           // Backward reference
+    } else if (code < len_code_limit) {  // Backward reference
       int dist_code, dist;
       const int length_sym = code - NUM_LITERAL_CODES;
       const int length = GetCopyLength(length_sym, br);
-      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
+      const int dist_symbol = ReadSymbol(htree_group->htrees[DIST], br);
       VP8LFillBitWindow(br);
       dist_code = GetCopyDistance(dist_symbol, br);
       dist = PlaneCodeToDistance(width, dist_code);
-      if (src - data < dist || src_end - src < length) {
-        ok = 0;
-        goto End;
-      }
-      {
-        int i;
-        for (i = 0; i < length; ++i) src[i] = src[i - dist];
-        src += length;
+      if (br->eos_) break;
+      if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
+        goto Error;
+      } else {
+        CopyBlock32b(src, dist, length);
       }
+      src += length;
       col += length;
       while (col >= width) {
         col -= width;
         ++row;
-        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+        if ((row % NUM_ARGB_CACHE_ROWS == 0) && (process_func != NULL)) {
           process_func(dec, row);
         }
       }
-      if (src < src_end) {
-        htree_group = GetHtreeGroupForPos(hdr, col, row);
-        if (color_cache != NULL) {
-          while (last_cached < src) {
-            VP8LColorCacheInsert(color_cache, *last_cached++);
-          }
+      // Because of the check done above (before 'src' was incremented by
+      // 'length'), the following holds true.
+      assert(src <= src_end);
+      if (col & mask) htree_group = GetHtreeGroupForPos(hdr, col, row);
+      if (color_cache != NULL) {
+        while (last_cached < src) {
+          VP8LColorCacheInsert(color_cache, *last_cached++);
         }
       }
-    } else if (code < color_cache_limit) {    // Color cache.
+    } else if (code < color_cache_limit) {  // Color cache
       const int key = code - len_code_limit;
       assert(color_cache != NULL);
       while (last_cached < src) {
@@ -769,33 +1129,38 @@ static int DecodeImageData(VP8LDecoder* const dec,
       }
       *src = VP8LColorCacheLookup(color_cache, key);
       goto AdvanceByOne;
-    } else {    // Not reached.
-      ok = 0;
-      goto End;
+    } else {  // Not reached
+      goto Error;
     }
-    ok = !br->error_;
-    if (!ok) goto End;
+    assert(br->eos_ == VP8LIsEndOfStream(br));
   }
-  // Process the remaining rows corresponding to last row-block.
-  if (process_func != NULL) process_func(dec, row);
 
- End:
-  if (br->error_ || !ok || (br->eos_ && src < src_end)) {
-    ok = 0;
-    dec->status_ = (!br->eos_) ?
-        VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;
-  } else if (src == src_end) {
-    dec->state_ = READ_DATA;
+  if (dec->incremental_ && br->eos_ && src < src_end) {
+    RestoreState(dec);
+  } else if (!br->eos_) {
+    // Process the remaining rows corresponding to last row-block.
+    if (process_func != NULL) {
+      process_func(dec, row);
+    }
+    dec->status_ = VP8_STATUS_OK;
+    dec->last_pixel_ = (int)(src - data);  // end-of-scan marker
+  } else {
+    // if not incremental, and we are past the end of buffer (eos_=1), then this
+    // is a real bitstream error.
+    goto Error;
   }
+  return 1;
 
-  return ok;
+ Error:
+  dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+  return 0;
 }
 
 // -----------------------------------------------------------------------------
 // VP8LTransform
 
 static void ClearTransform(VP8LTransform* const transform) {
-  free(transform->data_);
+  WebPSafeFree(transform->data_);  // replaces free(); data_ is reset to NULL just below
   transform->data_ = NULL;
 }
 
@@ -819,7 +1184,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
     }
     for (; i < 4 * final_num_colors; ++i)
       new_data[i] = 0;  // black tail.
-    free(transform->data_);
+    WebPSafeFree(transform->data_);
     transform->data_ = new_color_map;
   }
   return 1;
@@ -882,16 +1247,18 @@ static int ReadTransform(int* const xsize, int const* ysize,
 // VP8LMetadata
 
 static void InitMetadata(VP8LMetadata* const hdr) {
-  assert(hdr);
+  assert(hdr != NULL);  // 'hdr' is zero-filled by the memset() below
   memset(hdr, 0, sizeof(*hdr));
 }
 
 static void ClearMetadata(VP8LMetadata* const hdr) {
-  assert(hdr);
+  assert(hdr != NULL);  // fields of 'hdr' are released below, then InitMetadata() zeroes it
 
-  free(hdr->huffman_image_);
-  DeleteHtreeGroups(hdr->htree_groups_, hdr->num_htree_groups_);
+  WebPSafeFree(hdr->huffman_image_);
+  WebPSafeFree(hdr->huffman_tables_);  // freed separately from htree_groups_
+  VP8LHtreeGroupsFree(hdr->htree_groups_);
   VP8LColorCacheClear(&hdr->color_cache_);
+  VP8LColorCacheClear(&hdr->saved_color_cache_);  // the 'saved' copy kept for incremental decoding
   InitMetadata(hdr);
 }
 
@@ -899,11 +1266,13 @@ static void ClearMetadata(VP8LMetadata* const hdr) {
 // VP8LDecoder
 
 VP8LDecoder* VP8LNew(void) {
-  VP8LDecoder* const dec = (VP8LDecoder*)calloc(1, sizeof(*dec));
+  VP8LDecoder* const dec = (VP8LDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));  // replaces calloc()
   if (dec == NULL) return NULL;
   dec->status_ = VP8_STATUS_OK;
-  dec->action_ = READ_DIM;
   dec->state_ = READ_DIM;
+
+  VP8LDspInit();  // Init critical function pointers. Runs on every successful VP8LNew() call.
+
   return dec;
 }
 
@@ -912,15 +1281,15 @@ void VP8LClear(VP8LDecoder* const dec) {
   if (dec == NULL) return;
   ClearMetadata(&dec->hdr_);
 
-  free(dec->argb_);
-  dec->argb_ = NULL;
+  WebPSafeFree(dec->pixels_);
+  dec->pixels_ = NULL;
   for (i = 0; i < dec->next_transform_; ++i) {
     ClearTransform(&dec->transforms_[i]);
   }
   dec->next_transform_ = 0;
   dec->transforms_seen_ = 0;
 
-  free(dec->rescaler_memory);
+  WebPSafeFree(dec->rescaler_memory);
   dec->rescaler_memory = NULL;
 
   dec->output_ = NULL;   // leave no trace behind
@@ -929,7 +1298,7 @@ void VP8LClear(VP8LDecoder* const dec) {
 void VP8LDelete(VP8LDecoder* const dec) {
   if (dec != NULL) {
     VP8LClear(dec);
-    free(dec);
+    WebPSafeFree(dec);  // frees the decoder object itself, after VP8LClear()
   }
 }
 
@@ -1009,19 +1378,14 @@ static int DecodeImageStream(int xsize, int ysize,
   }
 
   // Use the Huffman trees to decode the LZ77 encoded data.
-  ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, NULL);
-  ok = ok && !br->error_;
+  ok = DecodeImageData(dec, data, transform_xsize, transform_ysize,
+                       transform_ysize, NULL);
+  ok = ok && !br->eos_;
 
  End:
-
   if (!ok) {
-    free(data);
+    WebPSafeFree(data);
     ClearMetadata(hdr);
-    // If not enough data (br.eos_) resulted in BIT_STREAM_ERROR, update the
-    // status appropriately.
-    if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && dec->br_.eos_) {
-      dec->status_ = VP8_STATUS_SUSPENDED;
-    }
   } else {
     if (decoded_data != NULL) {
       *decoded_data = data;
@@ -1031,41 +1395,52 @@ static int DecodeImageStream(int xsize, int ysize,
       assert(data == NULL);
       assert(is_level0);
     }
+    dec->last_pixel_ = 0;  // Reset for future DECODE_DATA_FUNC() calls.
     if (!is_level0) ClearMetadata(hdr);  // Clean up temporary data behind.
   }
   return ok;
 }
 
 //------------------------------------------------------------------------------
-// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_
-
-static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
+// Allocate internal buffers dec->pixels_ and dec->argb_cache_ (32b per pixel). Returns 0 on failure.
+static int AllocateInternalBuffers32b(VP8LDecoder* const dec, int final_width) {
   const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
   // Scratch buffer corresponding to top-prediction row for transforming the
-  // first row in the row-blocks.
-  const uint64_t cache_top_pixels = final_width;
-  // Scratch buffer for temporary BGRA storage.
+  // first row in the row-blocks. Not needed for paletted alpha.
+  const uint64_t cache_top_pixels = (uint16_t)final_width;  // NOTE(review): cast keeps only the low 16 bits - confirm width is bounded
+  // Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
   const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
   const uint64_t total_num_pixels =
       num_pixels + cache_top_pixels + cache_pixels;
 
   assert(dec->width_ <= final_width);
-  dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
-  if (dec->argb_ == NULL) {
+  dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint32_t));  // one block for all three areas
+  if (dec->pixels_ == NULL) {
     dec->argb_cache_ = NULL;    // for sanity check
     dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
     return 0;
   }
-  dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels;
+  dec->argb_cache_ = dec->pixels_ + num_pixels + cache_top_pixels;  // cache sits after pixels and top row
+  return 1;
+}
+
+static int AllocateInternalBuffers8b(VP8LDecoder* const dec) {  // returns 1 on success, 0 on failure
+  const uint64_t total_num_pixels = (uint64_t)dec->width_ * dec->height_;
+  dec->argb_cache_ = NULL;    // for sanity check
+  dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(uint8_t));  // 1 byte per pixel, no scratch area
+  if (dec->pixels_ == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+  }
   return 1;
 }
 
 //------------------------------------------------------------------------------
-// Special row-processing that only stores the alpha data.
 
+// Special row-processing that only stores the alpha data.
 static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
   const int num_rows = row - dec->last_row_;
-  const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
+  const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
 
   if (num_rows <= 0) return;  // Nothing to be done.
   ApplyInverseTransforms(dec, num_rows, in);
@@ -1079,44 +1454,77 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
     int i;
     for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
   }
-
   dec->last_row_ = dec->last_out_row_ = row;
 }
 
-int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
-                               size_t data_size, uint8_t* const output) {
-  VP8Io io;
+int VP8LDecodeAlphaHeader(ALPHDecoder* const alph_dec,
+                          const uint8_t* const data, size_t data_size,
+                          uint8_t* const output) {  // returns 1 on success, 0 on failure
   int ok = 0;
-  VP8LDecoder* const dec = VP8LNew();
-  if (dec == NULL) return 0;
-
-  dec->width_ = width;
-  dec->height_ = height;
-  dec->io_ = &io;
+  VP8LDecoder* dec;
+  VP8Io* io;
+  assert(alph_dec != NULL);
+  alph_dec->vp8l_dec_ = VP8LNew();  // deleted and reset to NULL at 'Err' if anything below fails
+  if (alph_dec->vp8l_dec_ == NULL) return 0;
+  dec = alph_dec->vp8l_dec_;
+
+  dec->width_ = alph_dec->width_;
+  dec->height_ = alph_dec->height_;
+  dec->io_ = &alph_dec->io_;  // the VP8Io lives inside 'alph_dec', not on this function's stack
+  io = dec->io_;
 
-  VP8InitIo(&io);
-  WebPInitCustomIo(NULL, &io);    // Just a sanity Init. io won't be used.
-  io.opaque = output;
-  io.width = width;
-  io.height = height;
+  VP8InitIo(io);
+  WebPInitCustomIo(NULL, io);  // Just a sanity Init. io won't be used.
+  io->opaque = output;
+  io->width = alph_dec->width_;
+  io->height = alph_dec->height_;
 
   dec->status_ = VP8_STATUS_OK;
   VP8LInitBitReader(&dec->br_, data, data_size);
 
-  dec->action_ = READ_HDR;
-  if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
+  if (!DecodeImageStream(alph_dec->width_, alph_dec->height_, 1, dec, NULL)) {
+    goto Err;
+  }
 
-  // Allocate output (note that dec->width_ may have changed here).
-  if (!AllocateARGBBuffers(dec, width)) goto Err;
+  // Special case: if alpha data uses only the color indexing transform and
+  // doesn't use color cache (a frequent case), we will use DecodeAlphaData()
+  // method that only needs allocation of 1 byte per pixel (alpha channel).
+  if (dec->next_transform_ == 1 &&
+      dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
+      Is8bOptimizable(&dec->hdr_)) {
+    alph_dec->use_8b_decode = 1;  // selects DecodeAlphaData() in VP8LDecodeAlphaImageStream()
+    ok = AllocateInternalBuffers8b(dec);
+  } else {
+    // Allocate internal buffers (note that dec->width_ may have changed here).
+    alph_dec->use_8b_decode = 0;  // selects DecodeImageData() with ExtractAlphaRows
+    ok = AllocateInternalBuffers32b(dec, alph_dec->width_);
+  }
 
-  // Decode (with special row processing).
-  dec->action_ = READ_DATA;
-  ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
-                       ExtractAlphaRows);
+  if (!ok) goto Err;  // buffer allocation failed
+
+  return 1;
 
  Err:
-  VP8LDelete(dec);
-  return ok;
+  VP8LDelete(alph_dec->vp8l_dec_);
+  alph_dec->vp8l_dec_ = NULL;  // do not leave a dangling pointer in 'alph_dec'
+  return 0;
+}
+
+int VP8LDecodeAlphaImageStream(ALPHDecoder* const alph_dec, int last_row) {  // 0 is returned on failure
+  VP8LDecoder* const dec = alph_dec->vp8l_dec_;  // created by VP8LDecodeAlphaHeader()
+  assert(dec != NULL);
+  assert(last_row <= dec->height_);
+
+  if (dec->last_pixel_ == dec->width_ * dec->height_) {  // every pixel was already decoded
+    return 1;  // done
+  }
+
+  // Decode (with special row processing). The path was chosen in VP8LDecodeAlphaHeader().
+  return alph_dec->use_8b_decode ?
+      DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
+                      last_row) :
+      DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
+                      last_row, ExtractAlphaRows);
 }
 
 //------------------------------------------------------------------------------
@@ -1141,14 +1549,13 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
   io->width = width;
   io->height = height;
 
-  dec->action_ = READ_HDR;
   if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error;
   return 1;
 
  Error:
-   VP8LClear(dec);
-   assert(dec->status_ != VP8_STATUS_OK);
-   return 0;
+  VP8LClear(dec);
+  assert(dec->status_ != VP8_STATUS_OK);
+  return 0;
 }
 
 int VP8LDecodeImage(VP8LDecoder* const dec) {
@@ -1158,33 +1565,57 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
   // Sanity checks.
   if (dec == NULL) return 0;
 
+  assert(dec->hdr_.huffman_tables_ != NULL);
+  assert(dec->hdr_.htree_groups_ != NULL);
+  assert(dec->hdr_.num_htree_groups_ > 0);
+
   io = dec->io_;
   assert(io != NULL);
   params = (WebPDecParams*)io->opaque;
   assert(params != NULL);
-  dec->output_ = params->output;
-  assert(dec->output_ != NULL);
 
   // Initialization.
-  if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
-    dec->status_ = VP8_STATUS_INVALID_PARAM;
-    goto Err;
-  }
+  if (dec->state_ != READ_DATA) {
+    dec->output_ = params->output;
+    assert(dec->output_ != NULL);
+
+    if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
+      dec->status_ = VP8_STATUS_INVALID_PARAM;
+      goto Err;
+    }
 
-  if (!AllocateARGBBuffers(dec, io->width)) goto Err;
+    if (!AllocateInternalBuffers32b(dec, io->width)) goto Err;
 
-  if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
+    if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
+
+    if (io->use_scaling || WebPIsPremultipliedMode(dec->output_->colorspace)) {
+      // need the alpha-multiply functions for premultiplied output or rescaling
+      WebPInitAlphaProcessing();
+    }
+    if (!WebPIsRGBMode(dec->output_->colorspace)) {
+      WebPInitConvertARGBToYUV();
+      if (dec->output_->u.YUVA.a != NULL) WebPInitAlphaProcessing();
+    }
+    if (dec->incremental_) {
+      if (dec->hdr_.color_cache_size_ > 0 &&
+          dec->hdr_.saved_color_cache_.colors_ == NULL) {
+        if (!VP8LColorCacheInit(&dec->hdr_.saved_color_cache_,
+                                dec->hdr_.color_cache_.hash_bits_)) {
+          dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+          goto Err;
+        }
+      }
+    }
+    dec->state_ = READ_DATA;
+  }
 
   // Decode.
-  dec->action_ = READ_DATA;
-  if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
-                       ProcessRows)) {
+  if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
+                       dec->height_, ProcessRows)) {
     goto Err;
   }
 
-  // Cleanup.
   params->last_y = dec->last_out_row_;
-  VP8LClear(dec);
   return 1;
 
  Err:
@@ -1194,7 +1625,3 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/vp8li.h b/drivers/webp/dec/vp8li.h
index 5f6cd6a01c..8886e47f62 100644
--- a/drivers/webp/dec/vp8li.h
+++ b/drivers/webp/dec/vp8li.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Lossless decoder: internal header.
@@ -18,9 +20,8 @@
 #include "../utils/bit_reader.h"
 #include "../utils/color_cache.h"
 #include "../utils/huffman.h"
-#include "../format_constants.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -40,12 +41,9 @@ struct VP8LTransform {
 };
 
 typedef struct {
-  HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE];
-} HTreeGroup;
-
-typedef struct {
   int             color_cache_size_;
   VP8LColorCache  color_cache_;
+  VP8LColorCache  saved_color_cache_;  // for incremental
 
   int             huffman_mask_;
   int             huffman_subsample_bits_;
@@ -53,24 +51,32 @@ typedef struct {
   uint32_t       *huffman_image_;
   int             num_htree_groups_;
   HTreeGroup     *htree_groups_;
+  HuffmanCode    *huffman_tables_;
 } VP8LMetadata;
 
-typedef struct {
+typedef struct VP8LDecoder VP8LDecoder;
+struct VP8LDecoder {
   VP8StatusCode    status_;
-  VP8LDecodeState  action_;
   VP8LDecodeState  state_;
   VP8Io           *io_;
 
   const WebPDecBuffer *output_;    // shortcut to io->opaque->output
 
-  uint32_t        *argb_;          // Internal data: always in BGRA color mode.
+  uint32_t        *pixels_;        // Internal data: either uint8_t* for alpha
+                                   // or uint32_t* for BGRA.
   uint32_t        *argb_cache_;    // Scratch buffer for temporary BGRA storage.
 
   VP8LBitReader    br_;
+  int              incremental_;   // if true, incremental decoding is expected
+  VP8LBitReader    saved_br_;      // note: could be local variables too
+  int              saved_last_pixel_;
 
   int              width_;
   int              height_;
   int              last_row_;      // last input row decoded so far.
+  int              last_pixel_;    // last pixel decoded so far. However, it may
+                                   // not be transformed, scaled and
+                                   // color-converted yet.
   int              last_out_row_;  // last row output so far.
 
   VP8LMetadata     hdr_;
@@ -82,18 +88,27 @@ typedef struct {
 
   uint8_t         *rescaler_memory;  // Working memory for rescaling work.
   WebPRescaler    *rescaler;         // Common rescaler for all channels.
-} VP8LDecoder;
+};
 
 //------------------------------------------------------------------------------
 // internal functions. Not public.
 
+struct ALPHDecoder;  // Defined in dec/alphai.h.
+
 // in vp8l.c
 
-// Decodes a raw image stream (without header) and store the alpha data
-// into *output, which must be of size width x height. Returns false in case
-// of error.
-int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
-                               size_t data_size, uint8_t* const output);
+// Decodes image header for alpha data stored using lossless compression.
+// Returns false in case of error.
+int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
+                          const uint8_t* const data, size_t data_size,
+                          uint8_t* const output);
+
+// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
+// already decoded in previous call(s), it will resume decoding from where it
+// was paused.
+// Returns false in case of bitstream error.
+int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec,
+                               int last_row);
 
 // Allocates and initialize a new lossless decoder instance.
 VP8LDecoder* VP8LNew(void);
@@ -114,7 +129,7 @@ void VP8LDelete(VP8LDecoder* const dec);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/dec/webp.c b/drivers/webp/dec/webp.c
index f44bc2b8ae..93a113a48d 100644
--- a/drivers/webp/dec/webp.c
+++ b/drivers/webp/dec/webp.c
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Main decoding functions for WEBP images.
@@ -14,11 +16,8 @@
 #include "./vp8i.h"
 #include "./vp8li.h"
 #include "./webpi.h"
-#include "../format_constants.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/utils.h"
+#include "webp/mux_types.h"  // ALPHA_FLAG
 
 //------------------------------------------------------------------------------
 // RIFF layout is:
@@ -40,27 +39,20 @@ extern "C" {
 //   20..23  VP8X flags bit-map corresponding to the chunk-types present.
 //   24..26  Width of the Canvas Image.
 //   27..29  Height of the Canvas Image.
-// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8,
-// META  ...)
+// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
+// VP8L, XMP, EXIF  ...)
 // All sizes are in little-endian order.
 // Note: chunk data size must be padded to multiple of 2 when written.
 
-static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) {
-  return data[0] | (data[1] << 8) | (data[2] << 16);
-}
-
-static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) {
-  return (uint32_t)get_le24(data) | (data[3] << 24);
-}
-
 // Validates the RIFF container (if detected) and skips over it.
-// If a RIFF container is detected,
-// Returns VP8_STATUS_BITSTREAM_ERROR for invalid header, and
-//         VP8_STATUS_OK otherwise.
+// If a RIFF container is detected, returns:
+//     VP8_STATUS_BITSTREAM_ERROR for invalid header,
+//     VP8_STATUS_NOT_ENOUGH_DATA for truncated data if have_all_data is true,
+// and VP8_STATUS_OK otherwise.
 // In case there are not enough bytes (partial RIFF container), return 0 for
 // *riff_size. Else return the RIFF size extracted from the header.
 static VP8StatusCode ParseRIFF(const uint8_t** const data,
-                               size_t* const data_size,
+                               size_t* const data_size, int have_all_data,
                                size_t* const riff_size) {
   assert(data != NULL);
   assert(data_size != NULL);
@@ -71,11 +63,17 @@ static VP8StatusCode ParseRIFF(const uint8_t** const data,
     if (memcmp(*data + 8, "WEBP", TAG_SIZE)) {
       return VP8_STATUS_BITSTREAM_ERROR;  // Wrong image file signature.
     } else {
-      const uint32_t size = get_le32(*data + TAG_SIZE);
+      const uint32_t size = GetLE32(*data + TAG_SIZE);
       // Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn").
       if (size < TAG_SIZE + CHUNK_HEADER_SIZE) {
         return VP8_STATUS_BITSTREAM_ERROR;
       }
+      if (size > MAX_CHUNK_PAYLOAD) {
+        return VP8_STATUS_BITSTREAM_ERROR;
+      }
+      if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
+        return VP8_STATUS_NOT_ENOUGH_DATA;  // Truncated bitstream.
+      }
       // We have a RIFF container. Skip it.
       *riff_size = size;
       *data += RIFF_HEADER_SIZE;
@@ -111,7 +109,7 @@ static VP8StatusCode ParseVP8X(const uint8_t** const data,
   if (!memcmp(*data, "VP8X", TAG_SIZE)) {
     int width, height;
     uint32_t flags;
-    const uint32_t chunk_size = get_le32(*data + TAG_SIZE);
+    const uint32_t chunk_size = GetLE32(*data + TAG_SIZE);
     if (chunk_size != VP8X_CHUNK_SIZE) {
       return VP8_STATUS_BITSTREAM_ERROR;  // Wrong chunk size.
     }
@@ -120,9 +118,9 @@ static VP8StatusCode ParseVP8X(const uint8_t** const data,
     if (*data_size < vp8x_size) {
       return VP8_STATUS_NOT_ENOUGH_DATA;  // Insufficient data.
     }
-    flags = get_le32(*data + 8);
-    width = 1 + get_le24(*data + 12);
-    height = 1 + get_le24(*data + 15);
+    flags = GetLE32(*data + 8);
+    width = 1 + GetLE24(*data + 12);
+    height = 1 + GetLE24(*data + 15);
     if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
       return VP8_STATUS_BITSTREAM_ERROR;  // image is too large
     }
@@ -176,7 +174,10 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
       return VP8_STATUS_NOT_ENOUGH_DATA;
     }
 
-    chunk_size = get_le32(buf + TAG_SIZE);
+    chunk_size = GetLE32(buf + TAG_SIZE);
+    if (chunk_size > MAX_CHUNK_PAYLOAD) {
+      return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
+    }
     // For odd-sized chunk-payload, there's one byte padding at the end.
     disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
     total_size += disk_chunk_size;
@@ -186,6 +187,15 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
       return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
     }
 
+    // Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
+    // parsed all the optional chunks.
+    // Note: This check must occur before the check 'buf_size < disk_chunk_size'
+    // below to allow incomplete VP8/VP8L chunks.
+    if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
+        !memcmp(buf, "VP8L", TAG_SIZE)) {
+      return VP8_STATUS_OK;
+    }
+
     if (buf_size < disk_chunk_size) {             // Insufficient data.
       return VP8_STATUS_NOT_ENOUGH_DATA;
     }
@@ -193,9 +203,6 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
     if (!memcmp(buf, "ALPH", TAG_SIZE)) {         // A valid ALPH header.
       *alpha_data = buf + CHUNK_HEADER_SIZE;
       *alpha_size = chunk_size;
-    } else if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
-               !memcmp(buf, "VP8L", TAG_SIZE)) {  // A valid VP8/VP8L header.
-      return VP8_STATUS_OK;  // Found.
     }
 
     // We have a full and valid chunk; skip it.
@@ -213,9 +220,8 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
 // extracted from the VP8/VP8L chunk header.
 // The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data.
 static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
-                                    size_t* const data_size,
-                                    size_t riff_size,
-                                    size_t* const chunk_size,
+                                    size_t* const data_size, int have_all_data,
+                                    size_t riff_size, size_t* const chunk_size,
                                     int* const is_lossless) {
   const uint8_t* const data = *data_ptr;
   const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE);
@@ -234,10 +240,13 @@ static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
 
   if (is_vp8 || is_vp8l) {
     // Bitstream contains VP8/VP8L header.
-    const uint32_t size = get_le32(data + TAG_SIZE);
+    const uint32_t size = GetLE32(data + TAG_SIZE);
     if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) {
       return VP8_STATUS_BITSTREAM_ERROR;  // Inconsistent size information.
     }
+    if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
+      return VP8_STATUS_NOT_ENOUGH_DATA;  // Truncated bitstream.
+    }
     // Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header.
     *chunk_size = size;
     *data_ptr += CHUNK_HEADER_SIZE;
@@ -270,9 +279,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
                                           int* const width,
                                           int* const height,
                                           int* const has_alpha,
+                                          int* const has_animation,
+                                          int* const format,
                                           WebPHeaderStructure* const headers) {
+  int canvas_width = 0;
+  int canvas_height = 0;
+  int image_width = 0;
+  int image_height = 0;
   int found_riff = 0;
   int found_vp8x = 0;
+  int animation_present = 0;
+  int fragments_present = 0;
+  const int have_all_data = (headers != NULL) ? headers->have_all_data : 0;
+
   VP8StatusCode status;
   WebPHeaderStructure hdrs;
 
@@ -284,7 +303,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
   hdrs.data_size = data_size;
 
   // Skip over RIFF header.
-  status = ParseRIFF(&data, &data_size, &hdrs.riff_size);
+  status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
   if (status != VP8_STATUS_OK) {
     return status;   // Wrong RIFF header / insufficient data.
   }
@@ -293,22 +312,35 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
   // Skip over VP8X.
   {
     uint32_t flags = 0;
-    status = ParseVP8X(&data, &data_size, &found_vp8x, width, height, &flags);
+    status = ParseVP8X(&data, &data_size, &found_vp8x,
+                       &canvas_width, &canvas_height, &flags);
     if (status != VP8_STATUS_OK) {
       return status;  // Wrong VP8X / insufficient data.
     }
+    animation_present = !!(flags & ANIMATION_FLAG);
+    fragments_present = !!(flags & FRAGMENTS_FLAG);
     if (!found_riff && found_vp8x) {
       // Note: This restriction may be removed in the future, if it becomes
       // necessary to send VP8X chunk to the decoder.
       return VP8_STATUS_BITSTREAM_ERROR;
     }
-    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT);
-    if (found_vp8x && headers == NULL) {
-      return VP8_STATUS_OK;  // Return features from VP8X header.
+    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
+    if (has_animation != NULL) *has_animation = animation_present;
+    if (format != NULL) *format = 0;   // default = undefined
+
+    image_width = canvas_width;
+    image_height = canvas_height;
+    if (found_vp8x && (animation_present || fragments_present) &&
+        headers == NULL) {
+      status = VP8_STATUS_OK;
+      goto ReturnWidthHeight;  // Just return features from VP8X header.
     }
   }
 
-  if (data_size < TAG_SIZE) return VP8_STATUS_NOT_ENOUGH_DATA;
+  if (data_size < TAG_SIZE) {
+    status = VP8_STATUS_NOT_ENOUGH_DATA;
+    goto ReturnWidthHeight;
+  }
 
   // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
   if ((found_riff && found_vp8x) ||
@@ -316,43 +348,49 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
     status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size,
                                  &hdrs.alpha_data, &hdrs.alpha_data_size);
     if (status != VP8_STATUS_OK) {
-      return status;  // Found an invalid chunk size / insufficient data.
+      goto ReturnWidthHeight;  // Invalid chunk size / insufficient data.
     }
   }
 
   // Skip over VP8/VP8L header.
-  status = ParseVP8Header(&data, &data_size, hdrs.riff_size,
+  status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size,
                           &hdrs.compressed_size, &hdrs.is_lossless);
   if (status != VP8_STATUS_OK) {
-    return status;  // Wrong VP8/VP8L chunk-header / insufficient data.
+    goto ReturnWidthHeight;  // Wrong VP8/VP8L chunk-header / insufficient data.
   }
   if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
     return VP8_STATUS_BITSTREAM_ERROR;
   }
 
+  if (format != NULL && !(animation_present || fragments_present)) {
+    *format = hdrs.is_lossless ? 2 : 1;
+  }
+
   if (!hdrs.is_lossless) {
     if (data_size < VP8_FRAME_HEADER_SIZE) {
-      return VP8_STATUS_NOT_ENOUGH_DATA;
+      status = VP8_STATUS_NOT_ENOUGH_DATA;
+      goto ReturnWidthHeight;
     }
     // Validates raw VP8 data.
-    if (!VP8GetInfo(data, data_size,
-                    (uint32_t)hdrs.compressed_size, width, height)) {
+    if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size,
+                    &image_width, &image_height)) {
       return VP8_STATUS_BITSTREAM_ERROR;
     }
   } else {
     if (data_size < VP8L_FRAME_HEADER_SIZE) {
-      return VP8_STATUS_NOT_ENOUGH_DATA;
+      status = VP8_STATUS_NOT_ENOUGH_DATA;
+      goto ReturnWidthHeight;
     }
     // Validates raw VP8L data.
-    if (!VP8LGetInfo(data, data_size, width, height, has_alpha)) {
+    if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) {
       return VP8_STATUS_BITSTREAM_ERROR;
     }
   }
-
-  if (has_alpha != NULL) {
-    // If the data did not contain a VP8X/VP8L chunk the only definitive way
-    // to set this is by looking for alpha data (from an ALPH chunk).
-    *has_alpha |= (hdrs.alpha_data != NULL);
+  // Validates image size coherency.
+  if (found_vp8x) {
+    if (canvas_width != image_width || canvas_height != image_height) {
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
   }
   if (headers != NULL) {
     *headers = hdrs;
@@ -360,21 +398,44 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
     assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
     assert(headers->offset == headers->data_size - data_size);
   }
-  return VP8_STATUS_OK;  // Return features from VP8 header.
+ ReturnWidthHeight:
+  if (status == VP8_STATUS_OK ||
+      (status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
+    if (has_alpha != NULL) {
+      // If the data did not contain a VP8X/VP8L chunk the only definitive way
+      // to set this is by looking for alpha data (from an ALPH chunk).
+      *has_alpha |= (hdrs.alpha_data != NULL);
+    }
+    if (width != NULL) *width = image_width;
+    if (height != NULL) *height = image_height;
+    return VP8_STATUS_OK;
+  } else {
+    return status;
+  }
 }
 
 VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
+  VP8StatusCode status;
+  int has_animation = 0;
   assert(headers != NULL);
   // fill out headers, ignore width/height/has_alpha.
-  return ParseHeadersInternal(headers->data, headers->data_size,
-                              NULL, NULL, NULL, headers);
+  status = ParseHeadersInternal(headers->data, headers->data_size,
+                                NULL, NULL, NULL, &has_animation,
+                                NULL, headers);
+  if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    // TODO(jzern): full support of animation frames will require API additions.
+    if (has_animation) {
+      status = VP8_STATUS_UNSUPPORTED_FEATURE;
+    }
+  }
+  return status;
 }
 
 //------------------------------------------------------------------------------
 // WebPDecParams
 
 void WebPResetDecParams(WebPDecParams* const params) {
-  if (params) {
+  if (params != NULL) {
     memset(params, 0, sizeof(*params));
   }
 }
@@ -391,6 +452,7 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
 
   headers.data = data;
   headers.data_size = data_size;
+  headers.have_all_data = 1;
   status = WebPParseHeaders(&headers);   // Process Pre-VP8 chunks.
   if (status != VP8_STATUS_OK) {
     return status;
@@ -407,11 +469,6 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
     if (dec == NULL) {
       return VP8_STATUS_OUT_OF_MEMORY;
     }
-#ifdef WEBP_USE_THREAD
-    dec->use_threads_ = params->options && (params->options->use_threads > 0);
-#else
-    dec->use_threads_ = 0;
-#endif
     dec->alpha_data_ = headers.alpha_data;
     dec->alpha_data_size_ = headers.alpha_data_size;
 
@@ -423,6 +480,10 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
       status = WebPAllocateDecBuffer(io.width, io.height, params->options,
                                      params->output);
       if (status == VP8_STATUS_OK) {  // Decode
+        // This change must be done before calling VP8Decode()
+        dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
+                                             io.width, io.height);
+        VP8InitDithering(params->options, dec);
         if (!VP8Decode(dec, &io)) {
           status = dec->status_;
         }
@@ -452,6 +513,10 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
   if (status != VP8_STATUS_OK) {
     WebPFreeDecBuffer(params->output);
   }
+
+  if (params->options != NULL && params->options->flip) {
+    status = WebPFlipBuffer(params->output);
+  }
   return status;
 }
 
@@ -609,7 +674,6 @@ uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
 static void DefaultFeatures(WebPBitstreamFeatures* const features) {
   assert(features != NULL);
   memset(features, 0, sizeof(*features));
-  features->bitstream_version = 0;
 }
 
 static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
@@ -619,10 +683,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
   }
   DefaultFeatures(features);
 
-  // Only parse enough of the data to retrieve width/height/has_alpha.
+  // Only parse enough of the data to retrieve the features.
   return ParseHeadersInternal(data, data_size,
                               &features->width, &features->height,
-                              &features->has_alpha, NULL);
+                              &features->has_alpha, &features->has_animation,
+                              &features->format, NULL);
 }
 
 //------------------------------------------------------------------------------
@@ -666,19 +731,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
 VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
                                       WebPBitstreamFeatures* features,
                                       int version) {
-  VP8StatusCode status;
   if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
     return VP8_STATUS_INVALID_PARAM;   // version mismatch
   }
   if (features == NULL) {
     return VP8_STATUS_INVALID_PARAM;
   }
-
-  status = GetFeatures(data, data_size, features);
-  if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
-    return VP8_STATUS_BITSTREAM_ERROR;  // Not-enough-data treated as error.
-  }
-  return status;
+  return GetFeatures(data, data_size, features);
 }
 
 VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
@@ -722,9 +781,9 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
     h = options->crop_height;
     x = options->crop_left;
     y = options->crop_top;
-    if (!WebPIsRGBMode(src_colorspace)) {   // only snap for YUV420 or YUV422
+    if (!WebPIsRGBMode(src_colorspace)) {   // only snap for YUV420
       x &= ~1;
-      y &= ~1;    // TODO(later): only for YUV420, not YUV422.
+      y &= ~1;
     }
     if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
       return 0;  // out of frame boundary error
@@ -740,11 +799,13 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
   // Scaling
   io->use_scaling = (options != NULL) && (options->use_scaling > 0);
   if (io->use_scaling) {
-    if (options->scaled_width <= 0 || options->scaled_height <= 0) {
+    int scaled_width = options->scaled_width;
+    int scaled_height = options->scaled_height;
+    if (!WebPRescalerGetScaledDimensions(w, h, &scaled_width, &scaled_height)) {
       return 0;
     }
-    io->scaled_width = options->scaled_width;
-    io->scaled_height = options->scaled_height;
+    io->scaled_width = scaled_width;
+    io->scaled_height = scaled_height;
   }
 
   // Filter
@@ -766,6 +827,3 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dec/webpi.h b/drivers/webp/dec/webpi.h
index 44e5744411..c75a2e4a5b 100644
--- a/drivers/webp/dec/webpi.h
+++ b/drivers/webp/dec/webpi.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Internal header: WebP decoding parameters and custom IO on buffer
@@ -12,7 +14,7 @@
 #ifndef WEBP_DEC_WEBPI_H_
 #define WEBP_DEC_WEBPI_H_
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -24,7 +26,10 @@ extern "C" {
 
 typedef struct WebPDecParams WebPDecParams;
 typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
-typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos);
+typedef int (*OutputAlphaFunc)(const VP8Io* const io, WebPDecParams* const p,
+                               int expected_num_out_lines);
+typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos,
+                             int max_out_lines);
 
 struct WebPDecParams {
   WebPDecBuffer* output;             // output buffer.
@@ -38,7 +43,7 @@ struct WebPDecParams {
   void* memory;                  // overall scratch memory for the output work.
 
   OutputFunc emit;               // output RGB or YUV samples
-  OutputFunc emit_alpha;         // output alpha channel
+  OutputAlphaFunc emit_alpha;    // output alpha channel
   OutputRowFunc emit_alpha_row;  // output one line of rescaled alpha values
 };
 
@@ -52,6 +57,7 @@ void WebPResetDecParams(WebPDecParams* const params);
 typedef struct {
   const uint8_t* data;         // input buffer
   size_t data_size;            // input buffer size
+  int have_all_data;           // true if all data is known to be available
   size_t offset;               // offset to main data chunk (VP8 or VP8L)
   const uint8_t* alpha_data;   // points to alpha chunk (if present)
   size_t alpha_data_size;      // alpha chunk size
@@ -61,10 +67,10 @@ typedef struct {
 } WebPHeaderStructure;
 
 // Skips over all valid chunks prior to the first VP8/VP8L frame header.
-// Returns VP8_STATUS_OK on success,
-//         VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and
-//         VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data.
-// In 'headers', compressed_size, offset, alpha_data, alpha_size and lossless
+// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
+// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
+// in the case of non-decodable features (animation for instance).
+// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
 // fields are updated appropriately upon success.
 VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
 
@@ -91,10 +97,15 @@ int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
 // dimension / etc.). If *options is not NULL, also verify that the options'
 // parameters are valid and apply them to the width/height dimensions of the
 // output buffer. This takes cropping / scaling / rotation into account.
+// Also incorporates the options->flip flag to flip the buffer parameters if
+// needed.
 VP8StatusCode WebPAllocateDecBuffer(int width, int height,
                                     const WebPDecoderOptions* const options,
                                     WebPDecBuffer* const buffer);
 
+// Flip buffer vertically by negating the various strides.
+VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
+
 // Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
 // memory (still held by 'src').
 void WebPCopyDecBuffer(const WebPDecBuffer* const src,
@@ -103,11 +114,9 @@ void WebPCopyDecBuffer(const WebPDecBuffer* const src,
 // Copy and transfer ownership from src to dst (beware of parameter order!)
 void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
 
-
-
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/decode.h b/drivers/webp/decode.h
index 43b6c58f4f..fa4b13411d 100644
--- a/drivers/webp/decode.h
+++ b/drivers/webp/decode.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Main decoding functions for WebP images.
@@ -14,11 +16,23 @@
 
 #include "./types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-#define WEBP_DECODER_ABI_VERSION 0x0200    // MAJOR(8b) + MINOR(8b)
+#define WEBP_DECODER_ABI_VERSION 0x0207    // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum VP8StatusCode VP8StatusCode;
+// typedef enum WEBP_CSP_MODE WEBP_CSP_MODE;
+typedef struct WebPRGBABuffer WebPRGBABuffer;
+typedef struct WebPYUVABuffer WebPYUVABuffer;
+typedef struct WebPDecBuffer WebPDecBuffer;
+typedef struct WebPIDecoder WebPIDecoder;
+typedef struct WebPBitstreamFeatures WebPBitstreamFeatures;
+typedef struct WebPDecoderOptions WebPDecoderOptions;
+typedef struct WebPDecoderConfig WebPDecoderConfig;
 
 // Return the decoder's version number, packed in hexadecimal using 8bits for
 // each of major/minor/revision. E.g: v2.5.7 is 0x020507.
@@ -34,7 +48,7 @@ WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size,
 // Decodes WebP images pointed to by 'data' and returns RGBA samples, along
 // with the dimensions in *width and *height. The ordering of samples in
 // memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
-// The returned pointer should be deleted calling free().
+// The returned pointer should be released by calling WebPFree().
 // Returns NULL in case of error.
 WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size,
                                      int* width, int* height);
@@ -59,9 +73,9 @@ WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
 
 // Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
 // returned is the Y samples buffer. Upon return, *u and *v will point to
-// the U and V chroma data. These U and V buffers need NOT be free()'d,
-// unlike the returned Y luma one. The dimension of the U and V planes
-// are both (*width + 1) / 2 and (*height + 1)/ 2.
+// the U and V chroma data. These U and V buffers need NOT be passed to
+// WebPFree(), unlike the returned Y luma one. The dimensions of the U and V
+// planes are both (*width + 1) / 2 and (*height + 1) / 2.
 // Upon return, the Y buffer has a stride returned as '*stride', while U and V
 // have a common stride returned as '*uv_stride'.
 // Return NULL in case of error.
@@ -71,6 +85,9 @@ WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size,
                                     uint8_t** u, uint8_t** v,
                                     int* stride, int* uv_stride);
 
+// Releases memory returned by the WebPDecode*() functions above.
+WEBP_EXTERN(void) WebPFree(void* ptr);
+
 // These five functions are variants of the above ones, that decode the image
 // directly into a pre-allocated buffer 'output_buffer'. The maximum storage
 // available in this buffer is indicated by 'output_buffer_size'. If this
@@ -118,20 +135,28 @@ WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
 // Note: the naming describes the byte-ordering of packed samples in memory.
 // For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,...
 // Non-capital names (e.g.:MODE_Argb) relates to pre-multiplied RGB channels.
-// RGB-565 and RGBA-4444 are also endian-agnostic and byte-oriented.
-typedef enum { MODE_RGB = 0, MODE_RGBA = 1,
-               MODE_BGR = 2, MODE_BGRA = 3,
-               MODE_ARGB = 4, MODE_RGBA_4444 = 5,
-               MODE_RGB_565 = 6,
-               // RGB-premultiplied transparent modes (alpha value is preserved)
-               MODE_rgbA = 7,
-               MODE_bgrA = 8,
-               MODE_Argb = 9,
-               MODE_rgbA_4444 = 10,
-               // YUV modes must come after RGB ones.
-               MODE_YUV = 11, MODE_YUVA = 12,  // yuv 4:2:0
-               MODE_LAST = 13
-             } WEBP_CSP_MODE;
+// RGBA-4444 and RGB-565 colorspaces use the following byte order:
+// RGBA-4444: [r3 r2 r1 r0 g3 g2 g1 g0], [b3 b2 b1 b0 a3 a2 a1 a0], ...
+// RGB-565: [r4 r3 r2 r1 r0 g5 g4 g3], [g2 g1 g0 b4 b3 b2 b1 b0], ...
+// In the case WEBP_SWAP_16BITS_CSP is defined, the bytes are swapped for
+// these two modes:
+// RGBA-4444: [b3 b2 b1 b0 a3 a2 a1 a0], [r3 r2 r1 r0 g3 g2 g1 g0], ...
+// RGB-565: [g2 g1 g0 b4 b3 b2 b1 b0], [r4 r3 r2 r1 r0 g5 g4 g3], ...
+
+typedef enum WEBP_CSP_MODE {
+  MODE_RGB = 0, MODE_RGBA = 1,
+  MODE_BGR = 2, MODE_BGRA = 3,
+  MODE_ARGB = 4, MODE_RGBA_4444 = 5,
+  MODE_RGB_565 = 6,
+  // RGB-premultiplied transparent modes (alpha value is preserved)
+  MODE_rgbA = 7,
+  MODE_bgrA = 8,
+  MODE_Argb = 9,
+  MODE_rgbA_4444 = 10,
+  // YUV modes must come after RGB ones.
+  MODE_YUV = 11, MODE_YUVA = 12,  // yuv 4:2:0
+  MODE_LAST = 13
+} WEBP_CSP_MODE;
 
 // Some useful macros:
 static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) {
@@ -152,13 +177,13 @@ static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) {
 //------------------------------------------------------------------------------
 // WebPDecBuffer: Generic structure for describing the output sample buffer.
 
-typedef struct {    // view as RGBA
+struct WebPRGBABuffer {    // view as RGBA
   uint8_t* rgba;    // pointer to RGBA samples
   int stride;       // stride in bytes from one scanline to the next.
   size_t size;      // total size of the *rgba buffer.
-} WebPRGBABuffer;
+};
 
-typedef struct {              // view as YUVA
+struct WebPYUVABuffer {              // view as YUVA
   uint8_t* y, *u, *v, *a;     // pointer to luma, chroma U/V, alpha samples
   int y_stride;               // luma stride
   int u_stride, v_stride;     // chroma strides
@@ -166,10 +191,10 @@ typedef struct {              // view as YUVA
   size_t y_size;              // luma plane size
   size_t u_size, v_size;      // chroma planes size
   size_t a_size;              // alpha-plane size
-} WebPYUVABuffer;
+};
 
 // Output buffer
-typedef struct {
+struct WebPDecBuffer {
   WEBP_CSP_MODE colorspace;  // Colorspace.
   int width, height;         // Dimensions.
   int is_external_memory;    // If true, 'internal_memory' pointer is not used.
@@ -182,7 +207,7 @@ typedef struct {
   uint8_t* private_memory;   // Internally allocated memory (only when
                              // is_external_memory is false). Should not be used
                              // externally, but accessed via the buffer union.
-} WebPDecBuffer;
+};
 
 // Internal, version-checked, entry point
 WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
@@ -200,7 +225,7 @@ WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
 //------------------------------------------------------------------------------
 // Enumeration of the status codes
 
-typedef enum {
+typedef enum VP8StatusCode {
   VP8_STATUS_OK = 0,
   VP8_STATUS_OUT_OF_MEMORY,
   VP8_STATUS_INVALID_PARAM,
@@ -237,13 +262,17 @@ typedef enum {
 //   }
 //   WebPIDelete(idec);
 
-typedef struct WebPIDecoder WebPIDecoder;
-
 // Creates a new incremental decoder with the supplied buffer parameter.
 // This output_buffer can be passed NULL, in which case a default output buffer
 // is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer'
 // is kept, which means that the lifespan of 'output_buffer' must be larger than
 // that of the returned WebPIDecoder object.
+// The supplied 'output_buffer' content MUST NOT be changed between calls to
+// WebPIAppend() or WebPIUpdate() unless 'output_buffer.is_external_memory' is
+// set to 1. In such a case, it is allowed to modify the pointers, size and
+// stride of output_buffer.u.RGBA or output_buffer.u.YUVA, provided they remain
+// within valid bounds.
+// All other fields of WebPDecBuffer MUST remain constant between calls.
 // Returns NULL if the allocation failed.
 WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
 
@@ -251,19 +280,27 @@ WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
 // will output the RGB/A samples specified by 'csp' into a preallocated
 // buffer 'output_buffer'. The size of this buffer is at least
 // 'output_buffer_size' and the stride (distance in bytes between two scanlines)
-// is specified by 'output_stride'. Returns NULL if the allocation failed.
+// is specified by 'output_stride'.
+// Additionally, output_buffer can be passed NULL in which case the output
+// buffer will be allocated automatically when the decoding starts. The
+// colorspace 'csp' is taken into account for allocating this buffer. All other
+// parameters are ignored.
+// Returns NULL if the allocation failed, or if some parameters are invalid.
 WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
     WEBP_CSP_MODE csp,
     uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
 
 // This function allocates and initializes an incremental-decoder object, which
-// will output the raw luma/chroma samples into a preallocated planes. The luma
-// plane is specified by its pointer 'luma', its size 'luma_size' and its stride
-// 'luma_stride'. Similarly, the chroma-u plane is specified by the 'u',
-// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v'
-// and 'v_size'. And same for the alpha-plane. The 'a' pointer can be pass
-// NULL in case one is not interested in the transparency plane.
-// Returns NULL if the allocation failed.
+// will output the raw luma/chroma samples into preallocated planes if
+// supplied. The luma plane is specified by its pointer 'luma', its size
+// 'luma_size' and its stride 'luma_stride'. Similarly, the chroma-u plane
+// is specified by the 'u', 'u_size' and 'u_stride' parameters, and the chroma-v
+// plane by 'v' and 'v_size'. And same for the alpha-plane. The 'a' pointer
+// can be passed NULL in case one is not interested in the transparency plane.
+// Conversely, 'luma' can be passed NULL if no preallocated planes are supplied.
+// In this case, the output buffer will be automatically allocated (using
+// MODE_YUVA) when decoding starts. All parameters are then ignored.
+// Returns NULL if the allocation failed or if a parameter is invalid.
 WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
     uint8_t* luma, size_t luma_size, int luma_stride,
     uint8_t* u, size_t u_size, int u_stride,
@@ -344,7 +381,7 @@ WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
      CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
 
      // C) Adjust 'config', if needed
-     config.no_fancy = 1;
+     config.no_fancy_upsampling = 1;
      config.output.colorspace = MODE_BGRA;
      // etc.
 
@@ -365,19 +402,15 @@ WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
 */
 
 // Features gathered from the bitstream
-typedef struct {
-  int width;        // Width in pixels, as read from the bitstream.
-  int height;       // Height in pixels, as read from the bitstream.
-  int has_alpha;    // True if the bitstream contains an alpha channel.
-
-  // Unused for now:
-  int bitstream_version;        // should be 0 for now. TODO(later)
-  int no_incremental_decoding;  // if true, using incremental decoding is not
-                                // recommended.
-  int rotate;                   // TODO(later)
-  int uv_sampling;              // should be 0 for now. TODO(later)
-  uint32_t pad[3];              // padding for later use
-} WebPBitstreamFeatures;
+struct WebPBitstreamFeatures {
+  int width;          // Width in pixels, as read from the bitstream.
+  int height;         // Height in pixels, as read from the bitstream.
+  int has_alpha;      // True if the bitstream contains an alpha channel.
+  int has_animation;  // True if the bitstream is an animation.
+  int format;         // 0 = undefined (/mixed), 1 = lossy, 2 = lossless
+
+  uint32_t pad[5];    // padding for later use
+};
 
 // Internal, version-checked, entry point
 WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
@@ -385,8 +418,9 @@ WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
 
 // Retrieve features from the bitstream. The *features structure is filled
 // with information gathered from the bitstream.
-// Returns false in case of error or version mismatch.
-// In case of error, features->bitstream_status will reflect the error code.
+// Returns VP8_STATUS_OK when the features are successfully retrieved. Returns
+// VP8_STATUS_NOT_ENOUGH_DATA when more data is needed to retrieve the
+// features from headers. Returns error in other cases.
 static WEBP_INLINE VP8StatusCode WebPGetFeatures(
     const uint8_t* data, size_t data_size,
     WebPBitstreamFeatures* features) {
@@ -395,7 +429,7 @@ static WEBP_INLINE VP8StatusCode WebPGetFeatures(
 }
 
 // Decoding options
-typedef struct {
+struct WebPDecoderOptions {
   int bypass_filtering;               // if true, skip the in-loop filtering
   int no_fancy_upsampling;            // if true, use faster pointwise upsampler
   int use_cropping;                   // if true, cropping is applied _first_
@@ -405,19 +439,19 @@ typedef struct {
   int use_scaling;                    // if true, scaling is applied _afterward_
   int scaled_width, scaled_height;    // final resolution
   int use_threads;                    // if true, use multi-threaded decoding
+  int dithering_strength;             // dithering strength (0=Off, 100=full)
+  int flip;                           // flip output vertically
+  int alpha_dithering_strength;       // alpha dithering strength in [0..100]
 
-  // Unused for now:
-  int force_rotation;                 // forced rotation (to be applied _last_)
-  int no_enhancement;                 // if true, discard enhancement layer
-  uint32_t pad[6];                    // padding for later use
-} WebPDecoderOptions;
+  uint32_t pad[5];                    // padding for later use
+};
 
 // Main object storing the configuration for advanced decoding.
-typedef struct {
+struct WebPDecoderConfig {
   WebPBitstreamFeatures input;  // Immutable bitstream features (optional)
   WebPDecBuffer output;         // Output buffer (can point to external mem)
   WebPDecoderOptions options;   // Decoding options
-} WebPDecoderConfig;
+};
 
 // Internal, version-checked, entry point
 WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
@@ -447,7 +481,7 @@ WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size,
 WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size,
                                       WebPDecoderConfig* config);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/demux.h b/drivers/webp/demux.h
new file mode 100644
index 0000000000..6fbe775851
--- /dev/null
+++ b/drivers/webp/demux.h
@@ -0,0 +1,364 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Demux API.
+// Enables extraction of image and extended format data from WebP files.
+
+// Code Example: Demuxing WebP data to extract all the frames, ICC profile
+// and EXIF/XMP metadata.
+/*
+  WebPDemuxer* demux = WebPDemux(&webp_data);
+
+  uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
+  uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
+  // ... (Get information about the features present in the WebP file).
+  uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
+
+  // ... (Iterate over all frames).
+  WebPIterator iter;
+  if (WebPDemuxGetFrame(demux, 1, &iter)) {
+    do {
+      // ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
+      // ... and get other frame properties like width, height, offsets etc.
+      // ... see 'struct WebPIterator' below for more info).
+    } while (WebPDemuxNextFrame(&iter));
+    WebPDemuxReleaseIterator(&iter);
+  }
+
+  // ... (Extract metadata).
+  WebPChunkIterator chunk_iter;
+  if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
+  // ... (Consume the ICC profile in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
+  // ... (Consume the EXIF metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
+  // ... (Consume the XMP metadata in 'chunk_iter.chunk').
+  WebPDemuxReleaseChunkIterator(&chunk_iter);
+  WebPDemuxDelete(demux);
+*/
+
+#ifndef WEBP_WEBP_DEMUX_H_
+#define WEBP_WEBP_DEMUX_H_
+
+#include "./decode.h"     // for WEBP_CSP_MODE
+#include "./mux_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define WEBP_DEMUX_ABI_VERSION 0x0105    // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPDemuxState WebPDemuxState;
+// typedef enum WebPFormatFeature WebPFormatFeature;
+typedef struct WebPDemuxer WebPDemuxer;
+typedef struct WebPIterator WebPIterator;
+typedef struct WebPChunkIterator WebPChunkIterator;
+typedef struct WebPAnimInfo WebPAnimInfo;
+typedef struct WebPAnimDecoderOptions WebPAnimDecoderOptions;
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the demux library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetDemuxVersion(void);
+
+//------------------------------------------------------------------------------
+// Life of a Demux object
+
+typedef enum WebPDemuxState {
+  WEBP_DEMUX_PARSE_ERROR    = -1,  // An error occurred while parsing.
+  WEBP_DEMUX_PARSING_HEADER =  0,  // Not enough data to parse full header.
+  WEBP_DEMUX_PARSED_HEADER  =  1,  // Header parsing complete,
+                                   // data may be available.
+  WEBP_DEMUX_DONE           =  2   // Entire file has been parsed.
+} WebPDemuxState;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
+    const WebPData*, int, WebPDemuxState*, int);
+
+// Parses the full WebP file given by 'data'.
+// Returns a WebPDemuxer object on successful parse, NULL otherwise.
+static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
+  return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Parses the possibly incomplete WebP file given by 'data'.
+// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
+// Returns NULL in case of error or if there isn't enough data to start parsing;
+// and a WebPDemuxer object on successful parse.
+// Note that WebPDemuxer keeps internal pointers to 'data' memory segment.
+// If this data is volatile, the demuxer object should be deleted (by calling
+// WebPDemuxDelete()) and WebPDemuxPartial() called again on the new data.
+// This is usually an inexpensive operation.
+static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
+    const WebPData* data, WebPDemuxState* state) {
+  return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Frees memory associated with 'dmux'.
+WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
+
+//------------------------------------------------------------------------------
+// Data/information extraction.
+
+typedef enum WebPFormatFeature {
+  WEBP_FF_FORMAT_FLAGS,  // Extended format flags present in the 'VP8X' chunk.
+  WEBP_FF_CANVAS_WIDTH,
+  WEBP_FF_CANVAS_HEIGHT,
+  WEBP_FF_LOOP_COUNT,
+  WEBP_FF_BACKGROUND_COLOR,
+  WEBP_FF_FRAME_COUNT    // Number of frames present in the demux object.
+                         // In case of a partial demux, this is the number of
+                         // frames seen so far, with the last frame possibly
+                         // being partial.
+} WebPFormatFeature;
+
+// Get the 'feature' value from the 'dmux'.
+// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
+// returned a state > WEBP_DEMUX_PARSING_HEADER.
+WEBP_EXTERN(uint32_t) WebPDemuxGetI(
+    const WebPDemuxer* dmux, WebPFormatFeature feature);
+
+//------------------------------------------------------------------------------
+// Frame iteration.
+
+struct WebPIterator {
+  int frame_num;
+  int num_frames;          // equivalent to WEBP_FF_FRAME_COUNT.
+  int fragment_num;
+  int num_fragments;
+  int x_offset, y_offset;  // offset relative to the canvas.
+  int width, height;       // dimensions of this frame or fragment.
+  int duration;            // display duration in milliseconds.
+  WebPMuxAnimDispose dispose_method;  // dispose method for the frame.
+  int complete;   // true if 'fragment' contains a full frame. partial images
+                  // may still be decoded with the WebP incremental decoder.
+  WebPData fragment;  // The frame or fragment given by 'frame_num' and
+                      // 'fragment_num'.
+  int has_alpha;      // True if the frame or fragment contains transparency.
+  WebPMuxAnimBlend blend_method;  // Blend operation for the frame.
+
+  uint32_t pad[2];         // padding for later use.
+  void* private_;          // for internal use only.
+};
+
+// Retrieves frame 'frame_number' from 'dmux'.
+// 'iter->fragment' points to the first fragment on return from this function.
+// Individual fragments may be extracted using WebPDemuxSelectFragment().
+// Setting 'frame_number' equal to 0 will return the last frame of the image.
+// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
+// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of 'iter'.
+WEBP_EXTERN(int) WebPDemuxGetFrame(
+    const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
+
+// Sets 'iter->fragment' to point to the next ('iter->frame_num' + 1) or
+// previous ('iter->frame_num' - 1) frame. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
+
+// Sets 'iter->fragment' to reflect fragment number 'fragment_num'.
+// Returns true if fragment 'fragment_num' is present, false otherwise.
+WEBP_EXTERN(int) WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num);
+
+// Releases any memory associated with 'iter'.
+// Must be called before any subsequent calls to WebPDemuxGetFrame() on the same
+// iter. Also, must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
+
+//------------------------------------------------------------------------------
+// Chunk iteration.
+
+struct WebPChunkIterator {
+  // The current and total number of chunks with the fourcc given to
+  // WebPDemuxGetChunk().
+  int chunk_num;
+  int num_chunks;
+  WebPData chunk;    // The payload of the chunk.
+
+  uint32_t pad[6];   // padding for later use
+  void* private_;
+};
+
+// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
+// 'dmux'.
+// 'fourcc' is a character array containing the fourcc of the chunk to return,
+// e.g., "ICCP", "XMP ", "EXIF", etc.
+// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
+// Returns true if the chunk is found, false otherwise. Image related chunk
+// payloads are accessed through WebPDemuxGetFrame() and related functions.
+// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of the iterator.
+WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
+                                   const char fourcc[4], int chunk_number,
+                                   WebPChunkIterator* iter);
+
+// Sets 'iter->chunk' to point to the next ('iter->chunk_num' + 1) or previous
+// ('iter->chunk_num' - 1) chunk. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
+
+// Releases any memory associated with 'iter'.
+// Must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
+
+//------------------------------------------------------------------------------
+// WebPAnimDecoder API
+//
+// This API allows decoding (possibly) animated WebP images.
+//
+// Code Example:
+/*
+  WebPAnimDecoderOptions dec_options;
+  WebPAnimDecoderOptionsInit(&dec_options);
+  // Tune 'dec_options' as needed.
+  WebPAnimDecoder* dec = WebPAnimDecoderNew(webp_data, &dec_options);
+  WebPAnimInfo anim_info;
+  WebPAnimDecoderGetInfo(dec, &anim_info);
+  for (uint32_t i = 0; i < anim_info.loop_count; ++i) {
+    while (WebPAnimDecoderHasMoreFrames(dec)) {
+      uint8_t* buf;
+      int timestamp;
+      WebPAnimDecoderGetNext(dec, &buf, &timestamp);
+      // ... (Render 'buf' based on 'timestamp').
+      // ... (Do NOT free 'buf', as it is owned by 'dec').
+    }
+    WebPAnimDecoderReset(dec);
+  }
+  const WebPDemuxer* demuxer = WebPAnimDecoderGetDemuxer(dec);
+  // ... (Do something using 'demuxer'; e.g. get EXIF/XMP/ICC data).
+  WebPAnimDecoderDelete(dec);
+*/
+
+typedef struct WebPAnimDecoder WebPAnimDecoder;  // Main opaque object.
+
+// Global options.
+struct WebPAnimDecoderOptions {
+  // Output colorspace. Only the following modes are supported:
+  // MODE_RGBA, MODE_BGRA, MODE_rgbA and MODE_bgrA.
+  WEBP_CSP_MODE color_mode;
+  int use_threads;           // If true, use multi-threaded decoding.
+  uint32_t padding[7];       // Padding for later use.
+};
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(int) WebPAnimDecoderOptionsInitInternal(
+    WebPAnimDecoderOptions*, int);
+
+// Should always be called, to initialize a fresh WebPAnimDecoderOptions
+// structure before modification. Returns false in case of version mismatch.
+// WebPAnimDecoderOptionsInit() must have succeeded before using the
+// 'dec_options' object.
+static WEBP_INLINE int WebPAnimDecoderOptionsInit(
+    WebPAnimDecoderOptions* dec_options) {
+  return WebPAnimDecoderOptionsInitInternal(dec_options,
+                                            WEBP_DEMUX_ABI_VERSION);
+}
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(WebPAnimDecoder*) WebPAnimDecoderNewInternal(
+    const WebPData*, const WebPAnimDecoderOptions*, int);
+
+// Creates and initializes a WebPAnimDecoder object.
+// Parameters:
+//   webp_data - (in) WebP bitstream. This should remain unchanged during the
+//                    lifetime of the output WebPAnimDecoder object.
+//   dec_options - (in) decoding options. Can be passed NULL to choose
+//                      reasonable defaults (in particular, color mode MODE_RGBA
+//                      will be picked).
+// Returns:
+//   A pointer to the newly created WebPAnimDecoder object, or NULL in case of
+//   parsing error, invalid option or memory error.
+static WEBP_INLINE WebPAnimDecoder* WebPAnimDecoderNew(
+    const WebPData* webp_data, const WebPAnimDecoderOptions* dec_options) {
+  return WebPAnimDecoderNewInternal(webp_data, dec_options,
+                                    WEBP_DEMUX_ABI_VERSION);
+}
+
+// Global information about the animation.
+struct WebPAnimInfo {
+  uint32_t canvas_width;
+  uint32_t canvas_height;
+  uint32_t loop_count;
+  uint32_t bgcolor;
+  uint32_t frame_count;
+  uint32_t pad[4];   // padding for later use
+};
+
+// Get global information about the animation.
+// Parameters:
+//   dec - (in) decoder instance to get information from.
+//   info - (out) global information fetched from the animation.
+// Returns:
+//   True on success.
+WEBP_EXTERN(int) WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec,
+                                        WebPAnimInfo* info);
+
+// Fetch the next frame from 'dec' based on options supplied to
+// WebPAnimDecoderNew(). This will be a fully reconstructed canvas of size
+// 'canvas_width * 4 * canvas_height', and not just the frame sub-rectangle. The
+// returned buffer 'buf' is valid only until the next call to
+// WebPAnimDecoderGetNext(), WebPAnimDecoderReset() or WebPAnimDecoderDelete().
+// Parameters:
+//   dec - (in/out) decoder instance from which the next frame is to be fetched.
+//   buf - (out) decoded frame.
+//   timestamp - (out) timestamp of the frame in milliseconds.
+// Returns:
+//   False if any of the arguments are NULL, or if there is a parsing or
+//   decoding error, or if there are no more frames. Otherwise, returns true.
+WEBP_EXTERN(int) WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
+                                        uint8_t** buf, int* timestamp);
+
+// Check if there are more frames left to decode.
+// Parameters:
+//   dec - (in) decoder instance to be checked.
+// Returns:
+//   True if 'dec' is not NULL and some frames are yet to be decoded.
+//   Otherwise, returns false.
+WEBP_EXTERN(int) WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec);
+
+// Resets the WebPAnimDecoder object, so that next call to
+// WebPAnimDecoderGetNext() will restart decoding from 1st frame. This would be
+// helpful when all frames need to be decoded multiple times (e.g.
+// info.loop_count times) without destroying and recreating the 'dec' object.
+// Parameters:
+//   dec - (in/out) decoder instance to be reset
+WEBP_EXTERN(void) WebPAnimDecoderReset(WebPAnimDecoder* dec);
+
+// Grab the internal demuxer object.
+// Getting the demuxer object can be useful if one wants to use operations only
+// available through demuxer; e.g. to get XMP/EXIF/ICC metadata. The returned
+// demuxer object is owned by 'dec' and is valid only until the next call to
+// WebPAnimDecoderDelete().
+//
+// Parameters:
+//   dec - (in) decoder instance from which the demuxer object is to be fetched.
+WEBP_EXTERN(const WebPDemuxer*) WebPAnimDecoderGetDemuxer(
+    const WebPAnimDecoder* dec);
+
+// Deletes the WebPAnimDecoder object.
+// Parameters:
+//   dec - (in/out) decoder instance to be deleted
+WEBP_EXTERN(void) WebPAnimDecoderDelete(WebPAnimDecoder* dec);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_DEMUX_H_ */
diff --git a/drivers/webp/demux/anim_decode.c b/drivers/webp/demux/anim_decode.c
new file mode 100644
index 0000000000..c81cedfba0
--- /dev/null
+++ b/drivers/webp/demux/anim_decode.c
@@ -0,0 +1,442 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  AnimDecoder implementation.
+//
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <assert.h>
+#include <string.h>
+
+#include "../utils/utils.h"
+#include "webp/decode.h"
+#include "webp/demux.h"
+
+#define NUM_CHANNELS 4
+
+typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
+static void BlendPixelRowNonPremult(uint32_t* const src,
+                                    const uint32_t* const dst, int num_pixels);
+static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
+                                 int num_pixels);
+
+struct WebPAnimDecoder {
+  WebPDemuxer* demux_;             // Demuxer created from given WebP bitstream.
+  WebPDecoderConfig config_;       // Decoder config.
+  // Note: we use a pointer to a function blending multiple pixels at a time to
+  // allow possible inlining of per-pixel blending function.
+  BlendRowFunc blend_func_;        // Pointer to the chosen blend row function.
+  WebPAnimInfo info_;              // Global info about the animation.
+  uint8_t* curr_frame_;            // Current canvas (not disposed).
+  uint8_t* prev_frame_disposed_;   // Previous canvas (properly disposed).
+  int prev_frame_timestamp_;       // Previous frame timestamp (milliseconds).
+  WebPIterator prev_iter_;         // Iterator object for previous frame.
+  int prev_frame_was_keyframe_;    // True if previous frame was a keyframe.
+  int next_frame_;                 // Index of the next frame to be decoded
+                                   // (starting from 1).
+};
+
+static void DefaultDecoderOptions(WebPAnimDecoderOptions* const dec_options) {
+  dec_options->color_mode = MODE_RGBA;
+  dec_options->use_threads = 0;
+}
+
+int WebPAnimDecoderOptionsInitInternal(WebPAnimDecoderOptions* dec_options,
+                                            int abi_version) {
+  if (dec_options == NULL ||
+      WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
+    return 0;
+  }
+  DefaultDecoderOptions(dec_options);
+  return 1;
+}
+
+// Validates 'dec_options' and sets up the blend function and decoder config
+// of 'dec'. Returns false on unsupported color mode or failed config init.
+static int ApplyDecoderOptions(const WebPAnimDecoderOptions* const dec_options,
+                               WebPAnimDecoder* const dec) {
+  WEBP_CSP_MODE mode;
+  WebPDecoderConfig* config = &dec->config_;
+  assert(dec_options != NULL);
+  mode = dec_options->color_mode;
+  if (mode != MODE_RGBA && mode != MODE_BGRA &&
+      mode != MODE_rgbA && mode != MODE_bgrA) {
+    return 0;
+  }
+  dec->blend_func_ = (mode == MODE_RGBA || mode == MODE_BGRA)
+      ? &BlendPixelRowNonPremult : &BlendPixelRowPremult;
+  if (!WebPInitDecoderConfig(config)) return 0;  // e.g. decoder ABI mismatch.
+  config->output.colorspace = mode;
+  config->output.is_external_memory = 1;
+  config->options.use_threads = dec_options->use_threads;
+  // Note: config->output.u.RGBA is set at the time of decoding each frame.
+  return 1;
+}
+
+// Builds an animation decoder over 'webp_data'. Returns NULL on ABI mismatch,
+// invalid options, parse error or allocation failure (cleanup via 'Error').
+WebPAnimDecoder* WebPAnimDecoderNewInternal(
+    const WebPData* webp_data, const WebPAnimDecoderOptions* dec_options,
+    int abi_version) {
+  WebPAnimDecoderOptions options;
+  WebPAnimDecoder* dec = NULL;
+  if (webp_data == NULL ||
+      WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_DEMUX_ABI_VERSION)) {
+    return NULL;
+  }
+
+  // Note: calloc() so that the pointer members are initialized to NULL.
+  dec = (WebPAnimDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
+  if (dec == NULL) goto Error;
+
+  if (dec_options != NULL) {
+    options = *dec_options;
+  } else {
+    DefaultDecoderOptions(&options);
+  }
+  if (!ApplyDecoderOptions(&options, dec)) goto Error;
+
+  dec->demux_ = WebPDemux(webp_data);
+  if (dec->demux_ == NULL) goto Error;
+
+  dec->info_.canvas_width = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_WIDTH);
+  dec->info_.canvas_height = WebPDemuxGetI(dec->demux_, WEBP_FF_CANVAS_HEIGHT);
+  dec->info_.loop_count = WebPDemuxGetI(dec->demux_, WEBP_FF_LOOP_COUNT);
+  dec->info_.bgcolor = WebPDemuxGetI(dec->demux_, WEBP_FF_BACKGROUND_COLOR);
+  dec->info_.frame_count = WebPDemuxGetI(dec->demux_, WEBP_FF_FRAME_COUNT);
+
+  // Note: calloc() because we fill frame with zeroes as well. Row size and row
+  // count are separate factors so the total is not truncated into an 'int'.
+  dec->curr_frame_ = (uint8_t*)WebPSafeCalloc(
+      dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
+  if (dec->curr_frame_ == NULL) goto Error;
+  dec->prev_frame_disposed_ = (uint8_t*)WebPSafeCalloc(
+      dec->info_.canvas_width * NUM_CHANNELS, dec->info_.canvas_height);
+  if (dec->prev_frame_disposed_ == NULL) goto Error;
+
+  WebPAnimDecoderReset(dec);
+  return dec;
+
+ Error:
+  WebPAnimDecoderDelete(dec);
+  return NULL;
+}
+
+int WebPAnimDecoderGetInfo(const WebPAnimDecoder* dec, WebPAnimInfo* info) {
+  if (dec == NULL || info == NULL) return 0;
+  *info = dec->info_;
+  return 1;
+}
+
+// Returns true if the frame covers the full canvas.
+static int IsFullFrame(int width, int height, int canvas_width,
+                       int canvas_height) {
+  return (width == canvas_width && height == canvas_height);
+}
+
+// Clear the canvas to transparent.
+static void ZeroFillCanvas(uint8_t* buf, uint32_t canvas_width,
+                           uint32_t canvas_height) {
+  memset(buf, 0, canvas_width * NUM_CHANNELS * canvas_height);
+}
+
+// Clear given frame rectangle to transparent.
+static void ZeroFillFrameRect(uint8_t* buf, int buf_stride, int x_offset,
+                              int y_offset, int width, int height) {
+  int j;
+  assert(width * NUM_CHANNELS <= buf_stride);
+  buf += y_offset * buf_stride + x_offset * NUM_CHANNELS;
+  for (j = 0; j < height; ++j) {
+    memset(buf, 0, width * NUM_CHANNELS);
+    buf += buf_stride;
+  }
+}
+
+// Copy width * height pixels from 'src' to 'dst'.
+static void CopyCanvas(const uint8_t* src, uint8_t* dst,
+                       uint32_t width, uint32_t height) {
+  assert(src != NULL && dst != NULL);
+  memcpy(dst, src, width * NUM_CHANNELS * height);
+}
+
+// Returns true if the current frame is a key-frame.
+static int IsKeyFrame(const WebPIterator* const curr,
+                      const WebPIterator* const prev,
+                      int prev_frame_was_key_frame,
+                      int canvas_width, int canvas_height) {
+  if (curr->frame_num == 1) {
+    return 1;
+  } else if ((!curr->has_alpha || curr->blend_method == WEBP_MUX_NO_BLEND) &&
+             IsFullFrame(curr->width, curr->height,
+                         canvas_width, canvas_height)) {
+    return 1;
+  } else {
+    return (prev->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) &&
+           (IsFullFrame(prev->width, prev->height, canvas_width,
+                        canvas_height) ||
+            prev_frame_was_key_frame);
+  }
+}
+
+
+// Computes one blended 8-bit channel (selected by 'shift') of non-premultiplied
+// 'src' over 'dst'. The weighted sum is multiplied by 'scale' and shifted by 24.
+static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
+                                      uint32_t dst, uint8_t dst_a,
+                                      uint32_t scale, int shift) {
+  const uint32_t fg = (src >> shift) & 0xff;
+  const uint32_t bg = (dst >> shift) & 0xff;
+  const uint32_t weighted_sum = fg * src_a + bg * dst_a;
+  assert(weighted_sum < (1ULL << 32) / scale);
+  return (uint8_t)((weighted_sum * scale) >> 24);
+}
+
+// Composite non-premultiplied 'src' over non-premultiplied 'dst'.
+static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
+  const uint8_t src_a = (src >> 24) & 0xff;
+
+  if (src_a != 0) {
+    const uint8_t dst_a = (dst >> 24) & 0xff;
+    // Integer approximation of the actual formula
+    // dst_factor_a = (dst_a * (255 - src_a)) / 255.
+    const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
+    const uint8_t blend_a = src_a + dst_factor_a;
+    // Fixed-point factor derived from the blended alpha, used by all channels.
+    const uint32_t scale = (1UL << 24) / blend_a;
+    const uint8_t blend_r =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
+    const uint8_t blend_g =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
+    const uint8_t blend_b =
+        BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
+    assert(src_a + dst_factor_a < 256);
+
+    return ((uint32_t)blend_a << 24) |
+           (blend_b << 16) |
+           (blend_g << 8) |
+           (blend_r << 0);
+  }
+  // A source pixel with zero alpha leaves the destination value unchanged.
+  return dst;
+}
+
+// Blends the first 'num_pixels' of non-premultiplied 'src' over 'dst'. The
+// results are written back into 'src'; 'dst' is only read.
+static void BlendPixelRowNonPremult(uint32_t* const src,
+                                    const uint32_t* const dst, int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t pixel = src[i];
+    // Pixels whose alpha byte is 0xff are left as they are.
+    if ((pixel >> 24) == 0xff) continue;
+    src[i] = BlendPixelNonPremult(pixel, dst[i]);
+  }
+}
+
+// Scales all four 8-bit channels of 'pix' by 'scale' / 256, two at a time.
+static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
+  const uint32_t kEvenBytes = 0x00FF00FF;  // selects bytes 0 and 2
+  const uint32_t even = (((pix & kEvenBytes) * scale) >> 8) & kEvenBytes;
+  const uint32_t odd = (((pix >> 8) & kEvenBytes) * scale) & ~kEvenBytes;
+  return even | odd;
+}
+
+// Premultiplied blend: 'src' plus 'dst' scaled by (256 - src alpha) / 256.
+static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
+  const uint32_t dst_weight = 256 - (src >> 24);
+  return ChannelwiseMultiply(dst, dst_weight) + src;
+}
+
+// Blends the first 'num_pixels' of premultiplied 'src' over 'dst'. The results
+// are written back into 'src'; 'dst' is only read.
+static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
+                                 int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t pixel = src[i];
+    // Pixels whose alpha byte is 0xff are left as they are.
+    if ((pixel >> 24) == 0xff) continue;
+    src[i] = BlendPixelPremult(pixel, dst[i]);
+  }
+}
+
+// Computes the (at most two) horizontal spans of row 'canvas_y' that lie in
+// 'src' but not in 'dst', as <left, width> pairs. A width of 0 means empty.
+static void FindBlendRangeAtRow(const WebPIterator* const src,
+                                const WebPIterator* const dst, int canvas_y,
+                                int* const left1, int* const width1,
+                                int* const left2, int* const width2) {
+  const int src_min_x = src->x_offset;
+  const int src_max_x = src_min_x + src->width;
+  const int dst_min_x = dst->x_offset;
+  const int dst_max_x = dst_min_x + dst->width;
+  const int row_in_dst =
+      (canvas_y >= dst->y_offset && canvas_y < dst->y_offset + dst->height);
+  const int x_overlap = (src_min_x < dst_max_x && src_max_x > dst_min_x);
+  assert(canvas_y >= src->y_offset && canvas_y < (src->y_offset + src->height));
+  *left1 = *left2 = -1;
+  *width1 = *width2 = 0;
+
+  if (!row_in_dst || !x_overlap) {
+    // No intersection with 'dst' on this row: the whole 'src' row qualifies.
+    *left1 = src_min_x;
+    *width1 = src->width;
+  } else {
+    if (src_min_x < dst_min_x) {  // part of 'src' left of 'dst'
+      *left1 = src_min_x;
+      *width1 = dst_min_x - src_min_x;
+    }
+    if (src_max_x > dst_max_x) {  // part of 'src' right of 'dst'
+      *left2 = dst_max_x;
+      *width2 = src_max_x - dst_max_x;
+    }
+  }
+}
+
+int WebPAnimDecoderGetNext(WebPAnimDecoder* dec,
+                           uint8_t** buf_ptr, int* timestamp_ptr) {
+  WebPIterator iter;  // demuxer view of the frame decoded by this call
+  uint32_t width;
+  uint32_t height;
+  int is_key_frame;
+  int timestamp;
+  BlendRowFunc blend_row;
+
+  if (dec == NULL || buf_ptr == NULL || timestamp_ptr == NULL) return 0;
+  if (!WebPAnimDecoderHasMoreFrames(dec)) return 0;
+
+  width = dec->info_.canvas_width;
+  height = dec->info_.canvas_height;
+  blend_row = dec->blend_func_;
+
+  // Get compressed frame.
+  if (!WebPDemuxGetFrame(dec->demux_, dec->next_frame_, &iter)) {
+    return 0;
+  }
+  timestamp = dec->prev_frame_timestamp_ + iter.duration;  // cumulative
+
+  // Initialize the canvas: blank for a key-frame, else the disposed previous.
+  is_key_frame = IsKeyFrame(&iter, &dec->prev_iter_,
+                            dec->prev_frame_was_keyframe_, width, height);
+  if (is_key_frame) {
+    ZeroFillCanvas(dec->curr_frame_, width, height);
+  } else {
+    CopyCanvas(dec->prev_frame_disposed_, dec->curr_frame_, width, height);
+  }
+
+  // Decode the frame straight into its rectangle of the current canvas.
+  {
+    const uint8_t* in = iter.fragment.bytes;
+    const size_t in_size = iter.fragment.size;
+    const size_t out_offset =
+        (iter.y_offset * width + iter.x_offset) * NUM_CHANNELS;
+    WebPDecoderConfig* const config = &dec->config_;
+    WebPRGBABuffer* const buf = &config->output.u.RGBA;
+    buf->stride = NUM_CHANNELS * width;  // rows are a full canvas row apart
+    buf->size = buf->stride * iter.height;
+    buf->rgba = dec->curr_frame_ + out_offset;
+
+    if (WebPDecode(in, in_size, config) != VP8_STATUS_OK) {
+      goto Error;
+    }
+  }
+
+  // During the decoding of current frame, we may have set some pixels to be
+  // transparent (i.e. alpha < 255). However, the value of each of these
+  // pixels should have been determined by blending it against the value of
+  // that pixel in the previous frame if the blending method is WEBP_MUX_BLEND.
+  if (iter.frame_num > 1 && iter.blend_method == WEBP_MUX_BLEND &&
+      !is_key_frame) {
+    if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_NONE) {
+      int y;
+      // Blend transparent pixels with pixels in previous canvas.
+      for (y = 0; y < iter.height; ++y) {
+        const size_t offset =
+            (iter.y_offset + y) * width + iter.x_offset;
+        blend_row((uint32_t*)dec->curr_frame_ + offset,
+                  (uint32_t*)dec->prev_frame_disposed_ + offset, iter.width);
+      }
+    } else {
+      int y;
+      assert(dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND);
+      // We need to blend a transparent pixel with its value just after
+      // initialization. That is, blend it with:
+      // * Fully transparent pixel if it belongs to prevRect <-- No-op.
+      // * The pixel in the previous canvas otherwise <-- Need alpha-blending.
+      for (y = 0; y < iter.height; ++y) {
+        const int canvas_y = iter.y_offset + y;
+        int left1, width1, left2, width2;
+        FindBlendRangeAtRow(&iter, &dec->prev_iter_, canvas_y, &left1, &width1,
+                            &left2, &width2);
+        if (width1 > 0) {
+          const size_t offset1 = canvas_y * width + left1;
+          blend_row((uint32_t*)dec->curr_frame_ + offset1,
+                        (uint32_t*)dec->prev_frame_disposed_ + offset1, width1);
+        }
+        if (width2 > 0) {
+          const size_t offset2 = canvas_y * width + left2;
+          blend_row((uint32_t*)dec->curr_frame_ + offset2,
+                        (uint32_t*)dec->prev_frame_disposed_ + offset2, width2);
+        }
+      }
+    }
+  }
+
+  // Update info of the previous frame and dispose it for the next iteration.
+  dec->prev_frame_timestamp_ = timestamp;
+  dec->prev_iter_ = iter;
+  dec->prev_frame_was_keyframe_ = is_key_frame;
+  CopyCanvas(dec->curr_frame_, dec->prev_frame_disposed_, width, height);
+  if (dec->prev_iter_.dispose_method == WEBP_MUX_DISPOSE_BACKGROUND) {
+    ZeroFillFrameRect(dec->prev_frame_disposed_, width * NUM_CHANNELS,
+                      dec->prev_iter_.x_offset, dec->prev_iter_.y_offset,
+                      dec->prev_iter_.width, dec->prev_iter_.height);
+  }
+  ++dec->next_frame_;
+
+  // All OK, fill in the values. 'buf_ptr' refers to the decoder's own canvas.
+  *buf_ptr = dec->curr_frame_;
+  *timestamp_ptr = timestamp;
+  return 1;
+
+ Error:
+  WebPDemuxReleaseIterator(&iter);  // decoder state is left un-advanced
+  return 0;
+}
+
+int WebPAnimDecoderHasMoreFrames(const WebPAnimDecoder* dec) {
+  // A NULL decoder has no frames; otherwise compare against the frame count.
+  return (dec != NULL) && (dec->next_frame_ <= (int)dec->info_.frame_count);
+}
+
+void WebPAnimDecoderReset(WebPAnimDecoder* dec) {
+  if (dec == NULL) return;
+  // Drop all previous-frame state and restart iteration at frame number 1.
+  memset(&dec->prev_iter_, 0, sizeof(dec->prev_iter_));
+  dec->prev_frame_was_keyframe_ = 0;
+  dec->prev_frame_timestamp_ = 0;
+  dec->next_frame_ = 1;
+}
+
+const WebPDemuxer* WebPAnimDecoderGetDemuxer(const WebPAnimDecoder* dec) {
+  // Returns the decoder's demuxer pointer; a NULL decoder yields NULL.
+  return (dec != NULL) ? dec->demux_ : NULL;
+}
+
+void WebPAnimDecoderDelete(WebPAnimDecoder* dec) {
+  if (dec == NULL) return;  // deleting a NULL decoder is a no-op
+  // Release the demuxer and both canvas buffers, then the decoder itself.
+  WebPDemuxDelete(dec->demux_);
+  WebPSafeFree(dec->curr_frame_);
+  WebPSafeFree(dec->prev_frame_disposed_);
+  WebPSafeFree(dec);
+}
diff --git a/drivers/webp/demux/demux.c b/drivers/webp/demux/demux.c
new file mode 100644
index 0000000000..3717e21165
--- /dev/null
+++ b/drivers/webp/demux/demux.c
@@ -0,0 +1,957 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  WebP container demux.
+//
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../utils/utils.h"
+#include "webp/decode.h"     // WebPGetFeatures
+#include "webp/demux.h"
+#include "webp/format_constants.h"
+
+#define DMUX_MAJ_VERSION 0
+#define DMUX_MIN_VERSION 2
+#define DMUX_REV_VERSION 2
+
+typedef struct {
+  size_t start_;        // start location of the data (current read position)
+  size_t end_;          // end location
+  size_t riff_end_;     // riff chunk end location, can be > end_.
+  size_t buf_size_;     // size of the buffer
+  const uint8_t* buf_;  // input bytes; the locations above are offsets into it
+} MemBuffer;
+
+typedef struct {
+  size_t offset_;  // buffer offset at which the chunk header starts
+  size_t size_;    // stored size in bytes, counted from 'offset_'
+} ChunkData;
+
+typedef struct Frame {
+  int x_offset_, y_offset_;  // frame position, checked against the canvas
+  int width_, height_;       // frame dimensions
+  int has_alpha_;            // set for an ALPH chunk or alpha in the bitstream
+  int duration_;             // as read from the ANMF chunk
+  WebPMuxAnimDispose dispose_method_;
+  WebPMuxAnimBlend blend_method_;
+  int is_fragment_;  // this is a frame fragment (and not a full frame).
+  int frame_num_;  // the referent frame number for use in assembling fragments.
+  int complete_;   // img_components_ contains a full image.
+  ChunkData img_components_[2];  // 0=VP8{,L} 1=ALPH
+  struct Frame* next_;           // next frame in the list, NULL for the last
+} Frame;
+
+typedef struct Chunk {
+  ChunkData data_;      // location of the stored chunk in the buffer
+  struct Chunk* next_;  // next entry of the list, NULL for the last one
+} Chunk;
+
+struct WebPDemuxer {
+  MemBuffer mem_;           // input data and read position
+  WebPDemuxState state_;    // parsing progress or error state
+  int is_ext_format_;       // set when the file starts with a VP8X chunk
+  uint32_t feature_flags_;  // flag byte read from the VP8X chunk
+  int canvas_width_, canvas_height_;  // -1 until known
+  int loop_count_;          // read from the ANIM chunk, 1 by default
+  uint32_t bgcolor_;        // read from the ANIM chunk, 0xFFFFFFFF by default
+  int num_frames_;          // number of frames added to 'frames_'
+  Frame* frames_;           // head of the frame list
+  Frame** frames_tail_;     // link that receives the next appended frame
+  Chunk* chunks_;  // non-image chunks
+  Chunk** chunks_tail_;     // link that receives the next appended chunk
+};
+
+typedef enum {
+  PARSE_OK,              // the parsing step completed
+  PARSE_NEED_MORE_DATA,  // the available input ended before the step could
+  PARSE_ERROR            // invalid input or a failed allocation
+} ParseStatus;
+
+typedef struct ChunkParser {
+  uint8_t id[4];  // tag compared with the first chunk after the RIFF header
+  ParseStatus (*parse)(WebPDemuxer* const dmux);  // run when 'id' matches
+  int (*valid)(const WebPDemuxer* const dmux);    // validation after 'parse'
+} ChunkParser;
+
+static ParseStatus ParseSingleImage(WebPDemuxer* const dmux);
+static ParseStatus ParseVP8X(WebPDemuxer* const dmux);
+static int IsValidSimpleFormat(const WebPDemuxer* const dmux);
+static int IsValidExtendedFormat(const WebPDemuxer* const dmux);
+
+static const ChunkParser kMasterChunks[] = {
+  { { 'V', 'P', '8', ' ' }, ParseSingleImage, IsValidSimpleFormat },
+  { { 'V', 'P', '8', 'L' }, ParseSingleImage, IsValidSimpleFormat },
+  { { 'V', 'P', '8', 'X' }, ParseVP8X,        IsValidExtendedFormat },
+  { { '0', '0', '0', '0' }, NULL,             NULL },  // end-of-table marker
+};
+
+//------------------------------------------------------------------------------
+
+int WebPGetDemuxVersion(void) {  // packed as major << 16 | minor << 8 | rev
+  return (DMUX_MAJ_VERSION << 16) | (DMUX_MIN_VERSION << 8) | DMUX_REV_VERSION;
+}
+
+// -----------------------------------------------------------------------------
+// MemBuffer
+
+static int RemapMemBuffer(MemBuffer* const mem,
+                          const uint8_t* data, size_t size) {
+  // Re-point 'mem' at 'data'; the buffer may grow but never shrink.
+  if (mem->buf_size_ > size) return 0;
+  mem->buf_ = data;
+  mem->buf_size_ = mem->end_ = size;
+  return 1;
+}
+
+static int InitMemBuffer(MemBuffer* const mem,
+                         const uint8_t* data, size_t size) {
+  memset(mem, 0, sizeof(*mem));  // buf_size_ becomes 0, so any 'size' fits
+  return RemapMemBuffer(mem, data, size);
+}
+
+// Return the remaining data size available in 'mem'.
+static WEBP_INLINE size_t MemDataSize(const MemBuffer* const mem) {
+  return (mem->end_ - mem->start_);  // unsigned: relies on start_ <= end_
+}
+
+// Return true if 'size' exceeds the end of the RIFF chunk.
+static WEBP_INLINE int SizeIsInvalid(const MemBuffer* const mem, size_t size) {
+  return (size > mem->riff_end_ - mem->start_);  // room left before riff_end_
+}
+
+static WEBP_INLINE void Skip(MemBuffer* const mem, size_t size) {
+  mem->start_ += size;  // advance the read position; no bounds check here
+}
+
+static WEBP_INLINE void Rewind(MemBuffer* const mem, size_t size) {
+  mem->start_ -= size;  // move the read position back; no bounds check here
+}
+
+static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
+  return mem->buf_ + mem->start_;  // pointer to the current read position
+}
+
+// Read from 'mem' and skip the read bytes.
+static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
+  const uint8_t* const data = mem->buf_ + mem->start_;
+  mem->start_ += 1;
+  return data[0];
+}
+
+static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
+  // Read a value with GetLE16() at the read position, then step over 2 bytes.
+  const int value = GetLE16(mem->buf_ + mem->start_);
+  mem->start_ += 2;
+  return value;
+}
+
+static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
+  // Read a value with GetLE24() at the read position, then step over 3 bytes.
+  const int value = GetLE24(mem->buf_ + mem->start_);
+  mem->start_ += 3;
+  return value;
+}
+
+static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
+  // Read a value with GetLE32() at the read position, then step over 4 bytes.
+  const uint32_t value = GetLE32(mem->buf_ + mem->start_);
+  mem->start_ += 4;
+  return value;
+}
+
+// -----------------------------------------------------------------------------
+// Secondary chunk parsing
+
+static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
+  *dmux->chunks_tail_ = chunk;         // hook 'chunk' into the last link
+  chunk->next_ = NULL;                 // it becomes the end of the list
+  dmux->chunks_tail_ = &chunk->next_;  // the next append goes after it
+}
+
+// Add a frame to the end of the list, ensuring the last frame is complete.
+// Returns true on success, false otherwise.
+static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
+  const Frame* const last_frame = *dmux->frames_tail_;
+  if (last_frame != NULL && !last_frame->complete_) return 0;
+  // NOTE(review): *frames_tail_ looks always NULL here; check may never fire.
+  *dmux->frames_tail_ = frame;
+  frame->next_ = NULL;
+  dmux->frames_tail_ = &frame->next_;  // the next append goes after 'frame'
+  return 1;
+}
+
+// Store image bearing chunks to 'frame'.
+static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
+                              MemBuffer* const mem, Frame* const frame) {
+  int alpha_chunks = 0;
+  int image_chunks = 0;
+  // The loop reads a full chunk header first, so require one to be present.
+  int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
+              MemDataSize(mem) < min_size);
+  ParseStatus status = PARSE_OK;
+  if (done) return PARSE_NEED_MORE_DATA;
+
+  do {
+    const size_t chunk_start_offset = mem->start_;
+    const uint32_t fourcc = ReadLE32(mem);
+    const uint32_t payload_size = ReadLE32(mem);
+    const uint32_t payload_size_padded = payload_size + (payload_size & 1);
+    const size_t payload_available = (payload_size_padded > MemDataSize(mem))
+                                   ? MemDataSize(mem) : payload_size_padded;
+    const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available;
+
+    if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+    if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
+    if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
+
+    switch (fourcc) {
+      case MKFOURCC('A', 'L', 'P', 'H'):
+        if (alpha_chunks == 0) {
+          ++alpha_chunks;
+          frame->img_components_[1].offset_ = chunk_start_offset;
+          frame->img_components_[1].size_ = chunk_size;
+          frame->has_alpha_ = 1;
+          frame->frame_num_ = frame_num;
+          Skip(mem, payload_available);
+        } else {
+          goto Done;
+        }
+        break;
+      case MKFOURCC('V', 'P', '8', 'L'):
+        if (alpha_chunks > 0) return PARSE_ERROR;  // VP8L has its own alpha
+        // fall through
+      case MKFOURCC('V', 'P', '8', ' '):
+        if (image_chunks == 0) {
+          // Extract bitstream features, tolerating failures on incomplete data.
+          WebPBitstreamFeatures features;
+          const VP8StatusCode vp8_status =
+              WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
+                              &features);
+          if (status == PARSE_NEED_MORE_DATA &&
+              vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
+            return PARSE_NEED_MORE_DATA;
+          } else if (vp8_status != VP8_STATUS_OK) {
+            // We have enough data, and yet WebPGetFeatures() failed.
+            return PARSE_ERROR;
+          }
+          ++image_chunks;
+          frame->img_components_[0].offset_ = chunk_start_offset;
+          frame->img_components_[0].size_ = chunk_size;
+          frame->width_ = features.width;
+          frame->height_ = features.height;
+          frame->has_alpha_ |= features.has_alpha;
+          frame->frame_num_ = frame_num;
+          frame->complete_ = (status == PARSE_OK);
+          Skip(mem, payload_available);
+        } else {
+          goto Done;
+        }
+        break;
+ Done:
+      default:
+        // Restore fourcc/size when moving up one level in parsing.
+        Rewind(mem, CHUNK_HEADER_SIZE);
+        done = 1;
+        break;
+    }
+
+    if (mem->start_ == mem->riff_end_) {
+      done = 1;
+    } else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
+      status = PARSE_NEED_MORE_DATA;
+    }
+  } while (!done && status == PARSE_OK);
+
+  return status;
+}
+
+// Allocates a Frame into '*frame' once the sizes check out. Returns
+// PARSE_ERROR if 'min_size' exceeds the RIFF bounds, if 'actual_size' is
+// smaller than 'min_size' or if the allocation fails; PARSE_NEED_MORE_DATA if
+// 'mem' currently holds fewer than 'min_size' bytes; PARSE_OK otherwise.
+static ParseStatus NewFrame(const MemBuffer* const mem,
+                            uint32_t min_size, uint32_t actual_size,
+                            Frame** frame) {
+  if (SizeIsInvalid(mem, min_size) || actual_size < min_size) {
+    return PARSE_ERROR;
+  }
+  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
+  *frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(Frame));
+  return (*frame != NULL) ? PARSE_OK : PARSE_ERROR;
+}
+
+// Parse a 'ANMF' chunk and any image bearing chunks that immediately follow.
+// 'frame_chunk_size' is the previously validated, padded chunk size.
+static ParseStatus ParseAnimationFrame(
+    WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
+  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
+  int added_frame = 0;
+  int bits;
+  MemBuffer* const mem = &dmux->mem_;
+  Frame* frame;
+  ParseStatus status =
+      NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
+  if (status != PARSE_OK) return status;
+  // Fixed ANMF fields: offsets are stored halved, dimensions minus one.
+  frame->x_offset_       = 2 * ReadLE24s(mem);
+  frame->y_offset_       = 2 * ReadLE24s(mem);
+  frame->width_          = 1 + ReadLE24s(mem);
+  frame->height_         = 1 + ReadLE24s(mem);
+  frame->duration_       = ReadLE24s(mem);
+  bits = ReadByte(mem);  // bit 0: dispose method, bit 1: blend method
+  frame->dispose_method_ =
+      (bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
+  frame->blend_method_ = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
+  if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
+    WebPSafeFree(frame);
+    return PARSE_ERROR;
+  }
+
+  // Store a frame only if the animation flag is set and some data for this
+  // frame is available.
+  status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame);
+  if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) {
+    added_frame = AddFrame(dmux, frame);
+    if (added_frame) {
+      ++dmux->num_frames_;
+    } else {
+      status = PARSE_ERROR;
+    }
+  }
+
+  if (!added_frame) WebPSafeFree(frame);  // not handed over to the frame list
+  return status;
+}
+
+// Records a chunk so that its payload can later be requested by fourcc.
+// 'start_offset' is where the chunk header begins and 'size' covers the header
+// plus the unpadded payload.
+// Returns 1 once the chunk is appended to dmux's list, 0 if allocation fails.
+static int StoreChunk(WebPDemuxer* const dmux,
+                      size_t start_offset, uint32_t size) {
+  Chunk* const entry = (Chunk*)WebPSafeCalloc(1ULL, sizeof(Chunk));
+  if (entry != NULL) {
+    entry->data_.size_ = size;
+    entry->data_.offset_ = start_offset;
+    AddChunk(dmux, entry);
+  }
+  return (entry != NULL);
+}
+
+// -----------------------------------------------------------------------------
+// Primary chunk parsing
+
+static ParseStatus ReadHeader(MemBuffer* const mem) {
+  const size_t min_size = RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE;
+  const uint8_t* header;
+  uint32_t riff_size;
+
+  // Basic file level validation: RIFF tag first, WEBP tag after the size.
+  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
+  header = GetBuffer(mem);
+  if (memcmp(header, "RIFF", CHUNK_SIZE_BYTES) != 0) return PARSE_ERROR;
+  if (memcmp(header + CHUNK_HEADER_SIZE, "WEBP", CHUNK_SIZE_BYTES) != 0) {
+    return PARSE_ERROR;
+  }
+  riff_size = GetLE32(header + TAG_SIZE);
+  if (riff_size < CHUNK_HEADER_SIZE || riff_size > MAX_CHUNK_PAYLOAD) {
+    return PARSE_ERROR;
+  }
+  // There's no point in reading past the end of the RIFF chunk.
+  mem->riff_end_ = riff_size + CHUNK_HEADER_SIZE;
+  if (mem->riff_end_ < mem->buf_size_) {
+    mem->buf_size_ = mem->end_ = mem->riff_end_;
+  }
+  Skip(mem, RIFF_HEADER_SIZE);
+  return PARSE_OK;
+}
+
+static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
+  const size_t min_size = CHUNK_HEADER_SIZE;
+  MemBuffer* const mem = &dmux->mem_;
+  Frame* frame;
+  ParseStatus status;
+  int image_added = 0;
+
+  if (dmux->frames_ != NULL) return PARSE_ERROR;  // a frame already exists
+  if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
+  if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
+
+  frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
+  if (frame == NULL) return PARSE_ERROR;
+
+  // For the single image case we allow parsing of a partial frame, but we need
+  // at least CHUNK_HEADER_SIZE for parsing.
+  status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame);
+  if (status != PARSE_ERROR) {
+    const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
+    // Clear any alpha when the alpha flag is missing.
+    if (!has_alpha && frame->img_components_[1].size_ > 0) {
+      frame->img_components_[1].offset_ = 0;
+      frame->img_components_[1].size_ = 0;
+      frame->has_alpha_ = 0;
+    }
+
+    // Use the frame width/height as the canvas values for non-vp8x files.
+    // Also, set ALPHA_FLAG if this is a lossless image with alpha.
+    if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
+      dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
+      dmux->canvas_width_ = frame->width_;
+      dmux->canvas_height_ = frame->height_;
+      dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
+    }
+    if (!AddFrame(dmux, frame)) {
+      status = PARSE_ERROR;  // last frame was left incomplete
+    } else {
+      image_added = 1;
+      dmux->num_frames_ = 1;
+    }
+  }
+
+  if (!image_added) WebPSafeFree(frame);  // not handed over to the frame list
+  return status;
+}
+
+static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
+  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  MemBuffer* const mem = &dmux->mem_;
+  int anim_chunks = 0;
+  ParseStatus status = PARSE_OK;
+
+  do {
+    int store_chunk = 1;
+    const size_t chunk_start_offset = mem->start_;
+    const uint32_t fourcc = ReadLE32(mem);
+    const uint32_t chunk_size = ReadLE32(mem);
+    const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
+
+    if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+    if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR;
+
+    switch (fourcc) {
+      case MKFOURCC('V', 'P', '8', 'X'): {
+        return PARSE_ERROR;  // VP8X is not accepted among the chunks here
+      }
+      case MKFOURCC('A', 'L', 'P', 'H'):
+      case MKFOURCC('V', 'P', '8', ' '):
+      case MKFOURCC('V', 'P', '8', 'L'): {
+        // check that this isn't an animation (all frames should be in an ANMF).
+        if (anim_chunks > 0 || is_animation) return PARSE_ERROR;
+
+        Rewind(mem, CHUNK_HEADER_SIZE);
+        status = ParseSingleImage(dmux);
+        break;
+      }
+      case MKFOURCC('A', 'N', 'I', 'M'): {
+        if (chunk_size_padded < ANIM_CHUNK_SIZE) return PARSE_ERROR;
+
+        if (MemDataSize(mem) < chunk_size_padded) {
+          status = PARSE_NEED_MORE_DATA;
+        } else if (anim_chunks == 0) {
+          ++anim_chunks;
+          dmux->bgcolor_ = ReadLE32(mem);
+          dmux->loop_count_ = ReadLE16s(mem);
+          Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
+        } else {
+          store_chunk = 0;  // a repeated ANIM chunk is skipped, not stored
+          goto Skip;
+        }
+        break;
+      }
+      case MKFOURCC('A', 'N', 'M', 'F'): {
+        if (anim_chunks == 0) return PARSE_ERROR;  // 'ANIM' precedes frames.
+        status = ParseAnimationFrame(dmux, chunk_size_padded);
+        break;
+      }
+      case MKFOURCC('I', 'C', 'C', 'P'): {
+        store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
+        goto Skip;
+      }
+      case MKFOURCC('E', 'X', 'I', 'F'): {
+        store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
+        goto Skip;
+      }
+      case MKFOURCC('X', 'M', 'P', ' '): {
+        store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
+        goto Skip;
+      }
+ Skip:
+      default: {
+        if (chunk_size_padded <= MemDataSize(mem)) {
+          if (store_chunk) {
+            // Store only the chunk header and unpadded size as only the payload
+            // will be returned to the user.
+            if (!StoreChunk(dmux, chunk_start_offset,
+                            CHUNK_HEADER_SIZE + chunk_size)) {
+              return PARSE_ERROR;
+            }
+          }
+          Skip(mem, chunk_size_padded);
+        } else {
+          status = PARSE_NEED_MORE_DATA;
+        }
+      }
+    }
+
+    if (mem->start_ == mem->riff_end_) {
+      break;  // reached the end of the RIFF chunk
+    } else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
+      status = PARSE_NEED_MORE_DATA;  // the next header is not available yet
+    }
+  } while (status == PARSE_OK);
+
+  return status;
+}
+
+static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
+  MemBuffer* const mem = &dmux->mem_;
+  uint32_t vp8x_size;
+
+  if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
+
+  dmux->is_ext_format_ = 1;
+  Skip(mem, TAG_SIZE);  // VP8X
+  vp8x_size = ReadLE32(mem);
+  if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
+  if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
+  vp8x_size += vp8x_size & 1;  // round up to an even size
+  if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
+  if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
+  // Payload: flags byte, 3 reserved bytes, canvas width and height minus one.
+  dmux->feature_flags_ = ReadByte(mem);
+  Skip(mem, 3);  // Reserved.
+  dmux->canvas_width_  = 1 + ReadLE24s(mem);
+  dmux->canvas_height_ = 1 + ReadLE24s(mem);
+  if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
+    return PARSE_ERROR;  // image final dimension is too large
+  }
+  Skip(mem, vp8x_size - VP8X_CHUNK_SIZE);  // skip any trailing data.
+  dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
+  // A further chunk header has to fit in the RIFF chunk and be available.
+  if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
+  if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
+
+  return ParseVP8XChunks(dmux);
+}
+
+// -----------------------------------------------------------------------------
+// Format validation
+
+static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
+  const Frame* const frame = dmux->frames_;
+  // Nothing can be checked while the header is still being parsed.
+  if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
+  // From here on both the canvas and the frame need positive dimensions.
+  if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
+  // A completely parsed file without a frame is invalid.
+  if (dmux->state_ == WEBP_DEMUX_DONE && frame == NULL) return 0;
+  return (frame->width_ > 0 && frame->height_ > 0);
+}
+
+// With 'exact' set, the frame must coincide with the canvas: zero offsets and
+// identical dimensions. Otherwise it only has to lie entirely inside it.
+// TODO(jzern): this is insufficient in the fragmented image case if the
+// expectation is that the fragments completely cover the canvas.
+static int CheckFrameBounds(const Frame* const frame, int exact,
+                            int canvas_width, int canvas_height) {
+  const int x = frame->x_offset_;
+  const int y = frame->y_offset_;
+  const int w = frame->width_;
+  const int h = frame->height_;
+
+  if (exact) {
+    // The frame has to be the canvas itself.
+    return (x == 0 && y == 0 && w == canvas_width && h == canvas_height);
+  }
+  // Reject negative offsets first, then check the far edges.
+  if (x < 0 || y < 0) return 0;
+  if (w + x > canvas_width) return 0;
+  return (h + y <= canvas_height);
+}
+
+static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
+  const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
+  const int is_fragmented = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
+  const Frame* f = dmux->frames_;
+
+  if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;  // nothing to check
+
+  if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
+  if (dmux->loop_count_ < 0) return 0;
+  if (dmux->state_ == WEBP_DEMUX_DONE && dmux->frames_ == NULL) return 0;
+  if (is_fragmented) return 0;  // so 'is_fragmented' is always 0 below
+
+  while (f != NULL) {
+    const int cur_frame_set = f->frame_num_;
+    int frame_count = 0, fragment_count = 0;
+
+    // Check frame properties and if the image is composed of fragments that
+    // each fragment came from a fragment.
+    for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
+      const ChunkData* const image = f->img_components_;
+      const ChunkData* const alpha = f->img_components_ + 1;
+
+      if (is_fragmented && !f->is_fragment_) return 0;
+      if (!is_fragmented && f->is_fragment_) return 0;
+      if (!is_animation && f->frame_num_ > 1) return 0;
+
+      if (f->complete_) {
+        if (alpha->size_ == 0 && image->size_ == 0) return 0;
+        // Ensure alpha precedes image bitstream.
+        if (alpha->size_ > 0 && alpha->offset_ > image->offset_) {
+          return 0;
+        }
+
+        if (f->width_ <= 0 || f->height_ <= 0) return 0;
+      } else {
+        // There shouldn't be a partial frame in a complete file.
+        if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
+
+        // Ensure alpha precedes image bitstream.
+        if (alpha->size_ > 0 && image->size_ > 0 &&
+            alpha->offset_ > image->offset_) {
+          return 0;
+        }
+        // There shouldn't be any frames after an incomplete one.
+        if (f->next_ != NULL) return 0;
+      }
+
+      if (f->width_ > 0 && f->height_ > 0 &&
+          !CheckFrameBounds(f, !(is_animation || is_fragmented),
+                            dmux->canvas_width_, dmux->canvas_height_)) {
+        return 0;
+      }
+
+      fragment_count += f->is_fragment_;
+      ++frame_count;
+    }
+    if (!is_fragmented && frame_count > 1) return 0;
+    if (fragment_count > 0 && frame_count != fragment_count) return 0;
+  }
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// WebPDemuxer object
+
+static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
+  dmux->mem_ = *mem;  // the demuxer keeps its own copy of the buffer state
+  dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
+  dmux->canvas_width_ = dmux->canvas_height_ = -1;  // not known yet
+  dmux->bgcolor_ = 0xFFFFFFFF;  // White background by default.
+  dmux->loop_count_ = 1;
+  // Each tail initially refers to the head pointer of its own list.
+  dmux->chunks_tail_ = &dmux->chunks_;
+  dmux->frames_tail_ = &dmux->frames_;
+}
+
+WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
+                               WebPDemuxState* state, int version) {
+  const ChunkParser* parser;
+  int partial;  // set when buf_size_ < riff_end_, see below
+  ParseStatus status = PARSE_ERROR;
+  MemBuffer mem;
+  WebPDemuxer* dmux;
+  // Builds a demuxer over 'data'; returns NULL on failure. 'state' is optional.
+  if (state != NULL) *state = WEBP_DEMUX_PARSE_ERROR;  // early-exit default
+  // Refuse an incompatible ABI 'version' and missing or empty input.
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DEMUX_ABI_VERSION)) return NULL;
+  if (data == NULL || data->bytes == NULL || data->size == 0) return NULL;
+
+  if (!InitMemBuffer(&mem, data->bytes, data->size)) return NULL;
+  status = ReadHeader(&mem);
+  if (status != PARSE_OK) {
+    if (state != NULL) {  // a short header is reported as "still parsing"
+      *state = (status == PARSE_NEED_MORE_DATA) ? WEBP_DEMUX_PARSING_HEADER
+                                                : WEBP_DEMUX_PARSE_ERROR;
+    }
+    return NULL;
+  }
+  // A buffer smaller than riff_end_ is partial data: rejected unless allowed.
+  partial = (mem.buf_size_ < mem.riff_end_);
+  if (!allow_partial && partial) return NULL;
+
+  dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
+  if (dmux == NULL) return NULL;
+  InitDemux(dmux, &mem);
+  // Run the first kMasterChunks parser whose id matches the GetBuffer() bytes.
+  status = PARSE_ERROR;  // stays an error if no parser matches
+  for (parser = kMasterChunks; parser->parse != NULL; ++parser) {
+    if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
+      status = parser->parse(dmux);
+      if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
+      if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
+      if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
+      if (status == PARSE_ERROR) dmux->state_ = WEBP_DEMUX_PARSE_ERROR;
+      break;
+    }
+  }
+  if (state != NULL) *state = dmux->state_;  // read before dmux may be freed
+
+  if (status == PARSE_ERROR) {
+    WebPDemuxDelete(dmux);
+    return NULL;
+  }
+  return dmux;
+}
+
+void WebPDemuxDelete(WebPDemuxer* dmux) {
+  if (dmux != NULL) {  // deleting a NULL demuxer is a no-op
+    Frame* frame = dmux->frames_;
+    Chunk* chunk = dmux->chunks_;
+    while (frame != NULL) {  // free the frame list node by node
+      Frame* const next_frame = frame->next_;  // read before the node is freed
+      WebPSafeFree(frame);
+      frame = next_frame;
+    }
+    while (chunk != NULL) {  // then the chunk list
+      Chunk* const next_chunk = chunk->next_;
+      WebPSafeFree(chunk);
+      chunk = next_chunk;
+    }
+    WebPSafeFree(dmux);
+  }
+}
+
+// -----------------------------------------------------------------------------
+
+uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
+  uint32_t value = 0;  // result for a NULL demuxer or an unlisted feature
+  if (dmux == NULL) return value;
+  switch (feature) {
+    case WEBP_FF_FORMAT_FLAGS: value = dmux->feature_flags_; break;
+    case WEBP_FF_CANVAS_WIDTH: value = (uint32_t)dmux->canvas_width_; break;
+    case WEBP_FF_CANVAS_HEIGHT: value = (uint32_t)dmux->canvas_height_; break;
+    case WEBP_FF_LOOP_COUNT: value = (uint32_t)dmux->loop_count_; break;
+    case WEBP_FF_BACKGROUND_COLOR: value = dmux->bgcolor_; break;
+    case WEBP_FF_FRAME_COUNT: value = (uint32_t)dmux->num_frames_; break;
+  }
+  return value;
+}
+
+// -----------------------------------------------------------------------------
+// Frame iteration
+
+// Returns the first list entry whose frame number is 'frame_num', or NULL if
+// there is none. The fragments of one frame all carry the same number.
+static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
+  const Frame* cur = dmux->frames_;
+  while (cur != NULL && cur->frame_num_ != frame_num) {
+    cur = cur->next_;
+  }
+  return cur;
+}
+
+// Returns the 1-based 'fragment_num'-th entry of the run, or NULL if absent.
+static const Frame* GetFragment(
+    const Frame* const frame_set, int fragment_num, int* const count) {
+  const int wanted_frame = frame_set->frame_num_;
+  const Frame* match = NULL;
+  const Frame* cur = frame_set;
+  int seen = 0;  // entries visited so far that share 'wanted_frame'
+  while (cur != NULL && cur->frame_num_ == wanted_frame) {
+    if (++seen == fragment_num) match = cur;  // keep going to finish the count
+    cur = cur->next_;
+  }
+  *count = seen;  // size of the whole run, found or not
+  return match;
+}
+
+static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
+                                      const Frame* const frame,
+                                      size_t* const data_size) {
+  const ChunkData* image;
+  const ChunkData* alpha;
+  size_t first_byte;
+  *data_size = 0;
+  if (frame == NULL) return NULL;  // no frame: NULL payload of size 0
+  image = &frame->img_components_[0];
+  alpha = &frame->img_components_[1];
+  first_byte = image->offset_;
+  *data_size = image->size_;
+  // Alpha, when present, precedes the image: start there, include the gap.
+  if (alpha->size_ > 0) {
+    size_t gap = 0;  // bytes between the end of alpha and the image
+    if (image->offset_ > 0) {
+      gap = image->offset_ - (alpha->offset_ + alpha->size_);
+    }
+    first_byte = alpha->offset_;
+    *data_size += alpha->size_ + gap;
+  }
+  return mem_buf + first_byte;
+}
+
+// Create a whole 'frame' from VP8 (+ alpha) or lossless.
+static int SynthesizeFrame(const WebPDemuxer* const dmux,
+                           const Frame* const first_frame,
+                           int fragment_num, WebPIterator* const iter) {
+  const uint8_t* const mem_buf = dmux->mem_.buf_;
+  int num_fragments;  // filled in by GetFragment()
+  size_t payload_size = 0;
+  const Frame* const fragment =
+      GetFragment(first_frame, fragment_num, &num_fragments);
+  const uint8_t* const payload =
+      GetFramePayload(mem_buf, fragment, &payload_size);
+  if (payload == NULL) return 0;  // e.g. no fragment 'fragment_num' in the run
+  assert(first_frame != NULL);  // NOTE(review): GetFragment() already used it
+  // Publish the selected fragment's properties and payload through 'iter'.
+  iter->frame_num      = first_frame->frame_num_;
+  iter->num_frames     = dmux->num_frames_;
+  iter->fragment_num   = fragment_num;
+  iter->num_fragments  = num_fragments;
+  iter->x_offset       = fragment->x_offset_;
+  iter->y_offset       = fragment->y_offset_;
+  iter->width          = fragment->width_;
+  iter->height         = fragment->height_;
+  iter->has_alpha      = fragment->has_alpha_;
+  iter->duration       = fragment->duration_;
+  iter->dispose_method = fragment->dispose_method_;
+  iter->blend_method   = fragment->blend_method_;
+  iter->complete       = fragment->complete_;
+  iter->fragment.bytes = payload;  // points into the demuxer's buffer
+  iter->fragment.size  = payload_size;
+  return 1;  // 'iter' is only modified on this success path
+}
+
+static int SetFrame(int frame_num, WebPIterator* const iter) {
+  const WebPDemuxer* const demuxer = (WebPDemuxer*)iter->private_;
+  const Frame* target;
+  // Valid requests: a bound demuxer and 0 <= frame_num <= num_frames_.
+  if (demuxer == NULL) return 0;
+  if (frame_num < 0 || frame_num > demuxer->num_frames_) return 0;
+  if (frame_num == 0) frame_num = demuxer->num_frames_;  // 0 means "last"
+
+  target = GetFrame(demuxer, frame_num);
+  // Always land on the frame's first fragment.
+  return (target != NULL) ? SynthesizeFrame(demuxer, target, 1, iter) : 0;
+}
+
+int WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame, WebPIterator* iter) {
+  if (iter == NULL) return 0;
+  // Reset 'iter', bind it to 'dmux', then position it on frame number 'frame'.
+  memset(iter, 0, sizeof(*iter));
+  iter->private_ = (void*)dmux;  // SetFrame() returns 0 if this is NULL
+  return SetFrame(frame, iter);
+}
+
+int WebPDemuxNextFrame(WebPIterator* iter) {  // step to the following frame
+  if (iter == NULL) return 0;
+  return SetFrame(iter->frame_num + 1, iter);  // 0 once past num_frames_
+}
+
+int WebPDemuxPrevFrame(WebPIterator* iter) {  // step to the preceding frame
+  if (iter == NULL) return 0;
+  if (iter->frame_num <= 1) return 0;  // never pass 0: SetFrame() maps it to last
+  return SetFrame(iter->frame_num - 1, iter);
+}
+
+int WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num) {
+  const WebPDemuxer* demuxer;
+  const Frame* first;
+  // Needs an iterator bound to a demuxer and a fragment number of at least 1.
+  if (iter == NULL || iter->private_ == NULL || fragment_num <= 0) return 0;
+  demuxer = (WebPDemuxer*)iter->private_;
+  first = GetFrame(demuxer, iter->frame_num);
+  if (first == NULL) return 0;
+  return SynthesizeFrame(demuxer, first, fragment_num, iter);
+}
+
+void WebPDemuxReleaseIterator(WebPIterator* iter) {  // currently a no-op
+  (void)iter;  // nothing is released here; the cast marks 'iter' as unused
+}
+
+// -----------------------------------------------------------------------------
+// Chunk iteration
+
+static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {
+  const uint8_t* const base = dmux->mem_.buf_;
+  const Chunk* node = dmux->chunks_;
+  int matches = 0;
+  while (node != NULL) {
+    // A chunk's tag is the first TAG_SIZE bytes found at its recorded offset.
+    if (memcmp(base + node->data_.offset_, fourcc, TAG_SIZE) == 0) ++matches;
+    node = node->next_;
+  }
+  return matches;
+}
+
+static const Chunk* GetChunk(const WebPDemuxer* const dmux,
+                             const char fourcc[4], int chunk_num) {
+  const uint8_t* const base = dmux->mem_.buf_;
+  const Chunk* node = dmux->chunks_;
+  int matches = 0;  // chunks tagged 'fourcc' seen so far
+  while (node != NULL) {
+    if (memcmp(base + node->data_.offset_, fourcc, TAG_SIZE) == 0) ++matches;
+    if (matches == chunk_num) return node;  // reached the requested count
+    node = node->next_;
+  }
+  return NULL;  // the list ended before 'chunk_num' matches were seen
+}
+
+static int SetChunk(const char fourcc[4], int chunk_num,
+                    WebPChunkIterator* const iter) {
+  const WebPDemuxer* const demuxer = (WebPDemuxer*)iter->private_;
+  const Chunk* found;
+  int total;
+
+  if (demuxer == NULL || fourcc == NULL || chunk_num < 0) return 0;
+  total = ChunkCount(demuxer, fourcc);
+  if (total == 0) return 0;               // no chunk carries this tag
+  if (chunk_num == 0) chunk_num = total;  // 0 selects the last match
+  if (chunk_num > total) return 0;        // past the last match
+
+  found = GetChunk(demuxer, fourcc, chunk_num);
+  iter->chunk.bytes =  // the exposed payload starts right after the header
+      demuxer->mem_.buf_ + found->data_.offset_ + CHUNK_HEADER_SIZE;
+  iter->chunk.size = found->data_.size_ - CHUNK_HEADER_SIZE;
+  iter->num_chunks = total;
+  iter->chunk_num = chunk_num;
+  return 1;
+}
+
+int WebPDemuxGetChunk(const WebPDemuxer* dmux,
+                      const char fourcc[4], int chunk_num,
+                      WebPChunkIterator* iter) {
+  if (iter == NULL) return 0;
+  // Reset 'iter', bind it to 'dmux', then look up chunk 'chunk_num' of 'fourcc'.
+  memset(iter, 0, sizeof(*iter));  // on failure 'iter' stays zeroed but bound
+  iter->private_ = (void*)dmux;
+  return SetChunk(fourcc, chunk_num, iter);
+}
+
+int WebPDemuxNextChunk(WebPChunkIterator* iter) {
+  const char* fourcc;
+  // An unpositioned iterator (bytes == NULL) has no tag to step from.
+  if (iter == NULL || iter->chunk.bytes == NULL) return 0;
+  // The tag sits in the chunk header, right before the exposed payload.
+  fourcc = (const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
+  return SetChunk(fourcc, iter->chunk_num + 1, iter);
+}
+
+int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
+  const char* fourcc;
+  // Nothing precedes chunk 1; a zeroed iterator has chunk_num == 0.
+  if (iter == NULL || iter->chunk_num <= 1) return 0;
+  // The tag sits in the chunk header, right before the exposed payload.
+  fourcc = (const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
+  return SetChunk(fourcc, iter->chunk_num - 1, iter);
+}
+
+void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {  // a no-op here
+  (void)iter;  // nothing is released; the cast marks 'iter' as unused
+}
+
diff --git a/drivers/webp/dsp/alpha_processing.c b/drivers/webp/dsp/alpha_processing.c
new file mode 100644
index 0000000000..1716cace8d
--- /dev/null
+++ b/drivers/webp/dsp/alpha_processing.c
@@ -0,0 +1,383 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include "./dsp.h"
+
+// Tables can be faster on some platforms but incur extra binary size (~2k).
+// #define USE_TABLES_FOR_ALPHA_MULT
+
+// -----------------------------------------------------------------------------
+
+#define MFIX 24    // 24bit fixed-point arithmetic
+#define HALF ((1u << MFIX) >> 1)        // 0.5 in that fixed-point format
+#define KINV_255 ((1u << MFIX) / 255u)  // 1/255 in that fixed-point format
+
+static uint32_t Mult(uint8_t x, uint32_t mult) {
+  // 'mult' is a fixed-point factor with MFIX fractional bits.
+  const uint32_t rounded = x * mult + HALF;  // HALF rounds to nearest
+  assert((rounded >> MFIX) <= 255);  // <- 24bit precision is enough for that.
+  return rounded >> MFIX;
+}
+
+#ifdef USE_TABLES_FOR_ALPHA_MULT
+
+static const uint32_t kMultTables[2][256] = {
+  {    // (255u << MFIX) / alpha
+    0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
+    0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
+    0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
+    0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
+    0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
+    0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
+    0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
+    0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
+    0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
+    0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
+    0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
+    0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
+    0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
+    0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
+    0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
+    0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
+    0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
+    0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
+    0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
+    0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
+    0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
+    0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
+    0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
+    0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
+    0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
+    0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
+    0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
+    0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
+    0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
+    0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
+    0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
+    0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
+    0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
+    0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
+    0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
+    0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
+    0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
+    0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
+    0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
+    0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
+    0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
+    0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
+    0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
+  {   // alpha * KINV_255
+    0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
+    0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
+    0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
+    0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
+    0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
+    0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
+    0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
+    0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
+    0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
+    0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
+    0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
+    0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
+    0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
+    0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
+    0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
+    0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
+    0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
+    0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
+    0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
+    0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
+    0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
+    0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
+    0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
+    0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
+    0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
+    0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
+    0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
+    0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
+    0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
+    0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
+    0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
+    0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
+    0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
+    0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
+    0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
+    0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
+    0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
+    0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
+    0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
+    0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
+    0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
+    0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
+    0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
+};
+
+static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {  // a: 0..255
+  return kMultTables[!inverse][a];  // row 0: 255/alpha, row 1: alpha/255
+}
+
+#else
+
+static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {  // 'a' != 0
+  return inverse ? (255u << MFIX) / a : a * KINV_255;  // 255/a or a/255
+}
+
+#endif    // USE_TABLES_FOR_ALPHA_MULT
+
+void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse) {
+  int i;
+  // Scales the three low bytes of every pixel by a factor derived from its
+  // top (alpha) byte; 'inverse' selects 255/alpha instead of alpha/255.
+  for (i = 0; i < width; ++i) {
+    const uint32_t pixel = ptr[i];
+    const uint32_t a = pixel >> 24;
+    if (a == 0xff) continue;  // opaque: left untouched
+    if (a == 0) {             // fully transparent: whole pixel cleared
+      ptr[i] = 0;
+    } else {
+      const uint32_t scale = GetScale(a, inverse);
+      const uint32_t c0 = Mult(pixel >>  0, scale);  // Mult() takes the low
+      const uint32_t c1 = Mult(pixel >>  8, scale);  // byte of each shifted
+      const uint32_t c2 = Mult(pixel >> 16, scale);  // value
+      ptr[i] = (pixel & 0xff000000u) | c0 | (c1 << 8) | (c2 << 16);
+    }
+  }
+}
+
+void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
+                  int width, int inverse) {
+  int i;
+  // 'ptr[i]' is scaled by the factor GetScale() derives from 'alpha[i]'.
+  for (i = 0; i < width; ++i) {
+    const uint32_t a = alpha[i];
+    if (a == 255) {
+      continue;                      // opaque: value is left untouched
+    } else if (a == 0) {
+      ptr[i] = 0;                    // transparent: value is cleared
+    } else {
+      ptr[i] = Mult(ptr[i], GetScale(a, inverse));
+    }
+  }
+}
+
+#undef KINV_255
+#undef HALF
+#undef MFIX
+// Row-level entry points; WebPInitAlphaProcessing() assigns them.
+void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
+void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
+                    int width, int inverse);
+
+//------------------------------------------------------------------------------
+// Generic per-plane calls
+
+void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
+                      int inverse) {
+  int rows_left = num_rows;
+  for (; rows_left > 0; --rows_left) {
+    WebPMultARGBRow((uint32_t*)ptr, width, inverse);  // current row, in place
+    ptr += stride;                                    // 'stride' is in bytes
+  }
+}
+
+void WebPMultRows(uint8_t* ptr, int stride,
+                  const uint8_t* alpha, int alpha_stride,
+                  int width, int num_rows, int inverse) {
+  int rows_left = num_rows;
+  for (; rows_left > 0; --rows_left) {  // plane and alpha advance in lockstep
+    WebPMultRow(ptr, alpha, width, inverse);
+    ptr += stride;
+    alpha += alpha_stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Premultiplied modes
+
+// non-dithered modes
+
+// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
+// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
+// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
+#if 1     // (int)(x * a / 255.)
+#define MULTIPLIER(a)   ((a) * 32897U)
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
+#else     // (int)(x * a / 255. + .5)
+#define MULTIPLIER(a) ((a) * 65793U)
+#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
+#endif
+
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
+                               int w, int h, int stride) {
+  const int alpha_pos = alpha_first ? 0 : 3;   // alpha byte within a pixel
+  const int color_pos = alpha_first ? 1 : 0;   // first of the 3 color bytes
+  int x, y;
+  for (y = 0; y < h; ++y, rgba += stride) {
+    for (x = 0; x < w; ++x) {
+      uint8_t* const color = rgba + 4 * x + color_pos;
+      const uint32_t a = rgba[4 * x + alpha_pos];
+      if (a != 0xff) {   // opaque pixels are left as they are
+        const uint32_t mult = MULTIPLIER(a);
+        color[0] = PREMULTIPLY(color[0], mult);
+        color[1] = PREMULTIPLY(color[1], mult);
+        color[2] = PREMULTIPLY(color[2], mult);
+      }
+    }
+  }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// rgbA4444
+
+#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
+
+static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
+  return (x & 0xf0) | (x >> 4);  // high nibble copied into both nibbles
+}
+
+static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
+  return (x & 0x0f) | (x << 4);  // low nibble copied into both nibbles
+}
+
+static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
+  return (x * m) >> 16;  // 'm' is a 16-bit fixed-point factor (see MULTIPLIER)
+}
+
+static WEBP_INLINE void ApplyAlphaMultiply4444(uint8_t* rgba4444,
+                                               int w, int h, int stride,
+                                               int rg_byte_pos /* 0 or 1 */) {
+  const int ba_byte_pos = rg_byte_pos ^ 1;  // the other byte of each pair
+  int x, y;
+  for (y = 0; y < h; ++y, rgba4444 += stride) {
+    for (x = 0; x < w; ++x) {
+      const uint32_t rg = rgba4444[2 * x + rg_byte_pos];
+      const uint32_t ba = rgba4444[2 * x + ba_byte_pos];
+      const uint8_t a = ba & 0x0f;  // alpha is the low nibble of 'ba'
+      const uint32_t mult = MULTIPLIER(a);
+      const uint8_t r = multiply(dither_hi(rg), mult);
+      const uint8_t g = multiply(dither_lo(rg), mult);
+      const uint8_t b = multiply(dither_hi(ba), mult);
+      rgba4444[2 * x + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
+      rgba4444[2 * x + ba_byte_pos] = (b & 0xf0) | a;
+    }
+  }
+}
+#undef MULTIPLIER
+
+static void ApplyAlphaMultiply_16b(uint8_t* rgba4444,
+                                   int w, int h, int stride) {
+#ifdef WEBP_SWAP_16BIT_CSP
+  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 1);  // rg byte comes second
+#else
+  ApplyAlphaMultiply4444(rgba4444, w, h, stride, 0);  // rg byte comes first
+#endif
+}
+
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
+                         int width, int height,
+                         uint8_t* dst, int dst_stride) {
+  uint32_t all_anded = 0xff;   // stays 0xff only while every value is 0xff
+  const uint8_t* src_row = alpha;
+  uint8_t* dst_row = dst;
+  int x, y;
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const uint8_t value = src_row[x];
+      all_anded &= value;
+      dst_row[4 * x] = value;   // one alpha byte every 4 destination bytes
+    }
+    src_row += alpha_stride;
+    dst_row += dst_stride;
+  }
+  return (all_anded != 0xff) ? 1 : 0;  // 1 when some value was not 0xff
+}
+
+static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
+                                 int width, int height,
+                                 uint32_t* dst, int dst_stride) {
+  int x, y;
+  for (y = 0; y < height; ++y, alpha += alpha_stride, dst += dst_stride) {
+    // Alpha lands in bits 8..15 of each 32-bit value; A/R/B stay zero.
+    for (x = 0; x < width; ++x) {
+      const uint32_t green_only = alpha[x] << 8;
+      dst[x] = green_only;
+    }
+  }
+}
+
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  uint8_t all_anded = 0xff;  // stays 0xff only while every value is 0xff
+  int x, y;
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const uint8_t value = argb[4 * x];  // first byte of each 4-byte pixel
+      all_anded &= value;
+      alpha[x] = value;
+    }
+    argb += argb_stride;
+    alpha += alpha_stride;
+  }
+  // Reports 1 when every extracted value was 0xff, 0 otherwise.
+  return (all_anded == 0xff) ? 1 : 0;
+}
+// Dispatch pointers for the routines above; set by WebPInitAlphaProcessing().
+void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
+void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
+int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
+void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
+int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
+
+//------------------------------------------------------------------------------
+// Init function
+
+extern void WebPInitAlphaProcessingMIPSdspR2(void);
+extern void WebPInitAlphaProcessingSSE2(void);
+extern void WebPInitAlphaProcessingSSE41(void);
+
+static volatile VP8CPUInfo alpha_processing_last_cpuinfo_used =  // last hook
+    (VP8CPUInfo)&alpha_processing_last_cpuinfo_used;  // starts as own address
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessing(void) {
+  if (alpha_processing_last_cpuinfo_used == VP8GetCPUInfo) return;  // done
+  // Start from the plain-C implementations defined in this file.
+  WebPMultARGBRow = WebPMultARGBRowC;
+  WebPMultRow = WebPMultRowC;
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b;
+  WebPDispatchAlpha = DispatchAlpha;
+  WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
+  WebPExtractAlpha = ExtractAlpha;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitAlphaProcessingSSE2();
+#if defined(WEBP_USE_SSE41)
+      if (VP8GetCPUInfo(kSSE4_1)) {  // only probed once SSE2 was reported
+        WebPInitAlphaProcessingSSE41();
+      }
+#endif
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      WebPInitAlphaProcessingMIPSdspR2();
+    }
+#endif
+  }
+  alpha_processing_last_cpuinfo_used = VP8GetCPUInfo;  // enables the early exit
+}
diff --git a/drivers/webp/dsp/alpha_processing_mips_dsp_r2.c b/drivers/webp/dsp/alpha_processing_mips_dsp_r2.c
new file mode 100644
index 0000000000..c631d78905
--- /dev/null
+++ b/drivers/webp/dsp/alpha_processing_mips_dsp_r2.c
@@ -0,0 +1,141 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
+//            Djordje Pesut  (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
+                         int width, int height,
+                         uint8_t* dst, int dst_stride) {
+  uint32_t alpha_mask = 0xffffffff;  // running AND of everything loaded
+  int i, j, temp0;
+  // Copies alpha[] into every 4th byte of dst; returns (folded mask != 0xff).
+  for (j = 0; j < height; ++j) {
+    uint8_t* pdst = dst;
+    const uint8_t* palpha = alpha;
+    for (i = 0; i < (width >> 2); ++i) {
+      int temp1, temp2, temp3;
+      // Four alpha bytes per iteration: one word load, four byte stores.
+      __asm__ volatile (
+        "ulw    %[temp0],      0(%[palpha])                \n\t"
+        "addiu  %[palpha],     %[palpha],     4            \n\t"
+        "addiu  %[pdst],       %[pdst],       16           \n\t"
+        "srl    %[temp1],      %[temp0],      8            \n\t"
+        "srl    %[temp2],      %[temp0],      16           \n\t"
+        "srl    %[temp3],      %[temp0],      24           \n\t"
+        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
+        "sb     %[temp0],      -16(%[pdst])                \n\t"
+        "sb     %[temp1],      -12(%[pdst])                \n\t"
+        "sb     %[temp2],      -8(%[pdst])                 \n\t"
+        "sb     %[temp3],      -4(%[pdst])                 \n\t"
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+          [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
+          [alpha_mask]"+r"(alpha_mask)
+        :
+        : "memory"
+      );
+    }
+    // Leftover (width & 3) values, one byte at a time.
+    for (i = 0; i < (width & 3); ++i) {
+      __asm__ volatile (
+        "lbu    %[temp0],      0(%[palpha])                \n\t"
+        "addiu  %[palpha],     %[palpha],     1            \n\t"
+        "sb     %[temp0],      0(%[pdst])                  \n\t"
+        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
+        "addiu  %[pdst],       %[pdst],       4            \n\t"
+        : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
+          [alpha_mask]"+r"(alpha_mask)
+        :                // NOTE(review): 'lbu' zero-extends, so this AND seems
+        : "memory"       // to clear the mask's upper 24 bits; the final fold
+      );                 // would then never yield 0xff -- confirm.
+    }
+    alpha += alpha_stride;
+    dst += dst_stride;
+  }
+  // Fold the 32-bit mask to 8 bits: AND the two halves, then the two bytes.
+  __asm__ volatile (
+    "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
+    "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
+    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
+    "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
+    "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
+    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
+    : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
+    :
+  );
+
+  return (alpha_mask != 0xff);  // 1 unless all four folded bytes were 0xff
+}
+
+static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
+  int x;  // same opaque / transparent / scale split as WebPMultARGBRowC
+  const uint32_t c_00ffffff = 0x00ffffffu;  // keeps 3 bytes of replicated alpha
+  const uint32_t c_ff000000 = 0xff000000u;  // dividend of the inverse scale
+  const uint32_t c_8000000  = 0x00800000u;  // presumably rounding (1 << 23)
+  const uint32_t c_8000080  = 0x00800080u;  // presumably rounding, two lanes
+  for (x = 0; x < width; ++x) {
+    const uint32_t argb = ptr[x];
+    if (argb < 0xff000000u) {      // alpha < 255
+      if (argb <= 0x00ffffffu) {   // alpha == 0
+        ptr[x] = 0;
+      } else {
+        int temp0, temp1, temp2, temp3, alpha;  // temp0 holds the scale
+        __asm__ volatile (
+          "srl          %[alpha],   %[argb],       24                \n\t"
+          "replv.qb     %[temp0],   %[alpha]                         \n\t"
+          "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
+          "beqz         %[inverse], 0f                               \n\t"
+          "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
+          "mflo         %[temp0]                                     \n\t"
+        "0:                                                          \n\t"
+          "andi         %[temp1],   %[argb],       0xff              \n\t"
+          "ext          %[temp2],   %[argb],       8,             8  \n\t"
+          "ext          %[temp3],   %[argb],       16,            8  \n\t"
+          "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
+          "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
+          "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
+          "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
+          "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
+          "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
+          "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
+          "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
+          : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+            [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
+          : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
+            [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
+            [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
+          : "memory", "hi", "lo"
+        );
+        ptr[x] = temp1;  // the asm leaves the repacked pixel in temp1
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitAlphaProcessingMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
+  WebPDispatchAlpha = DispatchAlpha;  // install this file's two routines
+  WebPMultARGBRow = MultARGBRow;      // over the current pointer values
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/alpha_processing_sse2.c b/drivers/webp/dsp/alpha_processing_sse2.c
new file mode 100644
index 0000000000..5acb481dcd
--- /dev/null
+++ b/drivers/webp/dsp/alpha_processing_sse2.c
@@ -0,0 +1,298 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+//------------------------------------------------------------------------------
+
+static int DispatchAlpha(const uint8_t* alpha, int alpha_stride,
+                         int width, int height,
+                         uint8_t* dst, int dst_stride) {
+  // Copies alpha[] into byte 0 of every 4-byte dst pixel, row by row.
+  // Returns non-zero if any copied value differs from 0xff ('alpha_and').
+  uint32_t alpha_and = 0xff;
+  int i, j;
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u);  // to preserve RGB
+  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);  // low 8 bytes set
+  __m128i all_alphas = all_0xff;
+
+  // We must be able to access 3 extra bytes after the last written byte
+  // 'dst[4 * width - 4]', because we don't know if alpha is the first or the
+  // last byte of the quadruplet.
+  const int limit = (width - 1) & ~7;
+
+  for (j = 0; j < height; ++j) {
+    __m128i* out = (__m128i*)dst;
+    for (i = 0; i < limit; i += 8) {
+      // load 8 alpha bytes
+      const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
+      const __m128i a1 = _mm_unpacklo_epi8(a0, zero);  // 8 -> 16 bits
+      const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);  // 16 -> 32 bits
+      const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
+      // load 8 dst pixels (32 bytes)
+      const __m128i b0_lo = _mm_loadu_si128(out + 0);
+      const __m128i b0_hi = _mm_loadu_si128(out + 1);
+      // mask dst alpha values
+      const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
+      const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
+      // combine
+      const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
+      const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
+      // store
+      _mm_storeu_si128(out + 0, b2_lo);
+      _mm_storeu_si128(out + 1, b2_hi);
+      // accumulate eight alpha 'and' in parallel
+      all_alphas = _mm_and_si128(all_alphas, a0);
+      out += 2;
+    }
+    for (; i < width; ++i) {  // scalar tail: remaining pixels of the row
+      const uint32_t alpha_value = alpha[i];
+      dst[4 * i] = alpha_value;
+      alpha_and &= alpha_value;
+    }
+    alpha += alpha_stride;
+    dst += dst_stride;
+  }
+  // Combine the eight alpha 'and' into an 8-bit mask.
+  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+  return (alpha_and != 0xff);
+}
+
+static void DispatchAlphaToGreen(const uint8_t* alpha, int alpha_stride,
+                                 int width, int height,
+                                 uint32_t* dst, int dst_stride) {
+  int i, j;
+  const __m128i zero = _mm_setzero_si128();
+  const int limit = width & ~15;  // pixels covered by full 16-wide steps
+  for (j = 0; j < height; ++j) {
+    for (i = 0; i < limit; i += 16) {   // 16 alphas -> 16 pixels of (a << 8)
+      const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
+      const __m128i a1 = _mm_unpacklo_epi8(zero, a0);  // note the 'zero' first!
+      const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
+      const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
+      const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
+      const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
+      const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
+      _mm_storeu_si128((__m128i*)&dst[i +  0], a2_lo);
+      _mm_storeu_si128((__m128i*)&dst[i +  4], a2_hi);
+      _mm_storeu_si128((__m128i*)&dst[i +  8], b2_lo);
+      _mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
+    }
+    for (; i < width; ++i) dst[i] = alpha[i] << 8;  // scalar tail
+    alpha += alpha_stride;
+    dst += dst_stride;  // dst_stride counts uint32_t pixels, not bytes
+  }
+}
+
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  // Copies byte 0 of every 4-byte argb pixel into alpha[], row by row.
+  // Returns non-zero if all copied values equal 0xff ('alpha_and').
+  uint32_t alpha_and = 0xff;
+  int i, j;
+  const __m128i a_mask = _mm_set1_epi32(0xffu);  // to preserve alpha
+  const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);  // low 8 bytes set
+  __m128i all_alphas = all_0xff;
+
+  // We must be able to access 3 extra bytes after the last read byte
+  // 'src[4 * width - 4]', because we don't know if alpha is the first or the
+  // last byte of the quadruplet.
+  const int limit = (width - 1) & ~7;
+
+  for (j = 0; j < height; ++j) {
+    const __m128i* src = (const __m128i*)argb;
+    for (i = 0; i < limit; i += 8) {
+      // load 32 argb bytes
+      const __m128i a0 = _mm_loadu_si128(src + 0);
+      const __m128i a1 = _mm_loadu_si128(src + 1);
+      const __m128i b0 = _mm_and_si128(a0, a_mask);
+      const __m128i b1 = _mm_and_si128(a1, a_mask);
+      const __m128i c0 = _mm_packs_epi32(b0, b1);  // 32 -> 16 bits
+      const __m128i d0 = _mm_packus_epi16(c0, c0);  // 16 -> 8 bits
+      // store
+      _mm_storel_epi64((__m128i*)&alpha[i], d0);
+      // accumulate eight alpha 'and' in parallel
+      all_alphas = _mm_and_si128(all_alphas, d0);
+      src += 2;
+    }
+    for (; i < width; ++i) {  // scalar tail: remaining pixels of the row
+      const uint32_t alpha_value = argb[4 * i];
+      alpha[i] = alpha_value;
+      alpha_and &= alpha_value;
+    }
+    argb += argb_stride;
+    alpha += alpha_stride;
+  }
+  // Combine the eight alpha 'and' into an 8-bit mask.
+  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+  return (alpha_and == 0xff);
+}
+
+//------------------------------------------------------------------------------
+// Non-dither premultiplied modes
+
+#define MULTIPLIER(a)   ((a) * 0x8081)  // 0x8081 / 2^23 ~= 1 / 255
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)  // ~= x * a / 255
+
+// We can't use a 'const int' for the SHUFFLE value, because it has to be an
+// immediate in the _mm_shufflexx_epi16() instruction: we need a macro here.
+#define APPLY_ALPHA(RGBX, SHUFFLE, MASK, MULT) do {             \
+  const __m128i argb0 = _mm_loadl_epi64((__m128i*)&(RGBX));     \
+  const __m128i argb1 = _mm_unpacklo_epi8(argb0, zero);         \
+  const __m128i alpha0 = _mm_and_si128(argb1, MASK);            \
+  const __m128i alpha1 = _mm_shufflelo_epi16(alpha0, SHUFFLE);  \
+  const __m128i alpha2 = _mm_shufflehi_epi16(alpha1, SHUFFLE);  \
+  /* alpha2 = [0 a0 a0 a0][0 a1 a1 a1] */                       \
+  const __m128i scale0 = _mm_mullo_epi16(alpha2, MULT);         \
+  const __m128i scale1 = _mm_mulhi_epu16(alpha2, MULT);         \
+  const __m128i argb2 = _mm_mulhi_epu16(argb1, scale0);         \
+  const __m128i argb3 = _mm_mullo_epi16(argb1, scale1);         \
+  const __m128i argb4 = _mm_adds_epu16(argb2, argb3);           \
+  const __m128i argb5 = _mm_srli_epi16(argb4, 7);               \
+  const __m128i argb6 = _mm_or_si128(argb5, alpha0);            \
+  const __m128i argb7 = _mm_packus_epi16(argb6, zero);          \
+  _mm_storel_epi64((__m128i*)&(RGBX), argb7);                   \
+} while (0)
+
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
+                               int w, int h, int stride) {
+  const __m128i zero = _mm_setzero_si128();
+  const int kSpan = 2;  // pixels per APPLY_ALPHA call (8 bytes)
+  const int w2 = w & ~(kSpan - 1);  // w rounded down to a multiple of kSpan
+  while (h-- > 0) {  // one row per iteration, in place
+    uint32_t* const rgbx = (uint32_t*)rgba;
+    int i;
+    if (!alpha_first) {  // alpha is byte 3 of each pixel
+      const __m128i kMask = _mm_set_epi16(0xff, 0, 0, 0, 0xff, 0, 0, 0);
+      const __m128i kMult =
+          _mm_set_epi16(0, 0x8081, 0x8081, 0x8081, 0, 0x8081, 0x8081, 0x8081);
+      for (i = 0; i < w2; i += kSpan) {
+        APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 3, 3, 3), kMask, kMult);
+      }
+    } else {  // alpha is byte 0 of each pixel
+      const __m128i kMask = _mm_set_epi16(0, 0, 0, 0xff, 0, 0, 0, 0xff);
+      const __m128i kMult =
+          _mm_set_epi16(0x8081, 0x8081, 0x8081, 0, 0x8081, 0x8081, 0x8081, 0);
+      for (i = 0; i < w2; i += kSpan) {
+        APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 0, 0, 3), kMask, kMult);
+      }
+    }
+    // Finish with left-overs.
+    for (; i < w; ++i) {
+      uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
+      const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
+      const uint32_t a = alpha[4 * i];
+      if (a != 0xff) {  // a == 0xff: pixel is left untouched
+        const uint32_t mult = MULTIPLIER(a);
+        rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
+        rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
+        rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
+      }
+    }
+    rgba += stride;  // stride is in bytes (rgba is uint8_t*)
+  }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// -----------------------------------------------------------------------------
+// Apply alpha value to rows
+
+// We use: kINV255 = (1 << 24) / 255 = 0x010101
+// So: a * kINV255 = (a << 16) | [(a << 8) | a]
+// -> for a channel x, _mm_mullo_epi16(x, a) takes care of the (a << 16) part,
+// and _mm_mulhi_epu16(x, a * 0x0101) takes care of the "(a << 8) | a" one.
+
+static void MultARGBRow(uint32_t* const ptr, int width, int inverse) {
+  int x = 0;
+  if (!inverse) {  // SIMD path handles only the non-inverse case
+    const int kSpan = 2;  // pixels per loop iteration (8 bytes)
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i kRound =
+        _mm_set_epi16(0, 1 << 7, 1 << 7, 1 << 7, 0, 1 << 7, 1 << 7, 1 << 7);
+    const __m128i kMult =
+        _mm_set_epi16(0, 0x0101, 0x0101, 0x0101, 0, 0x0101, 0x0101, 0x0101);
+    const __m128i kOne64 = _mm_set_epi16(1u << 8, 0, 0, 0, 1u << 8, 0, 0, 0);
+    const int w2 = width & ~(kSpan - 1);  // width rounded down to even
+    for (x = 0; x < w2; x += kSpan) {
+      const __m128i argb0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
+      const __m128i argb1 = _mm_unpacklo_epi8(argb0, zero);
+      const __m128i tmp0 = _mm_shufflelo_epi16(argb1, _MM_SHUFFLE(3, 3, 3, 3));
+      const __m128i tmp1 = _mm_shufflehi_epi16(tmp0, _MM_SHUFFLE(3, 3, 3, 3));
+      const __m128i tmp2 = _mm_srli_epi64(tmp1, 16);
+      const __m128i scale0 = _mm_mullo_epi16(tmp1, kMult);
+      const __m128i scale1 = _mm_or_si128(tmp2, kOne64);
+      const __m128i argb2 = _mm_mulhi_epu16(argb1, scale0);
+      const __m128i argb3 = _mm_mullo_epi16(argb1, scale1);
+      const __m128i argb4 = _mm_adds_epu16(argb2, argb3);
+      const __m128i argb5 = _mm_adds_epu16(argb4, kRound);
+      const __m128i argb6 = _mm_srli_epi16(argb5, 8);
+      const __m128i argb7 = _mm_packus_epi16(argb6, zero);
+      _mm_storel_epi64((__m128i*)&ptr[x], argb7);
+    }
+  }
+  width -= x;  // pixels left over (all of them when 'inverse' is set)
+  if (width > 0) WebPMultARGBRowC(ptr + x, width, inverse);
+}
+
+static void MultRow(uint8_t* const ptr, const uint8_t* const alpha,
+                    int width, int inverse) {
+  int x = 0;
+  if (!inverse) {  // SIMD path handles only the non-inverse case
+    const int kSpan = 8;  // values per loop iteration
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i kRound = _mm_set1_epi16(1 << 7);
+    const int w2 = width & ~(kSpan - 1);  // width rounded down to kSpan
+    for (x = 0; x < w2; x += kSpan) {
+      const __m128i v0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
+      const __m128i v1 = _mm_unpacklo_epi8(v0, zero);
+      const __m128i alpha0 = _mm_loadl_epi64((const __m128i*)&alpha[x]);
+      const __m128i alpha1 = _mm_unpacklo_epi8(alpha0, zero);  // a
+      const __m128i alpha2 = _mm_unpacklo_epi8(alpha0, alpha0);  // a * 0x0101
+      const __m128i v2 = _mm_mulhi_epu16(v1, alpha2);
+      const __m128i v3 = _mm_mullo_epi16(v1, alpha1);
+      const __m128i v4 = _mm_adds_epu16(v2, v3);
+      const __m128i v5 = _mm_adds_epu16(v4, kRound);
+      const __m128i v6 = _mm_srli_epi16(v5, 8);
+      const __m128i v7 = _mm_packus_epi16(v6, zero);
+      _mm_storel_epi64((__m128i*)&ptr[x], v7);
+    }
+  }
+  width -= x;  // values left over (all of them when 'inverse' is set)
+  if (width > 0) WebPMultRowC(ptr + x, alpha + x, width, inverse);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitAlphaProcessingSSE2(void);  // prototype
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
+  WebPMultARGBRow = MultARGBRow;  // install the SSE2 versions defined above
+  WebPMultRow = MultRow;
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
+  WebPDispatchAlpha = DispatchAlpha;
+  WebPDispatchAlphaToGreen = DispatchAlphaToGreen;
+  WebPExtractAlpha = ExtractAlpha;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/alpha_processing_sse41.c b/drivers/webp/dsp/alpha_processing_sse41.c
new file mode 100644
index 0000000000..986fde94ed
--- /dev/null
+++ b/drivers/webp/dsp/alpha_processing_sse41.c
@@ -0,0 +1,92 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Utilities for processing transparent channel, SSE4.1 variant.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+
+#include <smmintrin.h>
+
+//------------------------------------------------------------------------------
+
+static int ExtractAlpha(const uint8_t* argb, int argb_stride,
+                        int width, int height,
+                        uint8_t* alpha, int alpha_stride) {
+  // Copies byte 0 of every 4-byte argb pixel into alpha[], row by row.
+  // Returns non-zero if all copied values equal 0xff ('alpha_and').
+  uint32_t alpha_and = 0xff;
+  int i, j;
+  const __m128i all_0xff = _mm_set1_epi32(~0u);
+  __m128i all_alphas = all_0xff;
+
+  // We must be able to access 3 extra bytes after the last read byte
+  // 'src[4 * width - 4]', because we don't know if alpha is the first or the
+  // last byte of the quadruplet.
+  const int limit = (width - 1) & ~15;
+  const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
+                                          -1, -1, -1, -1, 12, 8, 4, 0);  // -> 0..3
+  const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
+                                          12, 8, 4, 0, -1, -1, -1, -1);  // -> 4..7
+  const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0,
+                                          -1, -1, -1, -1, -1, -1, -1, -1);  // 8..11
+  const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1,
+                                          -1, -1, -1, -1, -1, -1, -1, -1);  // 12..15
+  for (j = 0; j < height; ++j) {
+    const __m128i* src = (const __m128i*)argb;
+    for (i = 0; i < limit; i += 16) {
+      // load 64 argb bytes
+      const __m128i a0 = _mm_loadu_si128(src + 0);
+      const __m128i a1 = _mm_loadu_si128(src + 1);
+      const __m128i a2 = _mm_loadu_si128(src + 2);
+      const __m128i a3 = _mm_loadu_si128(src + 3);
+      const __m128i b0 = _mm_shuffle_epi8(a0, kCstAlpha0);
+      const __m128i b1 = _mm_shuffle_epi8(a1, kCstAlpha1);
+      const __m128i b2 = _mm_shuffle_epi8(a2, kCstAlpha2);
+      const __m128i b3 = _mm_shuffle_epi8(a3, kCstAlpha3);
+      const __m128i c0 = _mm_or_si128(b0, b1);
+      const __m128i c1 = _mm_or_si128(b2, b3);
+      const __m128i d0 = _mm_or_si128(c0, c1);  // 16 gathered alpha bytes
+      // store
+      _mm_storeu_si128((__m128i*)&alpha[i], d0);
+      // accumulate sixteen alpha 'and' in parallel
+      all_alphas = _mm_and_si128(all_alphas, d0);
+      src += 4;
+    }
+    for (; i < width; ++i) {  // scalar tail: remaining pixels of the row
+      const uint32_t alpha_value = argb[4 * i];
+      alpha[i] = alpha_value;
+      alpha_and &= alpha_value;
+    }
+    argb += argb_stride;
+    alpha += alpha_stride;
+  }
+  // Combine the sixteen alpha 'and' into a 16-bit mask.
+  alpha_and |= 0xff00u;  // pretend the upper bits [8..15] were tested ok.
+  alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
+  return (alpha_and == 0xffffu);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitAlphaProcessingSSE41(void);  // prototype
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
+  WebPExtractAlpha = ExtractAlpha;  // install the SSE4.1 version defined above
+}
+
+#else  // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE41)
+
+#endif  // WEBP_USE_SSE41
diff --git a/drivers/webp/dsp/argb.c b/drivers/webp/dsp/argb.c
new file mode 100644
index 0000000000..cc1f9a96c3
--- /dev/null
+++ b/drivers/webp/dsp/argb.c
@@ -0,0 +1,68 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions.
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);  // 0xAARRGGBB
+}
+
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  int i;
+  for (i = 0; i < len; ++i) {  // each channel is read 4 bytes apart
+    out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+  }
+}
+
+static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                    int len, int step, uint32_t* out) {
+  int i, offset = 0;
+  for (i = 0; i < len; ++i) {  // alpha is forced to 0xff for every pixel
+    out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
+    offset += step;  // samples of one channel are 'step' bytes apart
+  }
+}
+
+void (*VP8PackARGB)(const uint8_t*, const uint8_t*, const uint8_t*,
+                    const uint8_t*, int, uint32_t*);  // see VP8EncDspARGBInit
+void (*VP8PackRGB)(const uint8_t*, const uint8_t*, const uint8_t*,
+                   int, int, uint32_t*);  // see VP8EncDspARGBInit
+
+extern void VP8EncDspARGBInitMIPSdspR2(void);
+extern void VP8EncDspARGBInitSSE2(void);
+
+static volatile VP8CPUInfo argb_last_cpuinfo_used =
+    (VP8CPUInfo)&argb_last_cpuinfo_used;  // own address as initial sentinel
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInit(void) {
+  if (argb_last_cpuinfo_used == VP8GetCPUInfo) return;  // already set up
+
+  VP8PackARGB = PackARGB;  // plain-C defaults, possibly replaced below
+  VP8PackRGB = PackRGB;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8EncDspARGBInitSSE2();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8EncDspARGBInitMIPSdspR2();
+    }
+#endif
+  }
+  argb_last_cpuinfo_used = VP8GetCPUInfo;  // remember the detector we ran with
+}
diff --git a/drivers/webp/dsp/argb_mips_dsp_r2.c b/drivers/webp/dsp/argb_mips_dsp_r2.c
new file mode 100644
index 0000000000..af65acb8ff
--- /dev/null
+++ b/drivers/webp/dsp/argb_mips_dsp_r2.c
@@ -0,0 +1,110 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions (mips version).
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  int temp0, temp1, temp2, temp3, offset;
+  const int rest = len & 1;  // 1 when len is odd
+  const uint32_t* const loop_end = out + len - rest;  // end of the main loop
+  const int step = 4;  // byte offset added to 'offset' after each pixel
+  __asm__ volatile (
+    "xor          %[offset],   %[offset], %[offset]    \n\t"
+    "beq          %[loop_end], %[out],    0f           \n\t"
+  "2:                                                  \n\t"
+    "lbux         %[temp0],    %[offset](%[a])         \n\t"
+    "lbux         %[temp1],    %[offset](%[r])         \n\t"
+    "lbux         %[temp2],    %[offset](%[g])         \n\t"
+    "lbux         %[temp3],    %[offset](%[b])         \n\t"
+    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+    "addiu        %[out],      %[out],    4            \n\t"
+    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+    "sw           %[temp0],    -4(%[out])              \n\t"
+    "addu         %[offset],   %[offset], %[step]      \n\t"
+    "bne          %[loop_end], %[out],    2b           \n\t"
+  "0:                                                  \n\t"
+    "beq          %[rest],     $zero,     1f           \n\t"
+    "lbux         %[temp0],    %[offset](%[a])         \n\t"
+    "lbux         %[temp1],    %[offset](%[r])         \n\t"
+    "lbux         %[temp2],    %[offset](%[g])         \n\t"
+    "lbux         %[temp3],    %[offset](%[b])         \n\t"
+    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
+    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
+    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
+    "sw           %[temp0],    0(%[out])               \n\t"
+  "1:                                                  \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
+    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      [loop_end]"r"(loop_end), [rest]"r"(rest)
+    : "memory"  // the asm stores through 'out'
+  );
+}
+
+static void PackRGB(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                    int len, int step, uint32_t* out) {
+  int temp0, temp1, temp2, offset;
+  const int rest = len & 1;  // 1 when len is odd
+  const int a = 0xff;  // constant alpha inserted by the 'ins' instructions
+  const uint32_t* const loop_end = out + len - rest;  // end of the main loop
+  __asm__ volatile (
+    "xor          %[offset],   %[offset], %[offset]    \n\t"
+    "beq          %[loop_end], %[out],    0f           \n\t"
+  "2:                                                  \n\t"
+    "lbux         %[temp0],    %[offset](%[r])         \n\t"
+    "lbux         %[temp1],    %[offset](%[g])         \n\t"
+    "lbux         %[temp2],    %[offset](%[b])         \n\t"
+    "ins          %[temp0],    %[a],      16,     16   \n\t"
+    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+    "addiu        %[out],      %[out],    4            \n\t"
+    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+    "sw           %[temp0],    -4(%[out])              \n\t"
+    "addu         %[offset],   %[offset], %[step]      \n\t"
+    "bne          %[loop_end], %[out],    2b           \n\t"
+  "0:                                                  \n\t"
+    "beq          %[rest],     $zero,     1f           \n\t"
+    "lbux         %[temp0],    %[offset](%[r])         \n\t"
+    "lbux         %[temp1],    %[offset](%[g])         \n\t"
+    "lbux         %[temp2],    %[offset](%[b])         \n\t"
+    "ins          %[temp0],    %[a],      16,     16   \n\t"
+    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
+    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
+    "sw           %[temp0],    0(%[out])               \n\t"
+  "1:                                                  \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [offset]"=&r"(offset), [out]"+&r"(out)
+    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
+      [loop_end]"r"(loop_end), [rest]"r"(rest)
+    : "memory"  // the asm stores through 'out'
+  );
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspARGBInitMIPSdspR2(void);  // prototype
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitMIPSdspR2(void) {
+  VP8PackARGB = PackARGB;  // install the MIPS dspR2 versions defined above
+  VP8PackRGB = PackRGB;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8EncDspARGBInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/argb_sse2.c b/drivers/webp/dsp/argb_sse2.c
new file mode 100644
index 0000000000..afcb1957e7
--- /dev/null
+++ b/drivers/webp/dsp/argb_sse2.c
@@ -0,0 +1,67 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//   ARGB making functions (SSE2 version).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+
+static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
+  return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);  // 0xAARRGGBB
+}
+
+static void PackARGB(const uint8_t* a, const uint8_t* r, const uint8_t* g,
+                     const uint8_t* b, int len, uint32_t* out) {
+  if (g == r + 1) {  // RGBA input order. Need to swap R and B.
+    int i = 0;
+    const int len_max = len & ~3;  // max length processed in main loop
+    const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu);
+    assert(b == r + 2);
+    assert(a == r + 3);
+    for (; i < len_max; i += 4) {  // 4 pixels (16 bytes) per iteration
+      const __m128i A = _mm_loadu_si128((const __m128i*)(r + 4 * i));
+      const __m128i B = _mm_and_si128(A, red_blue_mask);     // R 0 B 0
+      const __m128i C = _mm_andnot_si128(red_blue_mask, A);  // 0 G 0 A
+      const __m128i D = _mm_shufflelo_epi16(B, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128i E = _mm_shufflehi_epi16(D, _MM_SHUFFLE(2, 3, 0, 1));
+      const __m128i F = _mm_or_si128(E, C);  // swapped R/B merged with G/A
+      _mm_storeu_si128((__m128i*)(out + i), F);
+    }
+    for (; i < len; ++i) {  // scalar tail
+      out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
+    }
+  } else {  // otherwise B,G,R,A byte order is expected (asserted below)
+    assert(g == b + 1);
+    assert(r == b + 2);
+    assert(a == b + 3);
+    memcpy(out, b, len * 4);  // 4 bytes per pixel copied verbatim
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspARGBInitSSE2(void);  // prototype
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspARGBInitSSE2(void) {
+  VP8PackARGB = PackARGB;  // only PackARGB has an SSE2 version in this file
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspARGBInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/cost.c b/drivers/webp/dsp/cost.c
new file mode 100644
index 0000000000..fe72d26e79
--- /dev/null
+++ b/drivers/webp/dsp/cost.c
@@ -0,0 +1,412 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+#include "../enc/cost.h"
+
+//------------------------------------------------------------------------------
+// Boolean-cost table
+
+const uint16_t VP8EntropyCost[256] = {
+  1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
+  1178, 1152, 1110, 1076, 1061, 1024, 1024,  992,  968,  951,
+   939,  911,  896,  878,  871,  854,  838,  820,  811,  794,
+   786,  768,  768,  752,  740,  732,  720,  709,  704,  690,
+   683,  672,  666,  655,  647,  640,  631,  622,  615,  607,
+   598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
+   534,  528,  522,  512,  512,  504,  500,  494,  488,  483,
+   477,  473,  467,  461,  458,  452,  448,  443,  438,  434,
+   427,  424,  419,  415,  410,  406,  403,  399,  394,  390,
+   384,  384,  377,  374,  370,  366,  362,  359,  355,  351,
+   347,  342,  342,  336,  333,  330,  326,  323,  320,  316,
+   312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
+   280,  277,  274,  272,  268,  266,  262,  256,  256,  256,
+   251,  248,  245,  242,  240,  237,  234,  232,  228,  226,
+   223,  221,  218,  216,  214,  211,  208,  205,  203,  201,
+   198,  196,  192,  191,  188,  187,  183,  181,  179,  176,
+   175,  171,  171,  168,  165,  163,  160,  159,  156,  154,
+   152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
+   131,  128,  128,  125,  123,  121,  119,  117,  115,  113,
+   111,  110,  107,  105,  103,  102,  100,   98,   96,   94,
+    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,
+    74,   73,   71,   69,   67,   66,   64,   63,   61,   59,
+    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,
+    41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,
+    10,    9,    7,    6,    4,    3
+};
+
+//------------------------------------------------------------------------------
+// Level cost tables
+
+// Fixed costs for coding levels, deduced from the coding tree.
+// This is only the part that doesn't depend on the probability state.
+const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
+     0,  256,  256,  256,  256,  432,  618,  630,
+   731,  640,  640,  828,  901,  948, 1021, 1101,
+  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
+  1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
+  1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
+  1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
+  1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
+  1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
+  1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
+  1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
+  1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
+  1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
+  1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
+  2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
+  2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
+  2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
+  2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
+  2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
+  2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
+  2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
+  2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
+  2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
+  2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
+  2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
+  2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
+  2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
+  2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
+  2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
+  2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
+  2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
+  2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
+  3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
+  3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
+  3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
+  3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
+  3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
+  3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
+  3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
+  3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
+  3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
+  3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
+  2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
+  2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
+  3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
+  3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
+  3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
+  3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
+  3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
+  3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
+  3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
+  3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
+  3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
+  3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
+  3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
+  3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
+  3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
+  3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
+  3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
+  3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
+  3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
+  3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
+  3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
+  4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
+  4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
+  4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
+  4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
+  4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
+  4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
+  4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
+  4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
+  4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
+  4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
+  4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
+  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+  6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
+  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+  6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
+  5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
+  5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
+  5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
+  5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
+  5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
+  5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
+  5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
+  5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
+  5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
+  5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
+  5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
+  6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
+  6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
+  6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
+  6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
+  6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
+  6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
+  6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
+  6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
+  6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
+  6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
+  6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
+  6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
+  6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
+  6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
+  6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
+  6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
+  6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
+  6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
+  6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
+  7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
+  7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
+  6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
+  6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
+  6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
+  6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
+  6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
+  6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
+  6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
+  6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
+  6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
+  6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
+  7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
+  7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
+  7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
+  7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
+  7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
+  7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
+  7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
+  7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
+  7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
+  7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
+  7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
+  7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
+  7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
+};
+
+//------------------------------------------------------------------------------
+// Tables for level coding
+
+const uint8_t VP8EncBands[16 + 1] = {  // band index per coefficient position
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  // sentinel
+};
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+  const int last = res->last;
+  int pos = res->first;
+  // Strictly prob[VP8EncBands[pos]]; identical for pos = 0 or 1.
+  const int p0 = res->prob[pos][ctx0][0];
+  CostArrayPtr const costs = res->costs;
+  const uint16_t* level_costs = costs[pos][ctx0];
+  // The cost tables fold in bit_cost(1, p0) only when ctx != 0 (as the syntax
+  // requires), so for ctx0 == 0 it has to be charged explicitly up front.
+  int total = (ctx0 != 0) ? 0 : VP8BitCost(1, p0);
+  int level;
+
+  if (last < 0) return VP8BitCost(0, p0);  // no non-zero coefficient
+
+  // Every coefficient before the last one: the context for the next
+  // position is the current magnitude clamped to 2.
+  while (pos < last) {
+    level = abs(res->coeffs[pos]);
+    total += VP8LevelCost(level_costs, level);
+    ++pos;
+    level_costs = costs[pos][(level < 2) ? level : 2];
+  }
+
+  // The last coefficient is always non-zero.
+  level = abs(res->coeffs[pos]);
+  assert(level != 0);
+  total += VP8LevelCost(level_costs, level);
+  if (pos < 15) {
+    // Add the cost of a 0 bit under the next position's first probability.
+    const int band = VP8EncBands[pos + 1];
+    total += VP8BitCost(0, res->prob[band][(level == 1) ? 1 : 2][0]);
+  }
+  return total;
+}
+
+static void SetResidualCoeffs(const int16_t* const coeffs,
+                              VP8Residual* const res) {
+  // Walk back from position 15 to find the last non-zero coefficient.
+  // 'pos' ends up at -1 when all 16 values are zero.
+  int pos = 15;
+  // Callers starting at first > 0 must have left coeffs[0] at zero.
+  assert(res->first == 0 || coeffs[0] == 0);
+  while (pos >= 0 && coeffs[pos] == 0) {
+    --pos;
+  }
+  res->last = pos;
+  res->coeffs = coeffs;
+}
+
+//------------------------------------------------------------------------------
+// init function
+
+VP8GetResidualCostFunc VP8GetResidualCost;
+VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
+
+extern void VP8EncDspCostInitMIPS32(void);
+extern void VP8EncDspCostInitMIPSdspR2(void);
+extern void VP8EncDspCostInitSSE2(void);
+
+static volatile VP8CPUInfo cost_last_cpuinfo_used =
+    (VP8CPUInfo)&cost_last_cpuinfo_used;  // starts as its own address
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInit(void) {
+  if (cost_last_cpuinfo_used == VP8GetCPUInfo) return;  // already done
+
+  VP8GetResidualCost = GetResidualCost;  // plain-C versions first
+  VP8SetResidualCoeffs = SetResidualCoeffs;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8EncDspCostInitMIPS32();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8EncDspCostInitMIPSdspR2();
+    }
+#endif
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8EncDspCostInitSSE2();
+    }
+#endif
+  }
+
+  cost_last_cpuinfo_used = VP8GetCPUInfo;  // checked by the guard at the top
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/dsp/cost_mips32.c b/drivers/webp/dsp/cost_mips32.c
new file mode 100644
index 0000000000..d1e240e191
--- /dev/null
+++ b/drivers/webp/dsp/cost_mips32.c
@@ -0,0 +1,154 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "../enc/cost.h"
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+  int temp0, temp1;  // scratch registers for the asm block
+  int v_reg, ctx_reg;  // |coeff| and its context (0, 1 or 2) inside the asm
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  int p0 = res->prob[n][ctx0][0];
+  CostArrayPtr const costs = res->costs;
+  const uint16_t* t = costs[n][ctx0];
+  // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+  // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+  // be missing during the loop.
+  int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+  const int16_t* res_coeffs = res->coeffs;
+  const int res_last = res->last;
+  const int const_max_level = MAX_VARIABLE_LEVEL;  // upper clamp for t[] index
+  const int const_2 = 2;  // context used when |coeff| >= 2
+  const uint16_t** p_costs = &costs[n][0];  // cost-table row for position n
+  const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs);  // bytes per row
+
+  if (res->last < 0) {  // no non-zero coefficient
+    return VP8BitCost(0, p0);
+  }
+
+  __asm__ volatile (  // adds the cost of positions n .. res_last - 1 to 'cost'
+    ".set      push                                                        \n\t"
+    ".set      noreorder                                                   \n\t"
+    "subu      %[temp1],        %[res_last],        %[n]                   \n\t"
+    "sll       %[temp0],        %[n],               1                      \n\t"
+    "blez      %[temp1],        2f                                         \n\t"
+    " addu     %[res_coeffs],   %[res_coeffs],      %[temp0]               \n\t"
+  "1:                                                                      \n\t"
+    "lh        %[v_reg],        0(%[res_coeffs])                           \n\t"
+    "addiu     %[n],            %[n],               1                      \n\t"
+    "negu      %[temp0],        %[v_reg]                                   \n\t"
+    "slti      %[temp1],        %[v_reg],           0                      \n\t"
+    "movn      %[v_reg],        %[temp0],           %[temp1]               \n\t"
+    "sltiu     %[temp0],        %[v_reg],           2                      \n\t"
+    "move      %[ctx_reg],      %[v_reg]                                   \n\t"
+    "movz      %[ctx_reg],      %[const_2],         %[temp0]               \n\t"
+    "sll       %[temp1],        %[v_reg],           1                      \n\t"
+    "addu      %[temp1],        %[temp1],           %[VP8LevelFixedCosts]  \n\t"
+    "lhu       %[temp1],        0(%[temp1])                                \n\t"
+    "slt       %[temp0],        %[v_reg],           %[const_max_level]     \n\t"
+    "movz      %[v_reg],        %[const_max_level], %[temp0]               \n\t"
+    "addu      %[cost],         %[cost],            %[temp1]               \n\t"
+    "sll       %[v_reg],        %[v_reg],           1                      \n\t"
+    "sll       %[ctx_reg],      %[ctx_reg],         2                      \n\t"
+    "addu      %[v_reg],        %[v_reg],           %[t]                   \n\t"
+    "lhu       %[temp0],        0(%[v_reg])                                \n\t"
+    "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]         \n\t"
+    "addu      %[t],            %[p_costs],         %[ctx_reg]             \n\t"
+    "addu      %[cost],         %[cost],            %[temp0]               \n\t"
+    "addiu     %[res_coeffs],   %[res_coeffs],      2                      \n\t"
+    "bne       %[n],            %[res_last],        1b                     \n\t"
+    " lw       %[t],            0(%[t])                                    \n\t"
+  "2:                                                                      \n\t"
+    ".set      pop                                                         \n\t"
+    : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
+      [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
+      [temp1]"=&r"(temp1), [res_coeffs]"+&r"(res_coeffs)
+    : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
+      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
+      [inc_p_costs]"r"(inc_p_costs)
+    : "memory"
+  );
+
+  // Last coefficient is always non-zero
+  {
+    const int v = abs(res->coeffs[n]);  // n and t were advanced by the asm loop
+    assert(v != 0);
+    cost += VP8LevelCost(t, v);
+    if (n < 15) {
+      const int b = VP8EncBands[n + 1];
+      const int ctx = (v == 1) ? 1 : 2;
+      const int last_p0 = res->prob[b][ctx][0];
+      cost += VP8BitCost(0, last_p0);
+    }
+  }
+  return cost;
+}
+
+static void SetResidualCoeffs(const int16_t* const coeffs,
+                              VP8Residual* const res) {
+  const int16_t* p_coeffs = (int16_t*)coeffs;  // cursor moved by the asm
+  int temp0, temp1, temp2, n, n1;  // temp2 carries the result out of the asm
+  assert(res->first == 0 || coeffs[0] == 0);
+
+  __asm__ volatile (  // scans coefficient pairs (14,15), (12,13), ... (0,1)
+    ".set     push                                      \n\t"
+    ".set     noreorder                                 \n\t"
+    "addiu    %[p_coeffs],   %[p_coeffs],    28         \n\t"  // &coeffs[14]
+    "li       %[n],          15                         \n\t"
+    "li       %[temp2],      -1                         \n\t"  // all-zero result
+  "0:                                                   \n\t"
+    "ulw      %[temp0],      0(%[p_coeffs])             \n\t"  // coeffs n-1, n
+    "beqz     %[temp0],      1f                         \n\t"  // both are zero
+#if defined(WORDS_BIGENDIAN)
+    " sll     %[temp1],      %[temp0],       16         \n\t"
+#else
+    " srl     %[temp1],      %[temp0],       16         \n\t"
+#endif
+    "addiu    %[n1],         %[n],           -1         \n\t"
+    "movz     %[temp0],      %[n1],          %[temp1]   \n\t"  // coeffs[n] == 0
+    "movn     %[temp0],      %[n],           %[temp1]   \n\t"  // coeffs[n] != 0
+    "j        2f                                        \n\t"
+    " addiu   %[temp2],      %[temp0],       0          \n\t"  // delay slot
+  "1:                                                   \n\t"
+    "addiu    %[n],          %[n],           -2         \n\t"
+    "bgtz     %[n],          0b                         \n\t"
+    " addiu   %[p_coeffs],   %[p_coeffs],    -4         \n\t"  // previous pair
+  "2:                                                   \n\t"
+    ".set     pop                                       \n\t"
+    : [p_coeffs]"+&r"(p_coeffs), [temp0]"=&r"(temp0),
+      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [n]"=&r"(n), [n1]"=&r"(n1)
+    :
+    : "memory"
+  );
+  res->last = temp2;  // index of the last non-zero coefficient, or -1
+  res->coeffs = coeffs;
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspCostInitMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) {
+  VP8SetResidualCoeffs = SetResidualCoeffs;  // both have MIPS32 versions here
+  VP8GetResidualCost = GetResidualCost;
+}
+
+#else  // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPS32)
+
+#endif  // WEBP_USE_MIPS32
diff --git a/drivers/webp/dsp/cost_mips_dsp_r2.c b/drivers/webp/dsp/cost_mips_dsp_r2.c
new file mode 100644
index 0000000000..ce64067756
--- /dev/null
+++ b/drivers/webp/dsp/cost_mips_dsp_r2.c
@@ -0,0 +1,107 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "../enc/cost.h"
+
+static int GetResidualCost(int ctx0, const VP8Residual* const res) {
+  int temp0, temp1;  // scratch registers for the asm block
+  int v_reg, ctx_reg;  // |coeff| and its context (0, 1 or 2) inside the asm
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  int p0 = res->prob[n][ctx0][0];
+  CostArrayPtr const costs = res->costs;
+  const uint16_t* t = costs[n][ctx0];
+  // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+  // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+  // be missing during the loop.
+  int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+  const int16_t* res_coeffs = res->coeffs;
+  const int res_last = res->last;
+  const int const_max_level = MAX_VARIABLE_LEVEL;  // upper clamp for t[] index
+  const int const_2 = 2;  // context used when |coeff| >= 2
+  const uint16_t** p_costs = &costs[n][0];  // cost-table row for position n
+  const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs);  // bytes per row
+
+  if (res->last < 0) {
+    return VP8BitCost(0, p0);
+  }
+  // NOTE(review): lhx presumably sign-extends; assumes costs < 0x8000. Confirm.
+  __asm__ volatile (  // adds the cost of positions n .. res_last - 1 to 'cost'
+    ".set      push                                                     \n\t"
+    ".set      noreorder                                                \n\t"
+    "subu      %[temp1],        %[res_last],        %[n]                \n\t"
+    "blez      %[temp1],        2f                                      \n\t"
+    " nop                                                               \n\t"
+  "1:                                                                   \n\t"
+    "sll       %[temp0],        %[n],               1                   \n\t"
+    "lhx       %[v_reg],        %[temp0](%[res_coeffs])                 \n\t"
+    "addiu     %[n],            %[n],               1                   \n\t"
+    "absq_s.w  %[v_reg],        %[v_reg]                                \n\t"
+    "sltiu     %[temp0],        %[v_reg],           2                   \n\t"
+    "move      %[ctx_reg],      %[v_reg]                                \n\t"
+    "movz      %[ctx_reg],      %[const_2],         %[temp0]            \n\t"
+    "sll       %[temp1],        %[v_reg],           1                   \n\t"
+    "lhx       %[temp1],        %[temp1](%[VP8LevelFixedCosts])         \n\t"
+    "slt       %[temp0],        %[v_reg],           %[const_max_level]  \n\t"
+    "movz      %[v_reg],        %[const_max_level], %[temp0]            \n\t"
+    "addu      %[cost],         %[cost],            %[temp1]            \n\t"
+    "sll       %[v_reg],        %[v_reg],           1                   \n\t"
+    "sll       %[ctx_reg],      %[ctx_reg],         2                   \n\t"
+    "lhx       %[temp0],        %[v_reg](%[t])                          \n\t"
+    "addu      %[p_costs],      %[p_costs],         %[inc_p_costs]      \n\t"
+    "addu      %[t],            %[p_costs],         %[ctx_reg]          \n\t"
+    "addu      %[cost],         %[cost],            %[temp0]            \n\t"
+    "bne       %[n],            %[res_last],        1b                  \n\t"
+    " lw       %[t],            0(%[t])                                 \n\t"
+  "2:                                                                   \n\t"
+    ".set      pop                                                      \n\t"
+    : [cost]"+&r"(cost), [t]"+&r"(t), [n]"+&r"(n), [v_reg]"=&r"(v_reg),
+      [ctx_reg]"=&r"(ctx_reg), [p_costs]"+&r"(p_costs), [temp0]"=&r"(temp0),
+      [temp1]"=&r"(temp1)
+    : [const_2]"r"(const_2), [const_max_level]"r"(const_max_level),
+      [VP8LevelFixedCosts]"r"(VP8LevelFixedCosts), [res_last]"r"(res_last),
+      [res_coeffs]"r"(res_coeffs), [inc_p_costs]"r"(inc_p_costs)
+    : "memory"
+  );
+
+  // Last coefficient is always non-zero
+  {
+    const int v = abs(res->coeffs[n]);  // n and t were advanced by the asm loop
+    assert(v != 0);
+    cost += VP8LevelCost(t, v);
+    if (n < 15) {
+      const int b = VP8EncBands[n + 1];
+      const int ctx = (v == 1) ? 1 : 2;
+      const int last_p0 = res->prob[b][ctx][0];
+      cost += VP8BitCost(0, last_p0);
+    }
+  }
+  return cost;
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspCostInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPSdspR2(void) {
+  VP8GetResidualCost = GetResidualCost;  // the only function overridden here
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/cost_sse2.c b/drivers/webp/dsp/cost_sse2.c
new file mode 100644
index 0000000000..0cb1c1fa04
--- /dev/null
+++ b/drivers/webp/dsp/cost_sse2.c
@@ -0,0 +1,119 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of cost functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+#include "../enc/cost.h"
+#include "../enc/vp8enci.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+
+static void SetResidualCoeffsSSE2(const int16_t* const coeffs,  // 16 read
+                                  VP8Residual* const res) {
+  const __m128i c0 = _mm_loadu_si128((const __m128i*)(coeffs + 0));  // 0..7
+  const __m128i c1 = _mm_loadu_si128((const __m128i*)(coeffs + 8));  // 8..15
+  // Use SSE2 to compare 16 values with a single instruction.
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i m0 = _mm_packs_epi16(c0, c1);  // 16b -> 8b, saturating
+  const __m128i m1 = _mm_cmpeq_epi8(m0, zero);  // 0xff where coeff == 0
+  // Get the comparison results as a bitmask into 16bits. Negate the mask to get
+  // the position of entries that are not equal to zero. We don't need to mask
+  // out least significant bits according to res->first, since coeffs[0] is 0
+  // if res->first > 0.
+  const uint32_t mask = 0x0000ffffu ^ (uint32_t)_mm_movemask_epi8(m1);
+  // The position of the most significant non-zero bit indicates the position of
+  // the last non-zero value.
+  assert(res->first == 0 || coeffs[0] == 0);
+  res->last = mask ? BitsLog2Floor(mask) : -1;  // -1: all 16 are zero
+  res->coeffs = coeffs;  // pointer is stored, data is not copied
+}
+
+static int GetResidualCostSSE2(int ctx0, const VP8Residual* const res) {
+  uint8_t levels[16], ctxs[16];  // per-position clamped level and context
+  uint16_t abs_levels[16];  // per-position unclamped absolute value
+  int n = res->first;
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  const int p0 = res->prob[n][ctx0][0];
+  CostArrayPtr const costs = res->costs;
+  const uint16_t* t = costs[n][ctx0];
+  // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0
+  // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll
+  // be missing during the loop.
+  int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0;
+
+  if (res->last < 0) {  // nothing to scan: return only the cost of bit 0
+    return VP8BitCost(0, p0);
+  }
+
+  {   // precompute clamped levels and contexts, packed to 8b.
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i kCst2 = _mm_set1_epi8(2);
+    const __m128i kCst67 = _mm_set1_epi8(MAX_VARIABLE_LEVEL);
+    const __m128i c0 = _mm_loadu_si128((const __m128i*)&res->coeffs[0]);
+    const __m128i c1 = _mm_loadu_si128((const __m128i*)&res->coeffs[8]);
+    const __m128i D0 = _mm_sub_epi16(zero, c0);  // -v
+    const __m128i D1 = _mm_sub_epi16(zero, c1);
+    const __m128i E0 = _mm_max_epi16(c0, D0);   // abs(v), 16b
+    const __m128i E1 = _mm_max_epi16(c1, D1);
+    const __m128i F = _mm_packs_epi16(E0, E1);  // abs(v), 8b (saturating)
+    const __m128i G = _mm_min_epu8(F, kCst2);    // context = 0,1,2
+    const __m128i H = _mm_min_epu8(F, kCst67);   // clamp_level in [0..67]
+
+    _mm_storeu_si128((__m128i*)&ctxs[0], G);
+    _mm_storeu_si128((__m128i*)&levels[0], H);
+
+    _mm_storeu_si128((__m128i*)&abs_levels[0], E0);
+    _mm_storeu_si128((__m128i*)&abs_levels[8], E1);
+  }
+  for (; n < res->last; ++n) {  // every position before the last one
+    const int ctx = ctxs[n];
+    const int level = levels[n];
+    const int flevel = abs_levels[n];   // full level
+    cost += VP8LevelFixedCosts[flevel] + t[level];  // simplified VP8LevelCost()
+    t = costs[n + 1][ctx];  // cost table for the next position
+  }
+  // Last coefficient is always non-zero
+  {
+    const int level = levels[n];
+    const int flevel = abs_levels[n];
+    assert(flevel != 0);
+    cost += VP8LevelFixedCosts[flevel] + t[level];
+    if (n < 15) {  // position n + 1 exists
+      const int b = VP8EncBands[n + 1];
+      const int ctx = ctxs[n];
+      const int last_p0 = res->prob[b][ctx][0];
+      cost += VP8BitCost(0, last_p0);
+    }
+  }
+  return cost;
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspCostInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitSSE2(void) {
+  VP8SetResidualCoeffs = SetResidualCoeffsSSE2;  // install both SSE2 hooks
+  VP8GetResidualCost = GetResidualCostSSE2;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspCostInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/cpu.c b/drivers/webp/dsp/cpu.c
index 0228734457..35c2af7f58 100644
--- a/drivers/webp/dsp/cpu.c
+++ b/drivers/webp/dsp/cpu.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // CPU detection
@@ -11,14 +13,10 @@
 
 #include "./dsp.h"
 
-#if defined(__ANDROID__)
+#if defined(WEBP_ANDROID_NEON)
 #include <cpu-features.h>
 #endif
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // SSE2 detection.
 //
@@ -31,22 +29,66 @@ static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
     "cpuid\n"
     "xchg %%edi, %%ebx\n"
     : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-    : "a"(info_type));
+    : "a"(info_type), "c"(0));
 }
 #elif defined(__i386__) || defined(__x86_64__)
 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
   __asm__ volatile (
     "cpuid\n"
     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-    : "a"(info_type));
+    : "a"(info_type), "c"(0));
 }
+#elif (defined(_M_X64) || defined(_M_IX86)) && \
+      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
+#include <intrin.h>
+#define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
 #elif defined(WEBP_MSC_SSE2)
 #define GetCPUInfo __cpuid
 #endif
 
+// NaCl has no support for xgetbv or the raw opcode.
+#if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
+static WEBP_INLINE uint64_t xgetbv(void) {
+  const uint32_t ecx = 0;  // value handed to the instruction in ecx
+  uint32_t eax, edx;
+  // Use the raw opcode for xgetbv for compatibility with older toolchains.
+  __asm__ volatile (
+    ".byte 0x0f, 0x01, 0xd0\n"
+    : "=a"(eax), "=d"(edx) : "c" (ecx));
+  return ((uint64_t)edx << 32) | eax;  // edx = high half, eax = low half
+}
+#elif (defined(_M_X64) || defined(_M_IX86)) && \
+      defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
+#include <immintrin.h>
+#define xgetbv() _xgetbv(0)
+#elif defined(_MSC_VER) && defined(_M_IX86)
+static WEBP_INLINE uint64_t xgetbv(void) {
+  uint32_t eax_, edx_;  // receive eax/edx from the asm block below
+  __asm {
+    xor ecx, ecx  // ecx = 0
+    // Use the raw opcode for xgetbv for compatibility with older toolchains.
+    __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
+    mov eax_, eax
+    mov edx_, edx
+  }
+  return ((uint64_t)edx_ << 32) | eax_;  // edx_ = high half, eax_ = low half
+}
+#else
+#define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
+#endif
+
 #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
 static int x86CPUInfo(CPUFeature feature) {
+  int max_cpuid_value;
   int cpu_info[4];
+
+  // get the highest feature value cpuid supports
+  GetCPUInfo(cpu_info, 0);
+  max_cpuid_value = cpu_info[0];
+  if (max_cpuid_value < 1) {
+    return 0;
+  }
+
   GetCPUInfo(cpu_info, 1);
   if (feature == kSSE2) {
     return 0 != (cpu_info[3] & 0x04000000);
@@ -54,10 +96,26 @@ static int x86CPUInfo(CPUFeature feature) {
   if (feature == kSSE3) {
     return 0 != (cpu_info[2] & 0x00000001);
   }
+  if (feature == kSSE4_1) {
+    return 0 != (cpu_info[2] & 0x00080000);
+  }
+  if (feature == kAVX) {
+    // bits 27 (OSXSAVE) & 28 (256-bit AVX)
+    if ((cpu_info[2] & 0x18000000) == 0x18000000) {
+      // XMM state and YMM state enabled by the OS.
+      return (xgetbv() & 0x6) == 0x6;
+    }
+  }
+  if (feature == kAVX2) {
+    if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
+      GetCPUInfo(cpu_info, 7);
+      return ((cpu_info[1] & 0x00000020) == 0x00000020);
+    }
+  }
   return 0;
 }
 VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
-#elif defined(WEBP_ANDROID_NEON)
+#elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
 static int AndroidCPUInfo(CPUFeature feature) {
   const AndroidCpuFamily cpu_family = android_getCpuFamily();
   const uint64_t cpu_features = android_getCpuFeatures();
@@ -68,7 +126,7 @@ static int AndroidCPUInfo(CPUFeature feature) {
   return 0;
 }
 VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
-#elif defined(__ARM_NEON__)
+#elif defined(WEBP_USE_NEON)
 // define a dummy function to enable turning off NEON at runtime by setting
 // VP8DecGetCPUInfo = NULL
 static int armCPUInfo(CPUFeature feature) {
@@ -76,10 +134,17 @@ static int armCPUInfo(CPUFeature feature) {
   return 1;
 }
 VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
+#elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2)
+static int mipsCPUInfo(CPUFeature feature) {
+  if ((feature == kMIPS32) || (feature == kMIPSdspR2)) {
+    return 1;  // no runtime probe: both MIPS features are reported present
+  } else {
+    return 0;  // every other feature is reported absent
+  }
+
+}
+VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
 #else
 VP8CPUInfo VP8GetCPUInfo = NULL;
 #endif
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dsp/dec.c b/drivers/webp/dsp/dec.c
index 9ae7b6fa76..77a00381c5 100644
--- a/drivers/webp/dsp/dec.c
+++ b/drivers/webp/dsp/dec.c
@@ -1,53 +1,20 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-// Speed-critical decoding functions.
+// Speed-critical decoding functions, default plain-C implementations.
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include "./dsp.h"
 #include "../dec/vp8i.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
-// run-time tables (~4k)
-
-static uint8_t abs0[255 + 255 + 1];     // abs(i)
-static uint8_t abs1[255 + 255 + 1];     // abs(i)>>1
-static int8_t sclip1[1020 + 1020 + 1];  // clips [-1020, 1020] to [-128, 127]
-static int8_t sclip2[112 + 112 + 1];    // clips [-112, 112] to [-16, 15]
-static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
-
-// We declare this variable 'volatile' to prevent instruction reordering
-// and make sure it's set to true _last_ (so as to be thread-safe)
-static volatile int tables_ok = 0;
-
-static void DspInitTables(void) {
-  if (!tables_ok) {
-    int i;
-    for (i = -255; i <= 255; ++i) {
-      abs0[255 + i] = (i < 0) ? -i : i;
-      abs1[255 + i] = abs0[255 + i] >> 1;
-    }
-    for (i = -1020; i <= 1020; ++i) {
-      sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
-    }
-    for (i = -112; i <= 112; ++i) {
-      sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
-    }
-    for (i = -255; i <= 255 + 255; ++i) {
-      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
-    }
-    tables_ok = 1;
-  }
-}
 
 static WEBP_INLINE uint8_t clip_8b(int v) {
   return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
@@ -59,9 +26,16 @@ static WEBP_INLINE uint8_t clip_8b(int v) {
 #define STORE(x, y, v) \
   dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
 
-static const int kC1 = 20091 + (1 << 16);
-static const int kC2 = 35468;
-#define MUL(a, b) (((a) * (b)) >> 16)
+#define STORE2(y, dc, d, c) do {    \
+  const int DC = (dc);              \
+  STORE(0, y, DC + (d));            \
+  STORE(1, y, DC + (c));            \
+  STORE(2, y, DC - (c));            \
+  STORE(3, y, DC - (d));            \
+} while (0)
+
+#define MUL1(a) ((((a) * 20091) >> 16) + (a))
+#define MUL2(a) (((a) * 35468) >> 16)
 
 static void TransformOne(const int16_t* in, uint8_t* dst) {
   int C[4 * 4], *tmp;
@@ -70,8 +44,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
   for (i = 0; i < 4; ++i) {    // vertical pass
     const int a = in[0] + in[8];    // [-4096, 4094]
     const int b = in[0] - in[8];    // [-4095, 4095]
-    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);   // [-3783, 3783]
-    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);   // [-3785, 3781]
+    const int c = MUL2(in[4]) - MUL1(in[12]);   // [-3783, 3783]
+    const int d = MUL1(in[4]) + MUL2(in[12]);   // [-3785, 3781]
     tmp[0] = a + d;   // [-7881, 7875]
     tmp[1] = b + c;   // [-7878, 7878]
     tmp[2] = b - c;   // [-7878, 7878]
@@ -80,7 +54,7 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
     in++;
   }
   // Each pass is expanding the dynamic range by ~3.85 (upper bound).
-  // The exact value is (2. + (kC1 + kC2) / 65536).
+  // The exact value is (2. + (20091 + 35468) / 65536).
   // After the second pass, maximum interval is [-3794, 3794], assuming
   // an input in [-2048, 2047] interval. We then need to add a dst value
   // in the [0, 255] range.
@@ -91,8 +65,8 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
     const int dc = tmp[0] + 4;
     const int a =  dc +  tmp[8];
     const int b =  dc -  tmp[8];
-    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
-    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+    const int c = MUL2(tmp[4]) - MUL1(tmp[12]);
+    const int d = MUL1(tmp[4]) + MUL2(tmp[12]);
     STORE(0, 0, a + d);
     STORE(1, 0, b + c);
     STORE(2, 0, b - c);
@@ -101,7 +75,22 @@ static void TransformOne(const int16_t* in, uint8_t* dst) {
     dst += BPS;
   }
 }
-#undef MUL
+
+// Simplified transform when only in[0], in[1] and in[4] are non-zero
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+  const int a = in[0] + 4;  // +4 rounds the >> 3 applied by STORE
+  const int c4 = MUL2(in[4]);  // (in[4] * 35468) >> 16
+  const int d4 = MUL1(in[4]);  // ((in[4] * 20091) >> 16) + in[4]
+  const int c1 = MUL2(in[1]);
+  const int d1 = MUL1(in[1]);
+  STORE2(0, a + d4, d1, c1);  // one STORE2 per output row, y = 0..3
+  STORE2(1, a + c4, d1, c1);
+  STORE2(2, a - c4, d1, c1);
+  STORE2(3, a - d4, d1, c1);
+}
+#undef MUL1
+#undef MUL2
+#undef STORE2
 
 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
   TransformOne(in, dst);
@@ -115,7 +104,7 @@ static void TransformUV(const int16_t* in, uint8_t* dst) {
   VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
 }
 
-static void TransformDC(const int16_t *in, uint8_t* dst) {
+static void TransformDC(const int16_t* in, uint8_t* dst) {
   const int DC = in[0] + 4;
   int i, j;
   for (j = 0; j < 4; ++j) {
@@ -126,10 +115,10 @@ static void TransformDC(const int16_t *in, uint8_t* dst) {
 }
 
 static void TransformDCUV(const int16_t* in, uint8_t* dst) {
-  if (in[0 * 16]) TransformDC(in + 0 * 16, dst);
-  if (in[1 * 16]) TransformDC(in + 1 * 16, dst + 4);
-  if (in[2 * 16]) TransformDC(in + 2 * 16, dst + 4 * BPS);
-  if (in[3 * 16]) TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
+  if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
+  if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
+  if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
+  if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
 }
 
 #undef STORE
@@ -164,16 +153,16 @@ static void TransformWHT(const int16_t* in, int16_t* out) {
   }
 }
 
-void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT;
+void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
 
 //------------------------------------------------------------------------------
 // Intra predictions
 
 #define DST(x, y) dst[(x) + (y) * BPS]
 
-static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) {
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
   const uint8_t* top = dst - BPS;
-  const uint8_t* const clip0 = clip1 + 255 - top[-1];
+  const uint8_t* const clip0 = VP8kclip1 - top[-1];
   int y;
   for (y = 0; y < size; ++y) {
     const uint8_t* const clip = clip0 + dst[-1];
@@ -184,21 +173,21 @@ static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) {
     dst += BPS;
   }
 }
-static void TM4(uint8_t *dst)   { TrueMotion(dst, 4); }
-static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); }
-static void TM16(uint8_t *dst)  { TrueMotion(dst, 16); }
+static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
+static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
+static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }
 
 //------------------------------------------------------------------------------
 // 16x16
 
-static void VE16(uint8_t *dst) {     // vertical
+static void VE16(uint8_t* dst) {     // vertical
   int j;
   for (j = 0; j < 16; ++j) {
     memcpy(dst + j * BPS, dst - BPS, 16);
   }
 }
 
-static void HE16(uint8_t *dst) {     // horizontal
+static void HE16(uint8_t* dst) {     // horizontal
   int j;
   for (j = 16; j > 0; --j) {
     memset(dst, dst[-1], 16);
@@ -213,7 +202,7 @@ static WEBP_INLINE void Put16(int v, uint8_t* dst) {
   }
 }
 
-static void DC16(uint8_t *dst) {    // DC
+static void DC16(uint8_t* dst) {    // DC
   int DC = 16;
   int j;
   for (j = 0; j < 16; ++j) {
@@ -222,7 +211,7 @@ static void DC16(uint8_t *dst) {    // DC
   Put16(DC >> 5, dst);
 }
 
-static void DC16NoTop(uint8_t *dst) {   // DC with top samples not available
+static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
   int DC = 8;
   int j;
   for (j = 0; j < 16; ++j) {
@@ -231,7 +220,7 @@ static void DC16NoTop(uint8_t *dst) {   // DC with top samples not available
   Put16(DC >> 4, dst);
 }
 
-static void DC16NoLeft(uint8_t *dst) {  // DC with left samples not available
+static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
   int DC = 8;
   int i;
   for (i = 0; i < 16; ++i) {
@@ -240,17 +229,19 @@ static void DC16NoLeft(uint8_t *dst) {  // DC with left samples not available
   Put16(DC >> 4, dst);
 }
 
-static void DC16NoTopLeft(uint8_t *dst) {  // DC with no top and left samples
+static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
   Put16(0x80, dst);
 }
 
+VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
+
 //------------------------------------------------------------------------------
 // 4x4
 
 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
 
-static void VE4(uint8_t *dst) {    // vertical
+static void VE4(uint8_t* dst) {    // vertical
   const uint8_t* top = dst - BPS;
   const uint8_t vals[4] = {
     AVG3(top[-1], top[0], top[1]),
@@ -264,7 +255,7 @@ static void VE4(uint8_t *dst) {    // vertical
   }
 }
 
-static void HE4(uint8_t *dst) {    // horizontal
+static void HE4(uint8_t* dst) {    // horizontal
   const int A = dst[-1 - BPS];
   const int B = dst[-1];
   const int C = dst[-1 + BPS];
@@ -276,7 +267,7 @@ static void HE4(uint8_t *dst) {    // horizontal
   *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E);
 }
 
-static void DC4(uint8_t *dst) {   // DC
+static void DC4(uint8_t* dst) {   // DC
   uint32_t dc = 4;
   int i;
   for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
@@ -284,7 +275,7 @@ static void DC4(uint8_t *dst) {   // DC
   for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
 }
 
-static void RD4(uint8_t *dst) {   // Down-right
+static void RD4(uint8_t* dst) {   // Down-right
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -295,15 +286,15 @@ static void RD4(uint8_t *dst) {   // Down-right
   const int C = dst[2 - BPS];
   const int D = dst[3 - BPS];
   DST(0, 3)                                     = AVG3(J, K, L);
-  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
-  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
-  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
-  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
-  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
-  DST(3, 0)                                     = AVG3(D, C, B);
+  DST(1, 3) = DST(0, 2)                         = AVG3(I, J, K);
+  DST(2, 3) = DST(1, 2) = DST(0, 1)             = AVG3(X, I, J);
+  DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
+              DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
+                          DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
+                                      DST(3, 0) = AVG3(D, C, B);
 }
 
-static void LD4(uint8_t *dst) {   // Down-Left
+static void LD4(uint8_t* dst) {   // Down-Left
   const int A = dst[0 - BPS];
   const int B = dst[1 - BPS];
   const int C = dst[2 - BPS];
@@ -316,12 +307,12 @@ static void LD4(uint8_t *dst) {   // Down-Left
   DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
   DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
   DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
-  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
-  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
-  DST(3, 3)                                     = AVG3(G, H, H);
+              DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+                          DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+                                      DST(3, 3) = AVG3(G, H, H);
 }
 
-static void VR4(uint8_t *dst) {   // Vertical-Right
+static void VR4(uint8_t* dst) {   // Vertical-Right
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -343,7 +334,7 @@ static void VR4(uint8_t *dst) {   // Vertical-Right
   DST(3, 1) =             AVG3(B, C, D);
 }
 
-static void VL4(uint8_t *dst) {   // Vertical-Left
+static void VL4(uint8_t* dst) {   // Vertical-Left
   const int A = dst[0 - BPS];
   const int B = dst[1 - BPS];
   const int C = dst[2 - BPS];
@@ -365,7 +356,7 @@ static void VL4(uint8_t *dst) {   // Vertical-Left
               DST(3, 3) = AVG3(F, G, H);
 }
 
-static void HU4(uint8_t *dst) {   // Horizontal-Up
+static void HU4(uint8_t* dst) {   // Horizontal-Up
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -380,7 +371,7 @@ static void HU4(uint8_t *dst) {   // Horizontal-Up
     DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
 }
 
-static void HD4(uint8_t *dst) {  // Horizontal-Down
+static void HD4(uint8_t* dst) {  // Horizontal-Down
   const int I = dst[-1 + 0 * BPS];
   const int J = dst[-1 + 1 * BPS];
   const int K = dst[-1 + 2 * BPS];
@@ -407,17 +398,19 @@ static void HD4(uint8_t *dst) {  // Horizontal-Down
 #undef AVG3
 #undef AVG2
 
+VP8PredFunc VP8PredLuma4[NUM_BMODES];
+
 //------------------------------------------------------------------------------
 // Chroma
 
-static void VE8uv(uint8_t *dst) {    // vertical
+static void VE8uv(uint8_t* dst) {    // vertical
   int j;
   for (j = 0; j < 8; ++j) {
     memcpy(dst + j * BPS, dst - BPS, 8);
   }
 }
 
-static void HE8uv(uint8_t *dst) {    // horizontal
+static void HE8uv(uint8_t* dst) {    // horizontal
   int j;
   for (j = 0; j < 8; ++j) {
     memset(dst, dst[-1], 8);
@@ -426,60 +419,45 @@ static void HE8uv(uint8_t *dst) {    // horizontal
 }
 
 // helper for chroma-DC predictions
-static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
+static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
   int j;
   for (j = 0; j < 8; ++j) {
-    *(uint64_t*)(dst + j * BPS) = v;
+    memset(dst + j * BPS, value, 8);
   }
 }
 
-static void DC8uv(uint8_t *dst) {     // DC
+static void DC8uv(uint8_t* dst) {     // DC
   int dc0 = 8;
   int i;
   for (i = 0; i < 8; ++i) {
     dc0 += dst[i - BPS] + dst[-1 + i * BPS];
   }
-  Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst);
+  Put8x8uv(dc0 >> 4, dst);
 }
 
-static void DC8uvNoLeft(uint8_t *dst) {   // DC with no left samples
+static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
   int dc0 = 4;
   int i;
   for (i = 0; i < 8; ++i) {
     dc0 += dst[i - BPS];
   }
-  Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
+  Put8x8uv(dc0 >> 3, dst);
 }
 
-static void DC8uvNoTop(uint8_t *dst) {  // DC with no top samples
+static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
   int dc0 = 4;
   int i;
   for (i = 0; i < 8; ++i) {
     dc0 += dst[-1 + i * BPS];
   }
-  Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
+  Put8x8uv(dc0 >> 3, dst);
 }
 
-static void DC8uvNoTopLeft(uint8_t *dst) {    // DC with nothing
-  Put8x8uv(0x8080808080808080ULL, dst);
+static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
+  Put8x8uv(0x80, dst);
 }
 
-//------------------------------------------------------------------------------
-// default C implementations
-
-const VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
-  DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4
-};
-
-const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
-  DC16, TM16, VE16, HE16,
-  DC16NoTop, DC16NoLeft, DC16NoTopLeft
-};
-
-const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
-  DC8uv, TM8uv, VE8uv, HE8uv,
-  DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
-};
+VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
 
 //------------------------------------------------------------------------------
 // Edge filtering functions
@@ -487,61 +465,62 @@ const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
 // 4 pixels in, 2 pixels out
 static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1];
-  const int a1 = sclip2[112 + ((a + 4) >> 3)];
-  const int a2 = sclip2[112 + ((a + 3) >> 3)];
-  p[-step] = clip1[255 + p0 + a2];
-  p[    0] = clip1[255 + q0 - a1];
+  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // in [-893,892]
+  const int a1 = VP8ksclip2[(a + 4) >> 3];            // in [-16,15]
+  const int a2 = VP8ksclip2[(a + 3) >> 3];
+  p[-step] = VP8kclip1[p0 + a2];
+  p[    0] = VP8kclip1[q0 - a1];
 }
 
 // 4 pixels in, 4 pixels out
 static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
   const int a = 3 * (q0 - p0);
-  const int a1 = sclip2[112 + ((a + 4) >> 3)];
-  const int a2 = sclip2[112 + ((a + 3) >> 3)];
+  const int a1 = VP8ksclip2[(a + 4) >> 3];
+  const int a2 = VP8ksclip2[(a + 3) >> 3];
   const int a3 = (a1 + 1) >> 1;
-  p[-2*step] = clip1[255 + p1 + a3];
-  p[-  step] = clip1[255 + p0 + a2];
-  p[      0] = clip1[255 + q0 - a1];
-  p[   step] = clip1[255 + q1 - a3];
+  p[-2*step] = VP8kclip1[p1 + a3];
+  p[-  step] = VP8kclip1[p0 + a2];
+  p[      0] = VP8kclip1[q0 - a1];
+  p[   step] = VP8kclip1[q1 - a3];
 }
 
 // 6 pixels in, 6 pixels out
 static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
   const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
   const int q0 = p[0], q1 = p[step], q2 = p[2*step];
-  const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]];
+  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
+  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
   const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
   const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
   const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
-  p[-3*step] = clip1[255 + p2 + a3];
-  p[-2*step] = clip1[255 + p1 + a2];
-  p[-  step] = clip1[255 + p0 + a1];
-  p[      0] = clip1[255 + q0 - a1];
-  p[   step] = clip1[255 + q1 - a2];
-  p[ 2*step] = clip1[255 + q2 - a3];
+  p[-3*step] = VP8kclip1[p2 + a3];
+  p[-2*step] = VP8kclip1[p1 + a2];
+  p[-  step] = VP8kclip1[p0 + a1];
+  p[      0] = VP8kclip1[q0 - a1];
+  p[   step] = VP8kclip1[q1 - a2];
+  p[ 2*step] = VP8kclip1[q2 - a3];
 }
 
 static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
   const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh);
+  return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
 }
 
-static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh;
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
 }
 
 static WEBP_INLINE int needs_filter2(const uint8_t* p,
                                      int step, int t, int it) {
-  const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
-  const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step];
-  if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t)
-    return 0;
-  return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it &&
-         abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it &&
-         abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it;
+  const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
+  const int p0 = p[-step], q0 = p[0];
+  const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
+  if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0;
+  return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
+         VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
+         VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
 }
 
 //------------------------------------------------------------------------------
@@ -549,8 +528,9 @@ static WEBP_INLINE int needs_filter2(const uint8_t* p,
 
 static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
   int i;
+  const int thresh2 = 2 * thresh + 1;
   for (i = 0; i < 16; ++i) {
-    if (needs_filter(p + i, stride, thresh)) {
+    if (needs_filter(p + i, stride, thresh2)) {
       do_filter2(p + i, stride);
     }
   }
@@ -558,8 +538,9 @@ static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
 
 static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
   int i;
+  const int thresh2 = 2 * thresh + 1;
   for (i = 0; i < 16; ++i) {
-    if (needs_filter(p + i * stride, 1, thresh)) {
+    if (needs_filter(p + i * stride, 1, thresh2)) {
       do_filter2(p + i * stride, 1);
     }
   }
@@ -587,8 +568,9 @@ static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
 static WEBP_INLINE void FilterLoop26(uint8_t* p,
                                      int hstride, int vstride, int size,
                                      int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;
   while (size-- > 0) {
-    if (needs_filter2(p, hstride, thresh, ithresh)) {
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {
       if (hev(p, hstride, hev_thresh)) {
         do_filter2(p, hstride);
       } else {
@@ -602,8 +584,9 @@ static WEBP_INLINE void FilterLoop26(uint8_t* p,
 static WEBP_INLINE void FilterLoop24(uint8_t* p,
                                      int hstride, int vstride, int size,
                                      int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;
   while (size-- > 0) {
-    if (needs_filter2(p, hstride, thresh, ithresh)) {
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {
       if (hev(p, hstride, hev_thresh)) {
         do_filter2(p, hstride);
       } else {
@@ -672,6 +655,7 @@ static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
 //------------------------------------------------------------------------------
 
 VP8DecIdct2 VP8Transform;
+VP8DecIdct VP8TransformAC3;
 VP8DecIdct VP8TransformUV;
 VP8DecIdct VP8TransformDC;
 VP8DecIdct VP8TransformDCUV;
@@ -690,15 +674,25 @@ VP8SimpleFilterFunc VP8SimpleVFilter16i;
 VP8SimpleFilterFunc VP8SimpleHFilter16i;
 
 extern void VP8DspInitSSE2(void);
+extern void VP8DspInitSSE41(void);
 extern void VP8DspInitNEON(void);
+extern void VP8DspInitMIPS32(void);
+extern void VP8DspInitMIPSdspR2(void);
+
+static volatile VP8CPUInfo dec_last_cpuinfo_used =
+    (VP8CPUInfo)&dec_last_cpuinfo_used;
 
-void VP8DspInit(void) {
-  DspInitTables();
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInit(void) {
+  if (dec_last_cpuinfo_used == VP8GetCPUInfo) return;
 
+  VP8InitClipTables();
+
+  VP8TransformWHT = TransformWHT;
   VP8Transform = TransformTwo;
   VP8TransformUV = TransformUV;
   VP8TransformDC = TransformDC;
   VP8TransformDCUV = TransformDCUV;
+  VP8TransformAC3 = TransformAC3;
 
   VP8VFilter16 = VFilter16;
   VP8HFilter16 = HFilter16;
@@ -713,20 +707,60 @@ void VP8DspInit(void) {
   VP8SimpleVFilter16i = SimpleVFilter16i;
   VP8SimpleHFilter16i = SimpleHFilter16i;
 
+  VP8PredLuma4[0] = DC4;
+  VP8PredLuma4[1] = TM4;
+  VP8PredLuma4[2] = VE4;
+  VP8PredLuma4[3] = HE4;
+  VP8PredLuma4[4] = RD4;
+  VP8PredLuma4[5] = VR4;
+  VP8PredLuma4[6] = LD4;
+  VP8PredLuma4[7] = VL4;
+  VP8PredLuma4[8] = HD4;
+  VP8PredLuma4[9] = HU4;
+
+  VP8PredLuma16[0] = DC16;
+  VP8PredLuma16[1] = TM16;
+  VP8PredLuma16[2] = VE16;
+  VP8PredLuma16[3] = HE16;
+  VP8PredLuma16[4] = DC16NoTop;
+  VP8PredLuma16[5] = DC16NoLeft;
+  VP8PredLuma16[6] = DC16NoTopLeft;
+
+  VP8PredChroma8[0] = DC8uv;
+  VP8PredChroma8[1] = TM8uv;
+  VP8PredChroma8[2] = VE8uv;
+  VP8PredChroma8[3] = HE8uv;
+  VP8PredChroma8[4] = DC8uvNoTop;
+  VP8PredChroma8[5] = DC8uvNoLeft;
+  VP8PredChroma8[6] = DC8uvNoTopLeft;
+
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-  if (VP8GetCPUInfo) {
+  if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
     if (VP8GetCPUInfo(kSSE2)) {
       VP8DspInitSSE2();
+#if defined(WEBP_USE_SSE41)
+      if (VP8GetCPUInfo(kSSE4_1)) {
+        VP8DspInitSSE41();
+      }
+#endif
     }
-#elif defined(WEBP_USE_NEON)
+#endif
+#if defined(WEBP_USE_NEON)
     if (VP8GetCPUInfo(kNEON)) {
       VP8DspInitNEON();
     }
 #endif
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8DspInitMIPS32();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8DspInitMIPSdspR2();
+    }
+#endif
   }
+  dec_last_cpuinfo_used = VP8GetCPUInfo;
 }
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dsp/dec_clip_tables.c b/drivers/webp/dsp/dec_clip_tables.c
new file mode 100644
index 0000000000..3b6dde86ba
--- /dev/null
+++ b/drivers/webp/dsp/dec_clip_tables.c
@@ -0,0 +1,366 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Clipping tables for filtering
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#define USE_STATIC_TABLES     // undefine to have run-time table initialization
+
+#ifdef USE_STATIC_TABLES
+
+static const uint8_t abs0[255 + 255 + 1] = {
+  0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
+  0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
+  0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
+  0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
+  0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
+  0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
+  0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
+  0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
+  0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
+  0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
+  0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
+  0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
+  0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
+  0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
+  0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
+  0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
+  0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
+  0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
+  0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
+  0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
+  0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
+  0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+  0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
+  0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
+  0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+  0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
+  0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+  0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+  0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
+  0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+  0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+  0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
+  0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+  0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
+  0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+static const int8_t sclip1[1020 + 1020 + 1] = {
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+  0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+  0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
+  0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+  0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
+  0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+  0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
+  0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+  0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
+  0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
+  0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
+  0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
+  0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
+  0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+  0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
+  0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
+  0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+  0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
+  0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
+};
+
+static const int8_t sclip2[112 + 112 + 1] = {
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
+  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
+  0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
+  0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
+};
+
+static const uint8_t clip1[255 + 511 + 1] = {
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+  0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
+  0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
+  0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
+  0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+  0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
+  0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+  0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
+  0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+  0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
+  0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
+  0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
+  0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+  0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
+  0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+  0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
+  0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
+  0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
+  0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
+  0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
+  0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+  0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+};
+
+#else
+
+// uninitialized tables, filled at run time by VP8InitClipTables()
+static uint8_t abs0[255 + 255 + 1];
+static int8_t sclip1[1020 + 1020 + 1];
+static int8_t sclip2[112 + 112 + 1];
+static uint8_t clip1[255 + 511 + 1];
+
+// 'volatile' is meant to keep the store from being reordered, so the flag is
+// set to true _last_. NOTE: it is not a memory barrier (best-effort safety).
+static volatile int tables_ok = 0;
+
+#endif
+
+const int8_t* const VP8ksclip1 = &sclip1[1020];
+const int8_t* const VP8ksclip2 = &sclip2[112];
+const uint8_t* const VP8kclip1 = &clip1[255];
+const uint8_t* const VP8kabs0 = &abs0[255];
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
+#if !defined(USE_STATIC_TABLES)
+  int i;
+  if (!tables_ok) {
+    for (i = -255; i <= 255; ++i) {
+      abs0[255 + i] = (i < 0) ? -i : i;
+    }
+    for (i = -1020; i <= 1020; ++i) {
+      sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
+    }
+    for (i = -112; i <= 112; ++i) {
+      sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
+    }
+    for (i = -255; i <= 255 + 255; ++i) {
+      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+    }
+    tables_ok = 1;
+  }
+#endif    // !defined(USE_STATIC_TABLES)
+}
diff --git a/drivers/webp/dsp/dec_mips32.c b/drivers/webp/dsp/dec_mips32.c
new file mode 100644
index 0000000000..4e9ef42605
--- /dev/null
+++ b/drivers/webp/dsp/dec_mips32.c
@@ -0,0 +1,587 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of dsp functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "./mips_macro.h"
+
+static const int kC1 = 20091 + (1 << 16);  // 85627, i.e. ~1.30656 * 65536; TransformOne multiplies by it and shifts right by 16
+static const int kC2 = 35468;              // ~0.54120 * 65536; TransformOne multiplies by it and shifts right by 16
+
+static WEBP_INLINE int abs_mips32(int x) {  // branch-free absolute value of x
+  const int sign = x >> 31;  // 0 for x >= 0, -1 (all ones) for x < 0; assumes 32-bit int and arithmetic right shift
+  return (x ^ sign) - sign;  // sign == 0: x unchanged; sign == -1: (~x) + 1, i.e. -x
+}
+
+// 4 pixels in, 2 pixels out: reads p1, p0, q0, q1 around p and rewrites p0 and q0.
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {  // step: offset between consecutive samples across the edge
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];  // raw filter value, in [-893, 892] for 8-bit samples
+  const int a1 = VP8ksclip2[(a + 4) >> 3];  // adjustment subtracted from q0
+  const int a2 = VP8ksclip2[(a + 3) >> 3];  // adjustment added to p0
+  p[-step] = VP8kclip1[p0 + a2];  // table lookup maps the sum back to a byte value
+  p[    0] = VP8kclip1[q0 - a1];
+}
+
+// 4 pixels in, 4 pixels out: reads p1, p0, q0, q1 around p and rewrites all four.
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {  // step: offset between consecutive samples across the edge
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0);  // unlike do_filter2, no VP8ksclip1[p1 - q1] term here
+  const int a1 = VP8ksclip2[(a + 4) >> 3];  // adjustment subtracted from q0
+  const int a2 = VP8ksclip2[(a + 3) >> 3];  // adjustment added to p0
+  const int a3 = (a1 + 1) >> 1;  // half of a1 (rounded), applied to the outer samples p1 and q1
+  p[-2 * step] = VP8kclip1[p1 + a3];
+  p[-    step] = VP8kclip1[p0 + a2];
+  p[        0] = VP8kclip1[q0 - a1];
+  p[     step] = VP8kclip1[q1 - a3];
+}
+
+// 6 pixels in, 6 pixels out: reads p2..p0 and q0..q2 around p and rewrites all six.
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {  // step: offset between consecutive samples across the edge
+  const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
+  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];  // filter value after a second VP8ksclip1 lookup
+  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
+  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
+  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
+  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
+  p[-3 * step] = VP8kclip1[p2 + a3];  // weakest adjustment on the outermost samples
+  p[-2 * step] = VP8kclip1[p1 + a2];
+  p[-    step] = VP8kclip1[p0 + a1];  // strongest adjustment next to the edge
+  p[        0] = VP8kclip1[q0 - a1];
+  p[     step] = VP8kclip1[q1 - a2];
+  p[ 2 * step] = VP8kclip1[q2 - a3];
+}
+
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {  // returns 1 if |p1 - p0| or |q1 - q0| exceeds thresh, else 0
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
+}
+
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {  // returns 1 if 4 * |p0 - q0| + |p1 - q1| <= t, else 0
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
+}
+
+static WEBP_INLINE int needs_filter2(const uint8_t* p,  // edge test (limit t) plus interior test (limit it); returns 0 or 1
+                                     int step, int t, int it) {
+  const int p3 = p[-4 * step], p2 = p[-3 * step];
+  const int p1 = p[-2 * step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
+  if ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) {  // same edge test as needs_filter()
+    return 0;
+  }
+  return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&  // every neighbouring pair on each side must differ by at most 'it'
+         abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
+         abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
+}
+
+static WEBP_INLINE void FilterLoop26(uint8_t* p,  // filters 'size' positions along an edge with do_filter2 or do_filter6
+                                     int hstride, int vstride, int size,  // hstride: step across the edge; vstride: step to the next position
+                                     int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;  // edge limit handed to needs_filter2()
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {  // positions failing the test are left untouched
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);  // hev() fired: only p0/q0 are modified
+      } else {
+        do_filter6(p, hstride);  // otherwise all six samples p2..q2 are modified
+      }
+    }
+    p += vstride;
+  }
+}
+
+static WEBP_INLINE void FilterLoop24(uint8_t* p,  // filters 'size' positions along an edge with do_filter2 or do_filter4
+                                     int hstride, int vstride, int size,  // hstride: step across the edge; vstride: step to the next position
+                                     int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;  // edge limit handed to needs_filter2()
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh2, ithresh)) {  // positions failing the test are left untouched
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);  // hev() fired: only p0/q0 are modified
+      } else {
+        do_filter4(p, hstride);  // otherwise the four samples p1..q1 are modified
+      }
+    }
+    p += vstride;
+  }
+}
+
+// on macroblock edges (FilterLoop26 variants, which may touch up to 6 samples)
+static void VFilter16(uint8_t* p, int stride,  // 16 adjacent columns, samples taken 'stride' apart (vertically)
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter16(uint8_t* p, int stride,  // 16 rows 'stride' apart, samples taken 1 apart (horizontally)
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+}
+
+// 8-pixels wide variant, for chroma filtering (applied to the u and v planes in turn)
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,  // 8 adjacent columns per plane, samples 'stride' apart
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,  // 8 rows per plane 'stride' apart, samples 1 apart
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,  // inner-edge variant: FilterLoop24 on the edge 4 rows below u / v
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,  // inner-edge variant: FilterLoop24 on the edge 4 columns right of u / v
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+// on three inner edges (rows 4, 8 and 12 below p), using FilterLoop24
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {  // three iterations, one per inner edge
+    p += 4 * stride;  // advance to the next edge, 4 rows further down
+    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void HFilter16i(uint8_t* p, int stride,  // FilterLoop24 on the three inner edges at columns 4, 8 and 12 right of p
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {  // three iterations, one per inner edge
+    p += 4;  // advance to the next edge, 4 columns further right
+    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {  // do_filter2 on 16 adjacent columns, samples 'stride' apart
+  int i;
+  const int thresh2 = 2 * thresh + 1;  // limit handed to needs_filter()
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i, stride, thresh2)) {  // columns failing the test are left untouched
+      do_filter2(p + i, stride);
+    }
+  }
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {  // do_filter2 on 16 rows 'stride' apart, samples 1 apart
+  int i;
+  const int thresh2 = 2 * thresh + 1;  // limit handed to needs_filter()
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i * stride, 1, thresh2)) {  // rows failing the test are left untouched
+      do_filter2(p + i * stride, 1);
+    }
+  }
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {  // SimpleVFilter16 on the three edges 4, 8 and 12 rows below p
+  int k;
+  for (k = 3; k > 0; --k) {  // three iterations, one per inner edge
+    p += 4 * stride;
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {  // SimpleHFilter16 on the three edges 4, 8 and 12 columns right of p
+  int k;
+  for (k = 3; k > 0; --k) {  // three iterations, one per inner edge
+    p += 4;
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {  // transforms the 16 coefficients in[0..15] and adds the result, clamped to [0, 255], to a 4x4 area of dst
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  int temp10, temp11, temp12, temp13, temp14;
+  int temp15, temp16, temp17, temp18;
+  int16_t* p_in = (int16_t*)in;  // const is dropped only to form the asm input operand; the asm only loads ("lh") through it
+
+  // loops unrolled and merged to avoid usage of tmp buffer
+  // and to reduce number of stalls. MUL macro is written
+  // in assembler and inlined
+  __asm__ volatile(
+    "lh       %[temp0],  0(%[in])                      \n\t"  // in[0] (offsets are in bytes, 2 per coefficient)
+    "lh       %[temp8],  16(%[in])                     \n\t"  // in[8]
+    "lh       %[temp4],  8(%[in])                      \n\t"  // in[4]
+    "lh       %[temp12], 24(%[in])                     \n\t"  // in[12]
+    "addu     %[temp16], %[temp0],  %[temp8]           \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"
+    "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"  // each "mul" by kC1/kC2 is paired with a later "sra ..., 16"
+    "mul      %[temp17], %[temp12], %[kC1]             \n\t"
+    "mul      %[temp4],  %[temp4],  %[kC1]             \n\t"
+    "mul      %[temp12], %[temp12], %[kC2]             \n\t"
+    "lh       %[temp1],  2(%[in])                      \n\t"  // in[1]
+    "lh       %[temp5],  10(%[in])                     \n\t"  // in[5]
+    "lh       %[temp9],  18(%[in])                     \n\t"  // in[9]
+    "lh       %[temp13], 26(%[in])                     \n\t"  // in[13]
+    "sra      %[temp8],  %[temp8],  16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp4],  %[temp4],  16                 \n\t"
+    "sra      %[temp12], %[temp12], 16                 \n\t"
+    "lh       %[temp2],  4(%[in])                      \n\t"  // in[2]
+    "lh       %[temp6],  12(%[in])                     \n\t"  // in[6]
+    "lh       %[temp10], 20(%[in])                     \n\t"  // in[10]
+    "lh       %[temp14], 28(%[in])                     \n\t"  // in[14]
+    "subu     %[temp17], %[temp8],  %[temp17]          \n\t"
+    "addu     %[temp4],  %[temp4],  %[temp12]          \n\t"
+    "addu     %[temp8],  %[temp16], %[temp4]           \n\t"
+    "subu     %[temp4],  %[temp16], %[temp4]           \n\t"
+    "addu     %[temp16], %[temp1],  %[temp9]           \n\t"
+    "subu     %[temp1],  %[temp1],  %[temp9]           \n\t"
+    "lh       %[temp3],  6(%[in])                      \n\t"  // in[3]
+    "lh       %[temp7],  14(%[in])                     \n\t"  // in[7]
+    "lh       %[temp11], 22(%[in])                     \n\t"  // in[11]
+    "lh       %[temp15], 30(%[in])                     \n\t"  // in[15]
+    "addu     %[temp12], %[temp0],  %[temp17]          \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"
+    "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp13], %[kC1]             \n\t"
+    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    "mul      %[temp13], %[temp13], %[kC2]             \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
+    "sra      %[temp5],  %[temp5],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
+    "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
+    "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
+    "mul      %[temp17], %[temp14], %[kC1]             \n\t"
+    "mul      %[temp14], %[temp14], %[kC2]             \n\t"
+    "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
+    "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
+    "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
+    "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
+    "mul      %[temp6],  %[temp6],  %[kC1]             \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp14], %[temp14], 16                 \n\t"
+    "sra      %[temp10], %[temp10], 16                 \n\t"
+    "sra      %[temp6],  %[temp6],  16                 \n\t"
+    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
+    "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
+    "addu     %[temp10], %[temp16], %[temp6]           \n\t"
+    "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
+    "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
+    "mul      %[temp17], %[temp15], %[kC1]             \n\t"
+    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
+    "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
+    "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
+    "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
+    "mul      %[temp7],  %[temp7],  %[kC1]             \n\t"
+    "addiu    %[temp8],  %[temp8],  4                  \n\t"  // +4 on four first-pass values: rounding bias ahead of the final "sra ..., 3"
+    "addiu    %[temp12], %[temp12], 4                  \n\t"
+    "addiu    %[temp0],  %[temp0],  4                  \n\t"
+    "addiu    %[temp4],  %[temp4],  4                  \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp15], %[temp15], 16                 \n\t"
+    "sra      %[temp11], %[temp11], 16                 \n\t"
+    "sra      %[temp7],  %[temp7],  16                 \n\t"
+    "subu     %[temp17], %[temp11], %[temp17]          \n\t"
+    "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
+    "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
+    "subu     %[temp3],  %[temp3],  %[temp17]          \n\t"
+    "addu     %[temp11], %[temp16], %[temp7]           \n\t"
+    "subu     %[temp7],  %[temp16], %[temp7]           \n\t"
+    "addu     %[temp16], %[temp8],  %[temp10]          \n\t"  // from here on the first-pass results are combined with each other (second pass)
+    "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
+    "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
+    "mul      %[temp17], %[temp11], %[kC1]             \n\t"
+    "mul      %[temp9],  %[temp9],  %[kC1]             \n\t"
+    "mul      %[temp11], %[temp11], %[kC2]             \n\t"
+    "sra      %[temp10], %[temp10], 16                 \n\t"
+    "sra      %[temp17], %[temp17], 16                 \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp11], %[temp11], 16                 \n\t"
+    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
+    "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
+    "addu     %[temp10], %[temp12], %[temp14]          \n\t"
+    "subu     %[temp12], %[temp12], %[temp14]          \n\t"
+    "mul      %[temp14], %[temp13], %[kC2]             \n\t"
+    "mul      %[temp9],  %[temp15], %[kC1]             \n\t"
+    "mul      %[temp13], %[temp13], %[kC1]             \n\t"
+    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
+    "sra      %[temp14], %[temp14], 16                 \n\t"
+    "sra      %[temp9],  %[temp9],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "sra      %[temp15], %[temp15], 16                 \n\t"
+    "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
+    "addu     %[temp15], %[temp13], %[temp15]          \n\t"
+    "addu     %[temp14], %[temp0],  %[temp2]           \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
+    "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
+    "mul      %[temp13], %[temp3],  %[kC1]             \n\t"
+    "mul      %[temp1],  %[temp1],  %[kC1]             \n\t"
+    "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
+    "sra      %[temp2],  %[temp2],  16                 \n\t"
+    "sra      %[temp13], %[temp13], 16                 \n\t"
+    "sra      %[temp1],  %[temp1],  16                 \n\t"
+    "sra      %[temp3],  %[temp3],  16                 \n\t"
+    "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
+    "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
+    "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"
+    "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
+    "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
+    "mul      %[temp1],  %[temp7],  %[kC1]             \n\t"
+    "mul      %[temp5],  %[temp5],  %[kC1]             \n\t"
+    "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
+    "sra      %[temp6],  %[temp6],  16                 \n\t"
+    "sra      %[temp1],  %[temp1],  16                 \n\t"
+    "sra      %[temp5],  %[temp5],  16                 \n\t"
+    "sra      %[temp7],  %[temp7],  16                 \n\t"
+    "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
+    "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
+    "addu     %[temp5],  %[temp16], %[temp11]          \n\t"
+    "subu     %[temp16], %[temp16], %[temp11]          \n\t"
+    "addu     %[temp11], %[temp8],  %[temp17]          \n\t"
+    "subu     %[temp8],  %[temp8],  %[temp17]          \n\t"
+    "sra      %[temp5],  %[temp5],  3                  \n\t"  // final ">> 3": these four values are added to dst row 0
+    "sra      %[temp16], %[temp16], 3                  \n\t"
+    "sra      %[temp11], %[temp11], 3                  \n\t"
+    "sra      %[temp8],  %[temp8],  3                  \n\t"
+    "addu     %[temp17], %[temp10], %[temp15]          \n\t"
+    "subu     %[temp10], %[temp10], %[temp15]          \n\t"
+    "addu     %[temp15], %[temp12], %[temp9]           \n\t"
+    "subu     %[temp12], %[temp12], %[temp9]           \n\t"
+    "sra      %[temp17], %[temp17], 3                  \n\t"  // final ">> 3": values for dst row 1
+    "sra      %[temp10], %[temp10], 3                  \n\t"
+    "sra      %[temp15], %[temp15], 3                  \n\t"
+    "sra      %[temp12], %[temp12], 3                  \n\t"
+    "addu     %[temp9],  %[temp14], %[temp3]           \n\t"
+    "subu     %[temp14], %[temp14], %[temp3]           \n\t"
+    "addu     %[temp3],  %[temp0],  %[temp13]          \n\t"
+    "subu     %[temp0],  %[temp0],  %[temp13]          \n\t"
+    "sra      %[temp9],  %[temp9],  3                  \n\t"  // final ">> 3": values for dst row 2
+    "sra      %[temp14], %[temp14], 3                  \n\t"
+    "sra      %[temp3],  %[temp3],  3                  \n\t"
+    "sra      %[temp0],  %[temp0],  3                  \n\t"
+    "addu     %[temp13], %[temp2],  %[temp7]           \n\t"
+    "subu     %[temp2],  %[temp2],  %[temp7]           \n\t"
+    "addu     %[temp7],  %[temp4],  %[temp1]           \n\t"
+    "subu     %[temp4],  %[temp4],  %[temp1]           \n\t"
+    "sra      %[temp13], %[temp13], 3                  \n\t"  // final ">> 3": values for dst row 3
+    "sra      %[temp2],  %[temp2],  3                  \n\t"
+    "sra      %[temp7],  %[temp7],  3                  \n\t"
+    "sra      %[temp4],  %[temp4],  3                  \n\t"
+    "addiu    %[temp6],  $zero,     255                \n\t"  // temp6 = 255, the upper clamp value used by every "movz" below
+    "lbu      %[temp1],  0+0*" XSTR(BPS) "(%[dst])     \n\t"  // dst row 0, column 0; BPS comes from outside this view, presumably the dst row stride
+    "addu     %[temp1],  %[temp1],  %[temp5]           \n\t"  // pixel + residual
+    "sra      %[temp5],  %[temp1],  8                  \n\t"  // non-zero exactly when the sum lies outside [0, 255]
+    "sra      %[temp18], %[temp1],  31                 \n\t"  // sign mask: 0 if the sum is >= 0, -1 if it is negative
+    "beqz     %[temp5],  1f                            \n\t"  // in range: keep the sum (assumes the assembler fills the branch delay slot - TODO confirm)
+    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"  // out of range: start from 0 ...
+    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"  // ... and take 255 instead when the sum was not negative
+  "1:                                                  \n\t"
+    "lbu      %[temp18], 1+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 1: same add-and-clamp pattern
+    "sb       %[temp1],  0+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp18], %[temp18], %[temp11]          \n\t"
+    "sra      %[temp11], %[temp18], 8                  \n\t"
+    "sra      %[temp1],  %[temp18], 31                 \n\t"
+    "beqz     %[temp11], 2f                            \n\t"
+    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
+    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
+  "2:                                                  \n\t"
+    "lbu      %[temp1],  2+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 2
+    "sb       %[temp18], 1+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp1],  %[temp1],  %[temp8]           \n\t"
+    "sra      %[temp8],  %[temp1],  8                  \n\t"
+    "sra      %[temp18], %[temp1],  31                 \n\t"
+    "beqz     %[temp8],  3f                            \n\t"
+    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
+    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
+  "3:                                                  \n\t"
+    "lbu      %[temp18], 3+0*" XSTR(BPS) "(%[dst])     \n\t"  // row 0, column 3
+    "sb       %[temp1],  2+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp18], %[temp18], %[temp16]          \n\t"
+    "sra      %[temp16], %[temp18], 8                  \n\t"
+    "sra      %[temp1],  %[temp18], 31                 \n\t"
+    "beqz     %[temp16], 4f                            \n\t"
+    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
+    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
+  "4:                                                  \n\t"
+    "sb       %[temp18], 3+0*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp5],  0+1*" XSTR(BPS) "(%[dst])     \n\t"  // dst row 1: load four pixels, add residuals, clamp, store
+    "lbu      %[temp8],  1+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp11], 2+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp16], 3+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp17]          \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp15]          \n\t"
+    "addu     %[temp11], %[temp11], %[temp12]          \n\t"
+    "addu     %[temp16], %[temp16], %[temp10]          \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "beqz     %[temp18], 5f                            \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "5:                                                  \n\t"
+    "sra      %[temp18], %[temp8],  8                  \n\t"
+    "sra      %[temp1],  %[temp8],  31                 \n\t"
+    "beqz     %[temp18], 6f                            \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp1]           \n\t"
+  "6:                                                  \n\t"
+    "sra      %[temp18], %[temp11], 8                  \n\t"
+    "sra      %[temp1],  %[temp11], 31                 \n\t"
+    "sra      %[temp17], %[temp16], 8                  \n\t"
+    "sra      %[temp15], %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 7f                            \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp1]           \n\t"
+  "7:                                                  \n\t"
+    "beqz     %[temp17], 8f                            \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp15]          \n\t"
+  "8:                                                  \n\t"
+    "sb       %[temp5],  0+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp8],  1+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp11], 2+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp16], 3+1*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp5],  0+2*" XSTR(BPS) "(%[dst])     \n\t"  // dst row 2: load four pixels, add residuals, clamp, store
+    "lbu      %[temp8],  1+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp11], 2+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp16], 3+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp9]           \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp3]           \n\t"
+    "addu     %[temp11], %[temp11], %[temp0]           \n\t"
+    "addu     %[temp16], %[temp16], %[temp14]          \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"  // range flags and sign masks for all four sums are computed first
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "sra      %[temp17], %[temp8],  8                  \n\t"
+    "sra      %[temp15], %[temp8],  31                 \n\t"
+    "sra      %[temp12], %[temp11], 8                  \n\t"
+    "sra      %[temp10], %[temp11], 31                 \n\t"
+    "sra      %[temp9],  %[temp16], 8                  \n\t"
+    "sra      %[temp3],  %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 9f                            \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "9:                                                  \n\t"
+    "beqz     %[temp17], 10f                           \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
+  "10:                                                 \n\t"
+    "beqz     %[temp12], 11f                           \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
+  "11:                                                 \n\t"
+    "beqz     %[temp9],  12f                           \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
+  "12:                                                 \n\t"
+    "sb       %[temp5],  0+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp8],  1+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp11], 2+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp16], 3+2*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp5],  0+3*" XSTR(BPS) "(%[dst])     \n\t"  // dst row 3: load four pixels, add residuals, clamp, store
+    "lbu      %[temp8],  1+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp11], 2+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "lbu      %[temp16], 3+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
+    "addu     %[temp8],  %[temp8],  %[temp7]           \n\t"
+    "addu     %[temp11], %[temp11], %[temp4]           \n\t"
+    "addu     %[temp16], %[temp16], %[temp2]           \n\t"
+    "sra      %[temp18], %[temp5],  8                  \n\t"
+    "sra      %[temp1],  %[temp5],  31                 \n\t"
+    "sra      %[temp17], %[temp8],  8                  \n\t"
+    "sra      %[temp15], %[temp8],  31                 \n\t"
+    "sra      %[temp12], %[temp11], 8                  \n\t"
+    "sra      %[temp10], %[temp11], 31                 \n\t"
+    "sra      %[temp9],  %[temp16], 8                  \n\t"
+    "sra      %[temp3],  %[temp16], 31                 \n\t"
+    "beqz     %[temp18], 13f                           \n\t"
+    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
+    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
+  "13:                                                 \n\t"
+    "beqz     %[temp17], 14f                           \n\t"
+    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
+    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
+  "14:                                                 \n\t"
+    "beqz     %[temp12], 15f                           \n\t"
+    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
+    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
+  "15:                                                 \n\t"
+    "beqz     %[temp9],  16f                           \n\t"
+    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
+    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
+  "16:                                                 \n\t"
+    "sb       %[temp5],  0+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp8],  1+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp11], 2+3*" XSTR(BPS) "(%[dst])     \n\t"
+    "sb       %[temp16], 3+3*" XSTR(BPS) "(%[dst])     \n\t"
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),  // all temps are early-clobber outputs ("=&r"): written before the inputs are fully consumed
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18)
+    : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)  // read-only register inputs
+    : "memory", "hi", "lo"  // "memory": dst is written via "sb"; hi/lo presumably listed because of the "mul" instructions
+  );
+}
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {  // transforms one block, or two side-by-side blocks when do_two is non-zero
+  TransformOne(in, dst);
+  if (do_two) {
+    TransformOne(in + 16, dst + 4);  // second block: next 16 coefficients, written 4 columns to the right
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitMIPS32(void);  // declaration ahead of the definition just below
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {  // points the VP8* hooks at this file's MIPS32 implementations
+  VP8InitClipTables();  // the filters below index VP8ksclip1 / VP8ksclip2 / VP8kclip1
+
+  VP8Transform = TransformTwo;
+
+  VP8VFilter16 = VFilter16;  // FilterLoop26 / FilterLoop24 based filters
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+
+  VP8SimpleVFilter16 = SimpleVFilter16;  // do_filter2-only ("simple") filters
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+}
+
+#else  // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)  // macro defined outside this view; presumably emits an empty VP8DspInitMIPS32() for non-MIPS32 builds - TODO confirm
+
+#endif  // WEBP_USE_MIPS32
diff --git a/drivers/webp/dsp/dec_mips_dsp_r2.c b/drivers/webp/dsp/dec_mips_dsp_r2.c
new file mode 100644
index 0000000000..db5c657228
--- /dev/null
+++ b/drivers/webp/dsp/dec_mips_dsp_r2.c
@@ -0,0 +1,994 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of dsp functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./mips_macro.h"
+
+static const int kC1 = 20091 + (1 << 16);  // 16.16 fixed point: 1 + 20091/65536 (~1.30656)
+static const int kC2 = 35468;  // 16.16 fixed point: 35468/65536 (~0.54120)
+
+#define MUL(a, b) (((a) * (b)) >> 16)  // integer product scaled back down by 2^16
+
+static void TransformDC(const int16_t* in, uint8_t* dst) {  // the visible asm reads in[0] only
+  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;  // asm scratch
+
+  __asm__ volatile (
+    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, dst,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)
+    "lh               %[temp5],  0(%[in])               \n\t"  // temp5 = in[0]
+    "addiu            %[temp5],  %[temp5],  4           \n\t"  // temp5 += 4
+    "ins              %[temp5],  %[temp5],  16, 16      \n\t"  // copy the low halfword into the high halfword
+    "shra.ph          %[temp5],  %[temp5],  3           \n\t"  // both halfwords now hold (in[0] + 4) >> 3
+    CONVERT_2_BYTES_TO_HALF(temp6, temp7, temp8, temp9, temp10, temp1, temp2,
+                            temp3, temp1, temp2, temp3, temp4)
+    STORE_SAT_SUM_X2(temp6, temp7, temp8, temp9, temp10, temp1, temp2, temp3,
+                     temp5, temp5, temp5, temp5, temp5, temp5, temp5, temp5,
+                     dst, 0, 1, 2, 3, BPS)
+
+    OUTPUT_EARLY_CLOBBER_REGS_10()
+    : [in]"r"(in), [dst]"r"(dst)
+    : "memory"  /* dst is accessed through the mips_macro.h helpers (bodies not visible here) */
+  );
+}
+
+static void TransformAC3(const int16_t* in, uint8_t* dst) {  // reads in[0], in[1] and in[4] only
+  const int a = in[0] + 4;  // in[0] with a +4 bias
+  int c4 = MUL(in[4], kC2);  // not const: the asm packs d4 into its upper halfword
+  const int d4 = MUL(in[4], kC1);
+  const int c1 = MUL(in[1], kC2);
+  const int d1 = MUL(in[1], kC1);
+  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;  // asm scratch
+  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
+
+  __asm__ volatile (
+    "ins              %[c4],      %[d4],     16,       16    \n\t"  // c4[31:16] = d4[15:0]
+    "replv.ph         %[temp1],   %[a]                       \n\t"  // both halfwords = low half of a
+    "replv.ph         %[temp4],   %[d1]                      \n\t"  // both halfwords = low half of d1
+    ADD_SUB_HALVES(temp2, temp3, temp1, c4)
+    "replv.ph         %[temp5],   %[c1]                      \n\t"  // both halfwords = low half of c1
+    SHIFT_R_SUM_X2(temp1, temp6, temp7, temp8, temp2, temp9, temp10, temp4,
+                   temp2, temp2, temp3, temp3, temp4, temp5, temp4, temp5)
+    LOAD_WITH_OFFSET_X4(temp3, temp5, temp11, temp12, dst,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)
+    CONVERT_2_BYTES_TO_HALF(temp13, temp14, temp3, temp15, temp5, temp16,
+                            temp11, temp17, temp3, temp5, temp11, temp12)
+    PACK_2_HALVES_TO_WORD(temp12, temp18, temp7, temp6, temp1, temp8, temp2,
+                          temp4, temp7, temp6, temp10, temp9)
+    STORE_SAT_SUM_X2(temp13, temp14, temp3, temp15, temp5, temp16, temp11,
+                     temp17, temp12, temp18, temp1, temp8, temp2, temp4,
+                     temp7, temp6, dst, 0, 1, 2, 3, BPS)
+
+    OUTPUT_EARLY_CLOBBER_REGS_18(),
+      [c4]"+&r"(c4)  /* read-write operand: modified by the "ins" above */
+    : [dst]"r"(dst), [a]"r"(a), [d1]"r"(d1), [d4]"r"(d4), [c1]"r"(c1)
+    : "memory"
+  );
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {  // helper macros come from mips_macro.h
+  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;  // asm scratch
+  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
+
+  __asm__ volatile (
+    "ulw              %[temp1],   0(%[in])                 \n\t"  // unaligned word load: in[0], in[1]
+    "ulw              %[temp2],   16(%[in])                \n\t"  // unaligned word load: in[8], in[9]
+    LOAD_IN_X2(temp5, temp6, 24, 26)
+    ADD_SUB_HALVES(temp3, temp4, temp1, temp2)
+    LOAD_IN_X2(temp1, temp2, 8, 10)
+    MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
+                  temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
+                  temp13, temp11, temp14, temp12)
+    INSERT_HALF_X2(temp8, temp7, temp10, temp9)
+    "ulw              %[temp17],  4(%[in])                 \n\t"  // unaligned word load: in[2], in[3]
+    "ulw              %[temp18],  20(%[in])                \n\t"  // unaligned word load: in[10], in[11]
+    ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
+    ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
+    ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
+    LOAD_IN_X2(temp17, temp18, 12, 14)
+    LOAD_IN_X2(temp9, temp10, 28, 30)
+    MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
+                  temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
+                  temp15, temp4, temp16, temp17)
+    INSERT_HALF_X2(temp11, temp12, temp13, temp14)
+    ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
+    ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
+
+    // horizontal pass (the code above is presumably the vertical pass)
+    SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
+    INSERT_HALF_X2(temp1, temp6, temp5, temp2)
+    SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
+    "repl.ph          %[temp2],   0x4                      \n\t"  // both halfwords = 4
+    INSERT_HALF_X2(temp3, temp8, temp17, temp4)
+    "addq.ph          %[temp1],   %[temp1],  %[temp2]      \n\t"  // add 4 to each halfword of temp1
+    "addq.ph          %[temp6],   %[temp6],  %[temp2]      \n\t"  // add 4 to each halfword of temp6
+    ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
+    ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
+    MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
+                  temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
+                  temp6, temp17, temp8, temp18)
+    MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
+                  temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
+                  temp18, temp12, temp17, temp16)
+    INSERT_HALF_X2(temp1, temp3, temp9, temp13)
+    INSERT_HALF_X2(temp6, temp8, temp11, temp15)
+    SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
+                   temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
+                   temp6)
+    PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
+                          temp16, temp11, temp10, temp15, temp14)
+    LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, dst,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)
+    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
+                            temp11, temp10, temp11, temp14, temp15)
+    STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,
+                     temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
+                     dst, 0, 1, 2, 3, BPS)
+
+    OUTPUT_EARLY_CLOBBER_REGS_18()
+    : [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2)
+    : "memory", "hi", "lo"  /* hi/lo presumably clobbered by multiplies inside MUL_SHIFT_SUM - confirm in mips_macro.h */
+  );
+}
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  // Block n uses in + 16 * n and dst + 4 * n; block 1 only if do_two != 0.
+  const int num_blocks = do_two ? 2 : 1;
+  int n;
+  for (n = 0; n < num_blocks; ++n) TransformOne(in + 16 * n, dst + 4 * n);
+}
+
+static WEBP_INLINE void FilterLoop26(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  const int thresh2 = 2 * thresh + 1;  // compared against 4*|p[-h]-p[0]| + |p[-2h]-p[h]|
+  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;  // asm scratch
+  int temp10, temp11, temp12, temp13, temp14, temp15;
+
+  __asm__ volatile (
+    ".set      push                                      \n\t"
+    ".set      noreorder                                 \n\t"  // branch delay slots are filled by hand (space-indented lines)
+  "1:                                                    \n\t"  // loop head: one position per iteration, body runs at least once
+    "negu      %[temp1],  %[hstride]                     \n\t"  // temp1 = -hstride
+    "addiu     %[size],   %[size],        -1             \n\t"  // --size
+    "sll       %[temp2],  %[hstride],     1              \n\t"  // temp2 = 2*hstride
+    "sll       %[temp3],  %[temp1],       1              \n\t"  // temp3 = -2*hstride
+    "addu      %[temp4],  %[temp2],       %[hstride]     \n\t"  // temp4 = 3*hstride
+    "addu      %[temp5],  %[temp3],       %[temp1]       \n\t"  // temp5 = -3*hstride
+    "lbu       %[temp7],  0(%[p])                        \n\t"  // temp7  = p[0]
+    "sll       %[temp6],  %[temp3],       1              \n\t"  // temp6 = -4*hstride
+    "lbux      %[temp8],  %[temp5](%[p])                 \n\t"  // temp8  = p[-3*hstride]
+    "lbux      %[temp9],  %[temp3](%[p])                 \n\t"  // temp9  = p[-2*hstride]
+    "lbux      %[temp10], %[temp1](%[p])                 \n\t"  // temp10 = p[-hstride]
+    "lbux      %[temp11], %[temp6](%[p])                 \n\t"  // temp11 = p[-4*hstride]
+    "lbux      %[temp12], %[hstride](%[p])               \n\t"  // temp12 = p[hstride]
+    "lbux      %[temp13], %[temp2](%[p])                 \n\t"  // temp13 = p[2*hstride]
+    "lbux      %[temp14], %[temp4](%[p])                 \n\t"  // temp14 = p[3*hstride]
+    "subu      %[temp1],  %[temp10],      %[temp7]       \n\t"  // p[-h] - p[0]
+    "subu      %[temp2],  %[temp9],       %[temp12]      \n\t"  // p[-2h] - p[h]
+    "absq_s.w  %[temp3],  %[temp1]                       \n\t"
+    "absq_s.w  %[temp4],  %[temp2]                       \n\t"
+    "negu      %[temp1],  %[temp1]                       \n\t"  // temp1 = p[0] - p[-h]
+    "sll       %[temp3],  %[temp3],       2              \n\t"
+    "addu      %[temp15], %[temp3],       %[temp4]       \n\t"  // 4*|p[-h]-p[0]| + |p[-2h]-p[h]|
+    "subu      %[temp3],  %[temp15],      %[thresh2]     \n\t"
+    "sll       %[temp6],  %[temp1],       1              \n\t"  // temp6 = 2*(p[0] - p[-h])
+    "bgtz      %[temp3],  3f                             \n\t"  // above thresh2: leave this position untouched
+    " subu     %[temp4],  %[temp11],      %[temp8]       \n\t"
+    "absq_s.w  %[temp4],  %[temp4]                       \n\t"
+    "shll_s.w  %[temp2],  %[temp2],       24             \n\t"  // with the "sra 24" below: saturate p[-2h]-p[h] to 8 signed bits
+    "subu      %[temp4],  %[temp4],       %[ithresh]     \n\t"
+    "bgtz      %[temp4],  3f                             \n\t"  // |p[-4h]-p[-3h]| > ithresh: skip
+    " subu     %[temp3],  %[temp8],       %[temp9]       \n\t"
+    "absq_s.w  %[temp3],  %[temp3]                       \n\t"
+    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+    "bgtz      %[temp3],  3f                             \n\t"  // |p[-3h]-p[-2h]| > ithresh: skip
+    " subu     %[temp5],  %[temp9],       %[temp10]      \n\t"
+    "absq_s.w  %[temp3],  %[temp5]                       \n\t"
+    "absq_s.w  %[temp5],  %[temp5]                       \n\t"
+    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+    "bgtz      %[temp3],  3f                             \n\t"  // |p[-2h]-p[-h]| > ithresh: skip
+    " subu     %[temp3],  %[temp14],      %[temp13]      \n\t"
+    "absq_s.w  %[temp3],  %[temp3]                       \n\t"
+    "slt       %[temp5],  %[hev_thresh],  %[temp5]       \n\t"  // temp5 = (|p[-2h]-p[-h]| > hev_thresh)
+    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+    "bgtz      %[temp3],  3f                             \n\t"  // |p[3h]-p[2h]| > ithresh: skip
+    " subu     %[temp3],  %[temp13],      %[temp12]      \n\t"
+    "absq_s.w  %[temp3],  %[temp3]                       \n\t"
+    "sra       %[temp4],  %[temp2],       24             \n\t"
+    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+    "bgtz      %[temp3],  3f                             \n\t"  // |p[2h]-p[h]| > ithresh: skip
+    " subu     %[temp15], %[temp12],      %[temp7]       \n\t"
+    "absq_s.w  %[temp3],  %[temp15]                      \n\t"
+    "absq_s.w  %[temp15], %[temp15]                      \n\t"
+    "subu      %[temp3],  %[temp3],       %[ithresh]     \n\t"
+    "bgtz      %[temp3],  3f                             \n\t"  // |p[h]-p[0]| > ithresh: skip
+    " slt      %[temp15], %[hev_thresh],  %[temp15]      \n\t"  // temp15 = (|p[h]-p[0]| > hev_thresh)
+    "addu      %[temp3],  %[temp6],       %[temp1]       \n\t"  // temp3 = 3*(p[0] - p[-h])
+    "or        %[temp2],  %[temp5],       %[temp15]      \n\t"  // either hev_thresh test tripped?
+    "addu      %[temp5],  %[temp4],       %[temp3]       \n\t"
+    "beqz      %[temp2],  4f                             \n\t"  // neither tripped: take the six-pixel path at 4:
+    " shra_r.w %[temp1],  %[temp5],       3              \n\t"
+    "addiu     %[temp2],  %[temp5],       3              \n\t"
+    "sra       %[temp2],  %[temp2],       3              \n\t"
+    "shll_s.w  %[temp1],  %[temp1],       27             \n\t"  // shll_s 27 / sra 27 pairs saturate to 5 signed bits
+    "shll_s.w  %[temp2],  %[temp2],       27             \n\t"
+    "subu      %[temp3],  %[p],           %[hstride]     \n\t"  // temp3 = address of p[-hstride]
+    "sra       %[temp1],  %[temp1],       27             \n\t"
+    "sra       %[temp2],  %[temp2],       27             \n\t"
+    "subu      %[temp1],  %[temp7],       %[temp1]       \n\t"
+    "addu      %[temp2],  %[temp10],      %[temp2]       \n\t"
+    "lbux      %[temp2],  %[temp2](%[VP8kclip1])         \n\t"  // table lookup through VP8kclip1 (declared elsewhere)
+    "lbux      %[temp1],  %[temp1](%[VP8kclip1])         \n\t"
+    "sb        %[temp2],  0(%[temp3])                    \n\t"  // this path rewrites p[-hstride] and p[0] only
+    "j         3f                                        \n\t"
+    " sb       %[temp1],  0(%[p])                        \n\t"
+  "4:                                                    \n\t"  // six-pixel path: rewrites p[-3h] .. p[2h]
+    "shll_s.w  %[temp5],  %[temp5],       24             \n\t"
+    "subu      %[temp14], %[p],           %[hstride]     \n\t"  // temp14 = &p[-hstride]
+    "subu      %[temp11], %[temp14],      %[hstride]     \n\t"  // temp11 = &p[-2*hstride]
+    "sra       %[temp6],  %[temp5],       24             \n\t"  // temp6 = a, saturated to 8 signed bits
+    "sll       %[temp1],  %[temp6],       3              \n\t"
+    "subu      %[temp15], %[temp11],      %[hstride]     \n\t"  // temp15 = &p[-3*hstride]
+    "addu      %[temp2],  %[temp6],       %[temp1]       \n\t"  // 9*a
+    "sll       %[temp3],  %[temp2],       1              \n\t"  // 18*a
+    "addu      %[temp4],  %[temp3],       %[temp2]       \n\t"  // 27*a
+    "addiu     %[temp2],  %[temp2],       63             \n\t"
+    "addiu     %[temp3],  %[temp3],       63             \n\t"
+    "addiu     %[temp4],  %[temp4],       63             \n\t"
+    "sra       %[temp2],  %[temp2],       7              \n\t"  // (9*a + 63) >> 7
+    "sra       %[temp3],  %[temp3],       7              \n\t"  // (18*a + 63) >> 7
+    "sra       %[temp4],  %[temp4],       7              \n\t"  // (27*a + 63) >> 7
+    "addu      %[temp1],  %[temp8],       %[temp2]       \n\t"
+    "addu      %[temp5],  %[temp9],       %[temp3]       \n\t"
+    "addu      %[temp6],  %[temp10],      %[temp4]       \n\t"
+    "subu      %[temp8],  %[temp7],       %[temp4]       \n\t"
+    "subu      %[temp7],  %[temp12],      %[temp3]       \n\t"
+    "addu      %[temp10], %[p],           %[hstride]     \n\t"  // temp10 = &p[hstride]
+    "subu      %[temp9],  %[temp13],      %[temp2]       \n\t"
+    "addu      %[temp12], %[temp10],      %[hstride]     \n\t"  // temp12 = &p[2*hstride]
+    "lbux      %[temp2],  %[temp1](%[VP8kclip1])         \n\t"
+    "lbux      %[temp3],  %[temp5](%[VP8kclip1])         \n\t"
+    "lbux      %[temp4],  %[temp6](%[VP8kclip1])         \n\t"
+    "lbux      %[temp5],  %[temp8](%[VP8kclip1])         \n\t"
+    "lbux      %[temp6],  %[temp7](%[VP8kclip1])         \n\t"
+    "lbux      %[temp8],  %[temp9](%[VP8kclip1])         \n\t"
+    "sb        %[temp2],  0(%[temp15])                   \n\t"
+    "sb        %[temp3],  0(%[temp11])                   \n\t"
+    "sb        %[temp4],  0(%[temp14])                   \n\t"
+    "sb        %[temp5],  0(%[p])                        \n\t"
+    "sb        %[temp6],  0(%[temp10])                   \n\t"
+    "sb        %[temp8],  0(%[temp12])                   \n\t"
+  "3:                                                    \n\t"  // loop tail, shared by every skip branch
+    "bgtz      %[size],   1b                             \n\t"  // repeat while size > 0
+    " addu     %[p],      %[p],           %[vstride]     \n\t"  // delay slot: p += vstride
+    ".set      pop                                       \n\t"
+    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),[temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+      [temp7]"=&r"(temp7),[temp8]"=&r"(temp8),[temp9]"=&r"(temp9),
+      [temp10]"=&r"(temp10),[temp11]"=&r"(temp11),[temp12]"=&r"(temp12),
+      [temp13]"=&r"(temp13),[temp14]"=&r"(temp14),[temp15]"=&r"(temp15),
+      [size]"+&r"(size), [p]"+&r"(p)  /* both are updated in place by the loop */
+    : [hstride]"r"(hstride), [thresh2]"r"(thresh2),
+      [ithresh]"r"(ithresh),[vstride]"r"(vstride), [hev_thresh]"r"(hev_thresh),
+      [VP8kclip1]"r"(VP8kclip1)
+    : "memory"
+  );
+}
+
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  int p0, q0, p1, q1, p2, q2, p3, q3;  // pixels: pN = p[-(N+1)*hstride], qN = p[N*hstride]
+  int step1, step2, temp1, temp2, temp3, temp4;  // step1/step2: running negative/positive offsets
+  uint8_t* pTemp0;  // set to p - hstride, later stepped to p - 2*hstride
+  uint8_t* pTemp1;  // set to p + hstride
+  const int thresh2 = 2 * thresh + 1;  // compared against 4*|p0-q0| + |p1-q1|
+
+  __asm__ volatile (
+    ".set      push                                   \n\t"
+    ".set      noreorder                              \n\t"  // branch delay slots are filled by hand (space-indented lines)
+    "bltz      %[size],    3f                         \n\t"  // negative size: nothing to do (size == 0 still runs the body once)
+    " nop                                             \n\t"
+  "2:                                                 \n\t"  // loop head: one position per iteration
+    "negu      %[step1],   %[hstride]                 \n\t"
+    "lbu       %[q0],      0(%[p])                    \n\t"  // q0 = p[0]
+    "lbux      %[p0],      %[step1](%[p])             \n\t"  // p0 = p[-hstride]
+    "subu      %[step1],   %[step1],      %[hstride]  \n\t"
+    "lbux      %[q1],      %[hstride](%[p])           \n\t"  // q1 = p[hstride]
+    "subu      %[temp1],   %[p0],         %[q0]       \n\t"
+    "lbux      %[p1],      %[step1](%[p])             \n\t"  // p1 = p[-2*hstride]
+    "addu      %[step2],   %[hstride],    %[hstride]  \n\t"
+    "absq_s.w  %[temp2],   %[temp1]                   \n\t"
+    "subu      %[temp3],   %[p1],         %[q1]       \n\t"  // temp3 = p1 - q1 (reused on the 1: path)
+    "absq_s.w  %[temp4],   %[temp3]                   \n\t"
+    "sll       %[temp2],   %[temp2],      2           \n\t"
+    "addu      %[temp2],   %[temp2],      %[temp4]    \n\t"  // 4*|p0-q0| + |p1-q1|
+    "subu      %[temp4],   %[temp2],      %[thresh2]  \n\t"
+    "subu      %[step1],   %[step1],      %[hstride]  \n\t"
+    "bgtz      %[temp4],   0f                         \n\t"  // above thresh2: leave this position untouched
+    " lbux     %[p2],      %[step1](%[p])             \n\t"  // p2 = p[-3*hstride]
+    "subu      %[step1],   %[step1],      %[hstride]  \n\t"
+    "lbux      %[q2],      %[step2](%[p])             \n\t"  // q2 = p[2*hstride]
+    "lbux      %[p3],      %[step1](%[p])             \n\t"  // p3 = p[-4*hstride]
+    "subu      %[temp4],   %[p2],         %[p1]       \n\t"
+    "addu      %[step2],   %[step2],      %[hstride]  \n\t"
+    "subu      %[temp2],   %[p3],         %[p2]       \n\t"
+    "absq_s.w  %[temp4],   %[temp4]                   \n\t"
+    "absq_s.w  %[temp2],   %[temp2]                   \n\t"
+    "lbux      %[q3],      %[step2](%[p])             \n\t"  // q3 = p[3*hstride]
+    "subu      %[temp4],   %[temp4],      %[ithresh]  \n\t"
+    "negu      %[temp1],   %[temp1]                   \n\t"  // temp1 = q0 - p0
+    "bgtz      %[temp4],   0f                         \n\t"  // |p2-p1| > ithresh: skip
+    " subu     %[temp2],   %[temp2],      %[ithresh]  \n\t"
+    "subu      %[p3],      %[p1],         %[p0]       \n\t"
+    "bgtz      %[temp2],   0f                         \n\t"  // |p3-p2| > ithresh: skip
+    " absq_s.w %[p3],      %[p3]                      \n\t"  // p3 register reused for |p1-p0|
+    "subu      %[temp4],   %[q3],         %[q2]       \n\t"
+    "subu      %[pTemp0],  %[p],          %[hstride]  \n\t"
+    "absq_s.w  %[temp4],   %[temp4]                   \n\t"
+    "subu      %[temp2],   %[p3],         %[ithresh]  \n\t"
+    "sll       %[step1],   %[temp1],      1           \n\t"  // step1 reused: 2*(q0 - p0)
+    "bgtz      %[temp2],   0f                         \n\t"  // |p1-p0| > ithresh: skip
+    " subu     %[temp4],   %[temp4],      %[ithresh]  \n\t"
+    "subu      %[temp2],   %[q2],         %[q1]       \n\t"
+    "bgtz      %[temp4],   0f                         \n\t"  // |q3-q2| > ithresh: skip
+    " absq_s.w %[temp2],   %[temp2]                   \n\t"
+    "subu      %[q3],      %[q1],         %[q0]       \n\t"
+    "absq_s.w  %[q3],      %[q3]                      \n\t"  // q3 register reused for |q1-q0|
+    "subu      %[temp2],   %[temp2],      %[ithresh]  \n\t"
+    "addu      %[temp1],   %[temp1],      %[step1]    \n\t"  // temp1 = 3*(q0 - p0)
+    "bgtz      %[temp2],   0f                         \n\t"  // |q2-q1| > ithresh: skip
+    " subu     %[temp4],   %[q3],         %[ithresh]  \n\t"
+    "slt       %[p3],      %[hev_thresh], %[p3]       \n\t"  // p3 = (|p1-p0| > hev_thresh)
+    "bgtz      %[temp4],   0f                         \n\t"  // |q1-q0| > ithresh: skip
+    " slt      %[q3],      %[hev_thresh], %[q3]       \n\t"  // q3 = (|q1-q0| > hev_thresh)
+    "or        %[q3],      %[q3],         %[p3]       \n\t"
+    "bgtz      %[q3],      1f                         \n\t"  // either hev_thresh test tripped: two-pixel path at 1:
+    " shra_r.w %[temp2],   %[temp1],      3           \n\t"
+    "addiu     %[temp1],   %[temp1],      3           \n\t"
+    "sra       %[temp1],   %[temp1],      3           \n\t"
+    "shll_s.w  %[temp2],   %[temp2],      27          \n\t"  // shll_s 27 / sra 27 pairs saturate to 5 signed bits
+    "shll_s.w  %[temp1],   %[temp1],      27          \n\t"
+    "addu      %[pTemp1],  %[p],          %[hstride]  \n\t"
+    "sra       %[temp2],   %[temp2],      27          \n\t"
+    "sra       %[temp1],   %[temp1],      27          \n\t"
+    "addiu     %[step1],   %[temp2],      1           \n\t"
+    "sra       %[step1],   %[step1],      1           \n\t"  // step1 = (temp2 + 1) >> 1
+    "addu      %[p0],      %[p0],         %[temp1]    \n\t"
+    "addu      %[p1],      %[p1],         %[step1]    \n\t"
+    "subu      %[q0],      %[q0],         %[temp2]    \n\t"
+    "subu      %[q1],      %[q1],         %[step1]    \n\t"
+    "lbux      %[temp2],   %[p0](%[VP8kclip1])        \n\t"  // table lookups through VP8kclip1 (declared elsewhere)
+    "lbux      %[temp3],   %[q0](%[VP8kclip1])        \n\t"
+    "lbux      %[temp4],   %[q1](%[VP8kclip1])        \n\t"
+    "sb        %[temp2],   0(%[pTemp0])               \n\t"  // p[-hstride]
+    "lbux      %[temp1],   %[p1](%[VP8kclip1])        \n\t"
+    "subu      %[pTemp0],  %[pTemp0],    %[hstride]   \n\t"
+    "sb        %[temp3],   0(%[p])                    \n\t"  // p[0]
+    "sb        %[temp4],   0(%[pTemp1])               \n\t"  // p[hstride]
+    "j         0f                                     \n\t"
+    " sb       %[temp1],   0(%[pTemp0])               \n\t"  // p[-2*hstride]
+  "1:                                                 \n\t"  // two-pixel path: rewrites p[-hstride] and p[0] only
+    "shll_s.w  %[temp3],   %[temp3],      24          \n\t"
+    "sra       %[temp3],   %[temp3],      24          \n\t"  // p1 - q1 saturated to 8 signed bits
+    "addu      %[temp1],   %[temp1],      %[temp3]    \n\t"
+    "shra_r.w  %[temp2],   %[temp1],      3           \n\t"
+    "addiu     %[temp1],   %[temp1],      3           \n\t"
+    "shll_s.w  %[temp2],   %[temp2],      27          \n\t"
+    "sra       %[temp1],   %[temp1],      3           \n\t"
+    "shll_s.w  %[temp1],   %[temp1],      27          \n\t"
+    "sra       %[temp2],   %[temp2],      27          \n\t"
+    "sra       %[temp1],   %[temp1],      27          \n\t"
+    "addu      %[p0],      %[p0],         %[temp1]    \n\t"
+    "subu      %[q0],      %[q0],         %[temp2]    \n\t"
+    "lbux      %[temp1],   %[p0](%[VP8kclip1])        \n\t"
+    "lbux      %[temp2],   %[q0](%[VP8kclip1])        \n\t"
+    "sb        %[temp2],   0(%[p])                    \n\t"
+    "sb        %[temp1],   0(%[pTemp0])               \n\t"
+  "0:                                                 \n\t"  // loop tail, shared by every skip branch
+    "subu      %[size],    %[size],       1           \n\t"
+    "bgtz      %[size],    2b                         \n\t"  // repeat while size > 0
+    " addu     %[p],       %[p],          %[vstride]  \n\t"  // delay slot: p += vstride
+  "3:                                                 \n\t"
+    ".set      pop                                    \n\t"
+    : [p0]"=&r"(p0), [q0]"=&r"(q0), [p1]"=&r"(p1), [q1]"=&r"(q1),
+      [p2]"=&r"(p2), [q2]"=&r"(q2), [p3]"=&r"(p3), [q3]"=&r"(q3),
+      [step2]"=&r"(step2), [step1]"=&r"(step1), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+      [pTemp0]"=&r"(pTemp0), [pTemp1]"=&r"(pTemp1), [p]"+&r"(p),
+      [size]"+&r"(size)  /* p and size are updated in place by the loop */
+    : [vstride]"r"(vstride), [ithresh]"r"(ithresh),
+      [hev_thresh]"r"(hev_thresh), [hstride]"r"(hstride),
+      [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
+    : "memory"
+  );
+}
+
+// on macroblock edges: 16 positions via FilterLoop26, p advancing by 1 each
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);  // hstride = stride, vstride = 1, size = 16
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {  // VFilter16 with the two strides swapped
+  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);  // hstride = 1, vstride = stride, size = 16
+}
+
+// 8-pixel-wide variant, for chroma filtering: the u and v buffers are handled in turn
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);  // hstride = stride, vstride = 1, size = 8
+  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);  // same parameters on v
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {  // VFilter8 with the two strides swapped
+  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);  // hstride = 1, vstride = stride, size = 8
+  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);  // same parameters on v
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int edge;  // 1..3: edges at p + 4 * stride, p + 8 * stride, p + 12 * stride
+  for (edge = 1; edge <= 3; ++edge) {
+    uint8_t* const row = p + edge * 4 * stride;
+    FilterLoop24(row, stride, 1, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int edge;  // 1..3: edges at p + 4, p + 8 and p + 12
+  for (edge = 1; edge <= 3; ++edge) {
+    uint8_t* const col = p + 4 * edge;
+    FilterLoop24(col, 1, stride, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {  // one edge per buffer, at offset 4 * stride
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);  // hstride = stride, vstride = 1, size = 8
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);  // same parameters on v
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {  // one edge per buffer, at offset 4
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);  // hstride = 1, vstride = stride, size = 8
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);  // same parameters on v
+}
+
+#undef MUL
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  int i;  // loop counter, set to 16 inside the asm
+  const int thresh2 = 2 * thresh + 1;  // compared against 4*|p[-stride]-p[0]| + |p[-2*stride]-p[stride]|
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;  // asm scratch
+  uint8_t* p1 = p - stride;  // store address for the p[-stride] result; advanced in step with p
+  __asm__ volatile (
+    ".set      push                                      \n\t"
+    ".set      noreorder                                 \n\t"  // branch delay slots are filled by hand (space-indented lines)
+    "li        %[i],        16                           \n\t"  // 16 positions
+  "0:                                                    \n\t"  // loop head
+    "negu      %[temp4],    %[stride]                    \n\t"
+    "sll       %[temp5],    %[temp4],       1            \n\t"
+    "lbu       %[temp2],    0(%[p])                      \n\t"  // temp2 = p[0]
+    "lbux      %[temp3],    %[stride](%[p])              \n\t"  // temp3 = p[stride]
+    "lbux      %[temp1],    %[temp4](%[p])               \n\t"  // temp1 = p[-stride]
+    "lbux      %[temp0],    %[temp5](%[p])               \n\t"  // temp0 = p[-2*stride]
+    "subu      %[temp7],    %[temp1],       %[temp2]     \n\t"
+    "subu      %[temp6],    %[temp0],       %[temp3]     \n\t"
+    "absq_s.w  %[temp4],    %[temp7]                     \n\t"
+    "absq_s.w  %[temp5],    %[temp6]                     \n\t"
+    "sll       %[temp4],    %[temp4],       2            \n\t"
+    "subu      %[temp5],    %[temp5],       %[thresh2]   \n\t"
+    "addu      %[temp5],    %[temp4],       %[temp5]     \n\t"  // 4*|temp7| + |temp6| - thresh2
+    "negu      %[temp8],    %[temp7]                     \n\t"  // temp8 = p[0] - p[-stride]
+    "bgtz      %[temp5],    1f                           \n\t"  // above thresh2: leave this position untouched
+    " addiu    %[i],        %[i],           -1           \n\t"  // delay slot: --i (runs on both paths)
+    "sll       %[temp4],    %[temp8],       1            \n\t"
+    "shll_s.w  %[temp5],    %[temp6],       24           \n\t"  // shll_s 24 / sra 24: saturate to 8 signed bits
+    "addu      %[temp3],    %[temp4],       %[temp8]     \n\t"  // 3*(p[0] - p[-stride])
+    "sra       %[temp5],    %[temp5],       24           \n\t"
+    "addu      %[temp3],    %[temp3],       %[temp5]     \n\t"
+    "addiu     %[temp7],    %[temp3],       3            \n\t"
+    "sra       %[temp7],    %[temp7],       3            \n\t"
+    "shra_r.w  %[temp8],    %[temp3],       3            \n\t"
+    "shll_s.w  %[temp0],    %[temp7],       27           \n\t"  // shll_s 27 / sra 27: saturate to 5 signed bits
+    "shll_s.w  %[temp4],    %[temp8],       27           \n\t"
+    "sra       %[temp0],    %[temp0],       27           \n\t"
+    "sra       %[temp4],    %[temp4],       27           \n\t"
+    "addu      %[temp7],    %[temp1],       %[temp0]     \n\t"
+    "subu      %[temp2],    %[temp2],       %[temp4]     \n\t"
+    "lbux      %[temp3],    %[temp7](%[VP8kclip1])       \n\t"  // table lookups through VP8kclip1 (declared elsewhere)
+    "lbux      %[temp4],    %[temp2](%[VP8kclip1])       \n\t"
+    "sb        %[temp3],    0(%[p1])                     \n\t"  // new p[-stride]
+    "sb        %[temp4],    0(%[p])                      \n\t"  // new p[0]
+  "1:                                                    \n\t"
+    "addiu     %[p1],       %[p1],          1            \n\t"
+    "bgtz      %[i],        0b                           \n\t"  // repeat while i > 0
+    " addiu    %[p],        %[p],           1            \n\t"  // delay slot: ++p
+    " .set     pop                                       \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [p]"+&r"(p), [i]"=&r"(i), [p1]"+&r"(p1)
+    : [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
+    : "memory"
+  );
+}
+
+// Four "lbu" byte loads: TEMP0 = SRC[A + A1 * BPS], TEMP1 = SRC[B + B1 * BPS],
+//                        TEMP2 = SRC[C + C1 * BPS], TEMP3 = SRC[D + D1 * BPS].
+// TEMPx and SRC are asm operand names; A..D1 are stringified into the offset
+// field of each instruction, so they must be literal constants.
+#define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3,                               \
+                     A, A1, B, B1, C, C1, D, D1, SRC)                          \
+  "lbu      %[" #TEMP0 "],   " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
+  "lbu      %[" #TEMP1 "],   " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
+  "lbu      %[" #TEMP2 "],   " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
+  "lbu      %[" #TEMP3 "],   " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t"   \
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  int i;  // loop counter, set to 16 inside the asm
+  const int thresh2 = 2 * thresh + 1;  // compared against 4*|p[-1]-p[0]| + |p[-2]-p[1]|
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;  // asm scratch
+  __asm__ volatile (
+    ".set      push                                     \n\t"
+    ".set      noreorder                                \n\t"  // branch delay slots are filled by hand (space-indented lines)
+    "li        %[i],       16                           \n\t"  // 16 positions
+  "0:                                                   \n\t"  // loop head
+    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -2, 0, -1, 0, 0, 0, 1, 0, p)  // temp0..temp3 = p[-2], p[-1], p[0], p[1]
+    "subu      %[temp7],    %[temp1],       %[temp2]    \n\t"
+    "subu      %[temp6],    %[temp0],       %[temp3]    \n\t"
+    "absq_s.w  %[temp4],    %[temp7]                    \n\t"
+    "absq_s.w  %[temp5],    %[temp6]                    \n\t"
+    "sll       %[temp4],    %[temp4],       2           \n\t"
+    "addu      %[temp5],    %[temp4],       %[temp5]    \n\t"
+    "subu      %[temp5],    %[temp5],       %[thresh2]  \n\t"  // 4*|temp7| + |temp6| - thresh2
+    "negu      %[temp8],    %[temp7]                    \n\t"  // temp8 = p[0] - p[-1]
+    "bgtz      %[temp5],    1f                          \n\t"  // above thresh2: leave this position untouched
+    " addiu    %[i],        %[i],           -1          \n\t"  // delay slot: --i (runs on both paths)
+    "sll       %[temp4],    %[temp8],       1           \n\t"
+    "shll_s.w  %[temp5],    %[temp6],       24          \n\t"  // shll_s 24 / sra 24: saturate to 8 signed bits
+    "addu      %[temp3],    %[temp4],       %[temp8]    \n\t"  // 3*(p[0] - p[-1])
+    "sra       %[temp5],    %[temp5],       24          \n\t"
+    "addu      %[temp3],    %[temp3],       %[temp5]    \n\t"
+    "addiu     %[temp7],    %[temp3],       3           \n\t"
+    "sra       %[temp7],    %[temp7],       3           \n\t"
+    "shra_r.w  %[temp8],    %[temp3],       3           \n\t"
+    "shll_s.w  %[temp0],    %[temp7],       27          \n\t"  // shll_s 27 / sra 27: saturate to 5 signed bits
+    "shll_s.w  %[temp4],    %[temp8],       27          \n\t"
+    "sra       %[temp0],    %[temp0],       27          \n\t"
+    "sra       %[temp4],    %[temp4],       27          \n\t"
+    "addu      %[temp7],    %[temp1],       %[temp0]    \n\t"
+    "subu      %[temp2],    %[temp2],       %[temp4]    \n\t"
+    "lbux      %[temp3],    %[temp7](%[VP8kclip1])      \n\t"  // table lookups through VP8kclip1 (declared elsewhere)
+    "lbux      %[temp4],    %[temp2](%[VP8kclip1])      \n\t"
+    "sb        %[temp3],    -1(%[p])                    \n\t"  // new p[-1]
+    "sb        %[temp4],    0(%[p])                     \n\t"  // new p[0]
+  "1:                                                   \n\t"
+    "bgtz      %[i],        0b                          \n\t"  // repeat while i > 0
+    " addu     %[p],        %[p],           %[stride]   \n\t"  // delay slot: p += stride
+    ".set      pop                                      \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [p]"+&r"(p), [i]"=&r"(i)
+    : [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
+    : "memory"
+  );
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {  // applies SimpleVFilter16 three times
+  int k;
+  for (k = 3; k > 0; --k) {  // k = 3, 2, 1
+    p += 4 * stride;  // step 4 * stride bytes before each call (presumably 4 rows)
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {  // applies SimpleHFilter16 three times
+  int k;
+  for (k = 3; k > 0; --k) {  // k = 3, 2, 1
+    p += 4;  // step 4 bytes to the right before each call
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+// Emits two 'usw' (unaligned store word) asm lines: DST[A * BPS] = TEMP0 and
+// DST[B + C * BPS] = TEMP1. Arguments are asm operand names / literal offsets.
+#define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST)                              \
+  "usw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #DST "])         \n\t"     \
+  "usw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #DST "])  \n\t"
+
+static void VE4(uint8_t* dst) {    // vertical: one filtered 4-byte word written to rows 0..3
+  const uint8_t* top = dst - BPS;  // BPS bytes before dst (presumably the row above)
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+  __asm__ volatile (
+    "ulw             %[temp0],   -1(%[top])              \n\t"  // 4 bytes from top[-1]
+    "ulh             %[temp1],   3(%[top])               \n\t"  // 2 bytes from top[3]
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"  // widen bytes to halfwords
+    "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp4],   %[temp1]                \n\t"
+    "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "packrl.ph       %[temp6],   %[temp4],    %[temp3]   \n\t"
+    "shll.ph         %[temp5],   %[temp5],    1          \n\t"  // double the packed (middle) terms
+    "shll.ph         %[temp6],   %[temp6],    1          \n\t"
+    "addq.ph         %[temp2],   %[temp5],    %[temp2]   \n\t"  // three-term sums per halfword
+    "addq.ph         %[temp6],   %[temp6],    %[temp4]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp3]   \n\t"
+    "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t"
+    "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"  // rounding shift right by 2
+    "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
+    "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t"  // pack halfwords back to 4 bytes
+    STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)  // rows 0 and 1
+    STORE_8_BYTES(temp4, temp4, 2, 0, 3, dst)  // rows 2 and 3
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void DC4(uint8_t* dst) {   // DC: fills rows 0..3 (4 bytes each) with one byte value
+  int temp0, temp1, temp2, temp3, temp4;
+  __asm__ volatile (
+    "ulw          %[temp0],   -1*" XSTR(BPS) "(%[dst]) \n\t"  // 4 bytes at dst[-BPS]
+    LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // macro defined outside this view; presumably left-column bytes
+    "ins          %[temp1],   %[temp2],    8,     8    \n\t"  // pack the four loaded bytes into temp1
+    "ins          %[temp1],   %[temp3],    16,    8    \n\t"
+    "ins          %[temp1],   %[temp4],    24,    8    \n\t"
+    "raddu.w.qb   %[temp0],   %[temp0]                 \n\t"  // sum of the 4 bytes in temp0
+    "raddu.w.qb   %[temp1],   %[temp1]                 \n\t"  // sum of the 4 bytes in temp1
+    "addu         %[temp0],   %[temp0],    %[temp1]    \n\t"
+    "shra_r.w     %[temp0],   %[temp0],    3           \n\t"  // rounded divide by 8 (8 samples summed)
+    "replv.qb     %[temp0],   %[temp0]                 \n\t"  // replicate the low byte into all 4 bytes
+    STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)  // rows 0 and 1
+    STORE_8_BYTES(temp0, temp0, 2, 0, 3, dst)  // rows 2 and 3
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void RD4(uint8_t* dst) {   // Down-right; writes 4 bytes to each of rows 0..3
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8;
+  __asm__ volatile (
+    LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // macro defined outside this view; presumably left-column bytes
+    "ulw            %[temp7],   -1-" XSTR(BPS) "(%[dst])       \n\t"  // 4 bytes from dst[-1 - BPS]
+    "ins            %[temp1],   %[temp0], 16, 16               \n\t"
+    "preceu.ph.qbr  %[temp5],   %[temp7]                       \n\t"
+    "ins            %[temp2],   %[temp1], 16, 16               \n\t"
+    "preceu.ph.qbl  %[temp4],   %[temp7]                       \n\t"
+    "ins            %[temp3],   %[temp2], 16, 16               \n\t"
+    "shll.ph        %[temp2],   %[temp2], 1                    \n\t"
+    "addq.ph        %[temp3],   %[temp3], %[temp1]             \n\t"
+    "packrl.ph      %[temp6],   %[temp5], %[temp1]             \n\t"
+    "addq.ph        %[temp3],   %[temp3], %[temp2]             \n\t"
+    "addq.ph        %[temp1],   %[temp1], %[temp5]             \n\t"
+    "shll.ph        %[temp6],   %[temp6], 1                    \n\t"
+    "addq.ph        %[temp1],   %[temp1], %[temp6]             \n\t"
+    "packrl.ph      %[temp0],   %[temp4], %[temp5]             \n\t"
+    "addq.ph        %[temp8],   %[temp5], %[temp4]             \n\t"
+    "shra_r.ph      %[temp3],   %[temp3], 2                    \n\t"  // rounding shift right by 2 per halfword
+    "shll.ph        %[temp0],   %[temp0], 1                    \n\t"
+    "shra_r.ph      %[temp1],   %[temp1], 2                    \n\t"
+    "addq.ph        %[temp8],   %[temp0], %[temp8]             \n\t"
+    "lbu            %[temp5],   3-" XSTR(BPS) "(%[dst])        \n\t"  // single byte at dst[3 - BPS]
+    "precrq.ph.w    %[temp7],   %[temp7], %[temp7]             \n\t"
+    "shra_r.ph      %[temp8],   %[temp8], 2                    \n\t"
+    "ins            %[temp7],   %[temp5], 0,  8                \n\t"
+    "precr.qb.ph    %[temp2],   %[temp1], %[temp3]             \n\t"
+    "raddu.w.qb     %[temp4],   %[temp7]                       \n\t"
+    "precr.qb.ph    %[temp6],   %[temp8], %[temp1]             \n\t"
+    "shra_r.w       %[temp4],   %[temp4], 2                    \n\t"
+    STORE_8_BYTES(temp2, temp6, 3, 0, 1, dst)  // rows 3 and 1
+    "prepend        %[temp2],   %[temp8], 8                    \n\t"  // shift one new byte into each row word
+    "prepend        %[temp6],   %[temp4], 8                    \n\t"
+    STORE_8_BYTES(temp2, temp6, 2, 0, 0, dst)  // rows 2 and 0
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// Emits two 'ulw' (unaligned load word) asm lines: TEMP0 = SRC[A * BPS] and
+// TEMP1 = SRC[B + C * BPS]. Arguments are asm operand names / literal offsets.
+#define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC)                               \
+  "ulw    %[" #TEMP0 "],   " #A "*" XSTR(BPS) "(%[" #SRC "])         \n\t"     \
+  "ulw    %[" #TEMP1 "],   " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "])  \n\t"
+
+static void LD4(uint8_t* dst) {   // Down-Left; reads only the 8 bytes at dst[-BPS..], writes rows 0..3
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  __asm__ volatile (
+    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)  // temp0 = dst[-BPS..], temp1 = dst[4 - BPS..]
+    "preceu.ph.qbl   %[temp2],    %[temp0]                     \n\t"  // widen bytes to halfwords
+    "preceu.ph.qbr   %[temp3],    %[temp0]                     \n\t"
+    "preceu.ph.qbr   %[temp4],    %[temp1]                     \n\t"
+    "preceu.ph.qbl   %[temp5],    %[temp1]                     \n\t"
+    "packrl.ph       %[temp6],    %[temp2],    %[temp3]        \n\t"
+    "packrl.ph       %[temp7],    %[temp4],    %[temp2]        \n\t"
+    "packrl.ph       %[temp8],    %[temp5],    %[temp4]        \n\t"
+    "shll.ph         %[temp6],    %[temp6],    1               \n\t"
+    "addq.ph         %[temp9],    %[temp2],    %[temp6]        \n\t"
+    "shll.ph         %[temp7],    %[temp7],    1               \n\t"
+    "addq.ph         %[temp9],    %[temp9],    %[temp3]        \n\t"
+    "shll.ph         %[temp8],    %[temp8],    1               \n\t"
+    "shra_r.ph       %[temp9],    %[temp9],    2               \n\t"  // rounding shift right by 2 per halfword
+    "addq.ph         %[temp3],    %[temp4],    %[temp7]        \n\t"
+    "addq.ph         %[temp0],    %[temp5],    %[temp8]        \n\t"
+    "addq.ph         %[temp3],    %[temp3],    %[temp2]        \n\t"
+    "addq.ph         %[temp0],    %[temp0],    %[temp4]        \n\t"
+    "shra_r.ph       %[temp3],    %[temp3],    2               \n\t"
+    "shra_r.ph       %[temp0],    %[temp0],    2               \n\t"
+    "srl             %[temp1],    %[temp1],    24              \n\t"  // isolate the top byte of temp1
+    "sll             %[temp1],    %[temp1],    1               \n\t"  // ... and double it
+    "raddu.w.qb      %[temp5],    %[temp5]                     \n\t"
+    "precr.qb.ph     %[temp9],    %[temp3],    %[temp9]        \n\t"
+    "precr.qb.ph     %[temp3],    %[temp0],    %[temp3]        \n\t"
+    "addu            %[temp1],    %[temp1],    %[temp5]        \n\t"
+    "shra_r.w        %[temp1],    %[temp1],    2               \n\t"
+    STORE_8_BYTES(temp9, temp3, 0, 0, 2, dst)  // rows 0 and 2
+    "prepend         %[temp9],    %[temp0],    8               \n\t"  // shift one new byte into each row word
+    "prepend         %[temp3],    %[temp1],    8               \n\t"
+    STORE_8_BYTES(temp9, temp3, 1, 0, 3, dst)  // rows 1 and 3
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void DC8uv(uint8_t* dst) {     // DC: fills rows 0..7 (8 bytes each) with one byte value
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  __asm__ volatile (
+    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)  // 8 bytes at dst[-BPS..]
+    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // macro defined outside this view; presumably left column, rows 0..3
+    LOAD_4_BYTES(temp6, temp7, temp8, temp9, -1, 4, -1, 5, -1, 6, -1, 7, dst)  // presumably left column, rows 4..7
+    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"  // sum of the 4 bytes in temp0
+    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"  // sum of the 4 bytes in temp1
+    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"  // pairwise reduction of the eight single values
+    "addu         %[temp4],   %[temp4],    %[temp5]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp7]      \n\t"
+    "addu         %[temp8],   %[temp8],    %[temp9]      \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp1]      \n\t"
+    "addu         %[temp2],   %[temp2],    %[temp4]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp8]      \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp2]      \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp6]      \n\t"  // temp0 = total of all 16 samples
+    "shra_r.w     %[temp0],   %[temp0],    4             \n\t"  // rounded divide by 16
+    "replv.qb     %[temp0],   %[temp0]                   \n\t"  // replicate the low byte into all 4 bytes
+    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)  // each line stores bytes 0..3 and 4..7 of one row
+    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
+    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
+    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
+    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
+    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
+    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples: uses only the 8 bytes at dst[-BPS..]
+  int temp0, temp1;
+  __asm__ volatile (
+    LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)  // temp0 = dst[-BPS..], temp1 = dst[4 - BPS..]
+    "raddu.w.qb   %[temp0],   %[temp0]                   \n\t"  // sum of the 4 bytes in temp0
+    "raddu.w.qb   %[temp1],   %[temp1]                   \n\t"  // sum of the 4 bytes in temp1
+    "addu         %[temp0],   %[temp0],    %[temp1]      \n\t"
+    "shra_r.w     %[temp0],   %[temp0],    3             \n\t"  // rounded divide by 8
+    "replv.qb     %[temp0],   %[temp0]                   \n\t"  // replicate the low byte into all 4 bytes
+    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)  // each line stores bytes 0..3 and 4..7 of one row
+    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
+    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
+    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
+    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
+    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
+    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples: uses only the eight LOAD_4_BYTES values
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8;
+  __asm__ volatile (
+    LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)  // macro defined outside this view; presumably left column, rows 0..3
+    LOAD_4_BYTES(temp6, temp7, temp8, temp1, -1, 4, -1, 5, -1, 6, -1, 7, dst)  // presumably left column, rows 4..7
+    "addu         %[temp2],   %[temp2],    %[temp3]      \n\t"  // pairwise reduction of the eight values
+    "addu         %[temp4],   %[temp4],    %[temp5]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp7]      \n\t"
+    "addu         %[temp8],   %[temp8],    %[temp1]      \n\t"
+    "addu         %[temp2],   %[temp2],    %[temp4]      \n\t"
+    "addu         %[temp6],   %[temp6],    %[temp8]      \n\t"
+    "addu         %[temp0],   %[temp6],    %[temp2]      \n\t"  // temp0 = total of the 8 samples
+    "shra_r.w     %[temp0],   %[temp0],    3             \n\t"  // rounded divide by 8
+    "replv.qb     %[temp0],   %[temp0]                   \n\t"  // replicate the low byte into all 4 bytes
+    STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)  // each line stores bytes 0..3 and 4..7 of one row
+    STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
+    STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
+    STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
+    STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
+    STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
+    STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
+    STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+#undef LOAD_8_BYTES
+#undef STORE_8_BYTES
+#undef LOAD_4_BYTES
+
+#define CLIPPING(SIZE) /* adds dst_1 to widened bytes of temp0 (+temp1 if 8) */ \
+  "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t"                 \
+  "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t"                 \
+".if " #SIZE " == 8                                      \n\t"                 \
+  "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t"                 \
+  "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t"                 \
+".endif                                                  \n\t"                 \
+  "addu.ph         %[temp2],   %[temp2],   %[dst_1]      \n\t"                 \
+  "addu.ph         %[temp0],   %[temp0],   %[dst_1]      \n\t"                 \
+".if " #SIZE " == 8                                      \n\t"                 \
+  "addu.ph         %[temp3],   %[temp3],   %[dst_1]      \n\t"                 \
+  "addu.ph         %[temp1],   %[temp1],   %[dst_1]      \n\t"                 \
+".endif                                                  \n\t"                 \
+  "shll_s.ph       %[temp2],   %[temp2],   7             \n\t"                 \
+  "shll_s.ph       %[temp0],   %[temp0],   7             \n\t"                 \
+".if " #SIZE " == 8                                      \n\t"                 \
+  "shll_s.ph       %[temp3],   %[temp3],   7             \n\t"                 \
+  "shll_s.ph       %[temp1],   %[temp1],   7             \n\t"                 \
+".endif                                                  \n\t"                 \
+  "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t"                 \
+".if " #SIZE " == 8                                      \n\t"                 \
+  "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"                 \
+".endif                                                  \n\t"  // saturating shift + pack brings halfwords back to bytes
+
+
+#define CLIP_8B_TO_DST(DST, TOP, SIZE) do { /* one row; needs top_1 in scope */ \
+  int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1];                              \
+  int temp0, temp1, temp2, temp3;                                              \
+  __asm__ volatile (                                                           \
+  ".if " #SIZE " < 8                                     \n\t"                 \
+    "ulw             %[temp0],   0(%[top])               \n\t"                 \
+    "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"                 \
+    CLIPPING(4)                                                                \
+    "usw             %[temp0],   0(%[dst])               \n\t"                 \
+  ".else                                                 \n\t"                 \
+    "ulw             %[temp0],   0(%[top])               \n\t"                 \
+    "ulw             %[temp1],   4(%[top])               \n\t"                 \
+    "subu.ph         %[dst_1],   %[dst_1],    %[top_1]   \n\t"                 \
+    CLIPPING(8)                                                                \
+    "usw             %[temp0],   0(%[dst])               \n\t"                 \
+    "usw             %[temp1],   4(%[dst])               \n\t"                 \
+  ".if " #SIZE " == 16                                   \n\t"                 \
+    "ulw             %[temp0],   8(%[top])               \n\t"                 \
+    "ulw             %[temp1],   12(%[top])              \n\t"                 \
+    CLIPPING(8)                                                                \
+    "usw             %[temp0],   8(%[dst])               \n\t"                 \
+    "usw             %[temp1],   12(%[dst])              \n\t"                 \
+  ".endif                                                \n\t"                 \
+  ".endif                                                \n\t"                 \
+    : [dst_1]"+&r"(dst_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),           \
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)                                 \
+    : [top_1]"r"(top_1), [top]"r"((TOP)), [dst]"r"((DST))                      \
+    : "memory"                                                                 \
+  );                                                                           \
+} while (0)  // per byte: TOP[x] + (DST[-1] - top_1 halfword), packed back to bytes by CLIPPING
+
+#define CLIP_TO_DST(DST, SIZE) do { /* SIZE rows; top stays fixed, DST moves */ \
+  int y;                                                                       \
+  const uint8_t* top = (DST) - BPS;                                            \
+  const int top_1 = ((int)top[-1] << 16) + top[-1];                            \
+  for (y = 0; y < (SIZE); ++y) {                                               \
+    CLIP_8B_TO_DST((DST), top, (SIZE));                                        \
+    (DST) += BPS;                                                              \
+  }                                                                            \
+} while (0)  // top_1 holds top[-1] in both 16-bit halves; CLIP_8B_TO_DST reads it by name
+
+#define TRUE_MOTION(DST, SIZE) /* defines static TrueMotion<SIZE>(uint8_t*) */  \
+static void TrueMotion##SIZE(uint8_t* (DST)) {                                 \
+  CLIP_TO_DST((DST), (SIZE));                                                  \
+}
+
+TRUE_MOTION(dst, 4)  // TrueMotion4
+TRUE_MOTION(dst, 8)  // TrueMotion8
+TRUE_MOTION(dst, 16)  // TrueMotion16
+
+#undef TRUE_MOTION
+#undef CLIP_TO_DST
+#undef CLIP_8B_TO_DST
+#undef CLIPPING
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPSdspR2(void) {  // points the VP8* dsp hooks at this file's implementations
+  VP8TransformDC = TransformDC;  // transforms
+  VP8TransformAC3 = TransformAC3;
+  VP8Transform = TransformTwo;
+
+  VP8VFilter16 = VFilter16;  // loop filters
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+
+  VP8PredLuma4[0] = DC4;  // only slots 0, 1, 2, 4, 6 are assigned here; others are left untouched
+  VP8PredLuma4[1] = TrueMotion4;
+  VP8PredLuma4[2] = VE4;
+  VP8PredLuma4[4] = RD4;
+  VP8PredLuma4[6] = LD4;
+
+  VP8PredChroma8[0] = DC8uv;  // only slots 0, 1, 4, 5 are assigned here
+  VP8PredChroma8[1] = TrueMotion8;
+  VP8PredChroma8[4] = DC8uvNoTop;
+  VP8PredChroma8[5] = DC8uvNoLeft;
+
+  VP8PredLuma16[1] = TrueMotion16;  // only slot 1 is assigned here
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8DspInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/dec_neon.c b/drivers/webp/dsp/dec_neon.c
index ec824b790b..a63f43fe17 100644
--- a/drivers/webp/dsp/dec_neon.c
+++ b/drivers/webp/dsp/dec_neon.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // ARM NEON version of dsp functions and loop filtering.
@@ -14,13 +16,535 @@
 
 #if defined(WEBP_USE_NEON)
 
+#include "./neon.h"
 #include "../dec/vp8i.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+//------------------------------------------------------------------------------
+// NxM Loading functions
+
+// Load/Store vertical edge
+#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
+  "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \
+  "vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \
+  "vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 "," #stride "\n" \
+  "vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \
+  "vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n"
+
+#define STORE8x2(c1, c2, p, stride)                                            \
+  "vst2.8   {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n"                    \
+  "vst2.8   {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n"
+
+#if !defined(WORK_AROUND_GCC)
+
+// This intrinsics version makes gcc-4.6.3 crash during Load4x??() compilation
+// (register alloc, probably). The variants somewhat mitigate the problem, but
+// not quite. HFilter16i() remains problematic.
+static WEBP_INLINE uint8x8x4_t Load4x8(const uint8_t* const src, int stride) {  // reads 4 bytes from each of 8 rows
+  const uint8x8_t zero = vdup_n_u8(0);
+  uint8x8x4_t out;
+  INIT_VECTOR4(out, zero, zero, zero, zero);  // start from all-zero vectors before the lane loads
+  out = vld4_lane_u8(src + 0 * stride, out, 0);  // row r: 4 consecutive bytes -> lane r of val[0..3]
+  out = vld4_lane_u8(src + 1 * stride, out, 1);
+  out = vld4_lane_u8(src + 2 * stride, out, 2);
+  out = vld4_lane_u8(src + 3 * stride, out, 3);
+  out = vld4_lane_u8(src + 4 * stride, out, 4);
+  out = vld4_lane_u8(src + 5 * stride, out, 5);
+  out = vld4_lane_u8(src + 6 * stride, out, 6);
+  out = vld4_lane_u8(src + 7 * stride, out, 7);
+  return out;  // val[c] holds byte column c of the 8 rows
+}
+
+static WEBP_INLINE void Load4x16(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {  // 4 byte columns (src-2 .. src+1) over 16 rows
+  // row0 = p1[0..7]|p0[0..7]|q0[0..7]|q1[0..7]
+  // row8 = p1[8..15]|p0[8..15]|q0[8..15]|q1[8..15]
+  const uint8x8x4_t row0 = Load4x8(src - 2 + 0 * stride, stride);  // rows 0..7
+  const uint8x8x4_t row8 = Load4x8(src - 2 + 8 * stride, stride);  // rows 8..15
+  *p1 = vcombine_u8(row0.val[0], row8.val[0]);  // column at src - 2
+  *p0 = vcombine_u8(row0.val[1], row8.val[1]);  // column at src - 1
+  *q0 = vcombine_u8(row0.val[2], row8.val[2]);  // column at src + 0
+  *q1 = vcombine_u8(row0.val[3], row8.val[3]);  // column at src + 1
+}
+
+#else  // WORK_AROUND_GCC
+
+#define LOADQ_LANE_32b(VALUE, LANE) do {  /* load 4 bytes at src, next row */ \
+  (VALUE) = vld1q_lane_u32((const uint32_t*)src, (VALUE), (LANE));   \
+  src += stride;                                                     \
+} while (0)
+
+static WEBP_INLINE void Load4x16(const uint8_t* src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {  // same outputs as the intrinsics variant, built via transposes
+  const uint32x4_t zero = vdupq_n_u32(0);
+  uint32x4x4_t in;
+  INIT_VECTOR4(in, zero, zero, zero, zero);
+  src -= 2;  // start two bytes to the left of src
+  LOADQ_LANE_32b(in.val[0], 0);  // rows 0..3 -> lane 0 of val[0..3]
+  LOADQ_LANE_32b(in.val[1], 0);
+  LOADQ_LANE_32b(in.val[2], 0);
+  LOADQ_LANE_32b(in.val[3], 0);
+  LOADQ_LANE_32b(in.val[0], 1);  // rows 4..7 -> lane 1
+  LOADQ_LANE_32b(in.val[1], 1);
+  LOADQ_LANE_32b(in.val[2], 1);
+  LOADQ_LANE_32b(in.val[3], 1);
+  LOADQ_LANE_32b(in.val[0], 2);  // rows 8..11 -> lane 2
+  LOADQ_LANE_32b(in.val[1], 2);
+  LOADQ_LANE_32b(in.val[2], 2);
+  LOADQ_LANE_32b(in.val[3], 2);
+  LOADQ_LANE_32b(in.val[0], 3);  // rows 12..15 -> lane 3
+  LOADQ_LANE_32b(in.val[1], 3);
+  LOADQ_LANE_32b(in.val[2], 3);
+  LOADQ_LANE_32b(in.val[3], 3);
+  // Transpose four 4x4 parts:
+  {
+    const uint8x16x2_t row01 = vtrnq_u8(vreinterpretq_u8_u32(in.val[0]),
+                                        vreinterpretq_u8_u32(in.val[1]));  // 8-bit interleave of val[0]/val[1]
+    const uint8x16x2_t row23 = vtrnq_u8(vreinterpretq_u8_u32(in.val[2]),
+                                        vreinterpretq_u8_u32(in.val[3]));  // 8-bit interleave of val[2]/val[3]
+    const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]),
+                                         vreinterpretq_u16_u8(row23.val[0]));  // 16-bit step: byte columns 0 and 2
+    const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]),
+                                         vreinterpretq_u16_u8(row23.val[1]));  // 16-bit step: byte columns 1 and 3
+    *p1 = vreinterpretq_u8_u16(row02.val[0]);  // byte column 0
+    *p0 = vreinterpretq_u8_u16(row13.val[0]);  // byte column 1
+    *q0 = vreinterpretq_u8_u16(row02.val[1]);  // byte column 2
+    *q1 = vreinterpretq_u8_u16(row13.val[1]);  // byte column 3
+  }
+}
+#undef LOADQ_LANE_32b
+
+#endif  // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Load8x16(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p3, uint8x16_t* const p2,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1,
+                                 uint8x16_t* const q2, uint8x16_t* const q3) {  // 8 byte columns, as two Load4x16 calls
+  Load4x16(src - 2, stride, p3, p2, p1, p0);  // Load4x16 itself reads from (its src) - 2
+  Load4x16(src + 2, stride, q0, q1, q2, q3);
+}
+
+static WEBP_INLINE void Load16x4(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1) {  // four 16-byte rows around src
+  *p1 = vld1q_u8(src - 2 * stride);  // two strides before src
+  *p0 = vld1q_u8(src - 1 * stride);  // one stride before src
+  *q0 = vld1q_u8(src + 0 * stride);  // at src
+  *q1 = vld1q_u8(src + 1 * stride);  // one stride after src
+}
+
+static WEBP_INLINE void Load16x8(const uint8_t* const src, int stride,
+                                 uint8x16_t* const p3, uint8x16_t* const p2,
+                                 uint8x16_t* const p1, uint8x16_t* const p0,
+                                 uint8x16_t* const q0, uint8x16_t* const q1,
+                                 uint8x16_t* const q2, uint8x16_t* const q3) {  // eight 16-byte rows, as two Load16x4 calls
+  Load16x4(src - 2  * stride, stride, p3, p2, p1, p0);  // rows src - 4*stride .. src - 1*stride
+  Load16x4(src + 2  * stride, stride, q0, q1, q2, q3);  // rows src + 0*stride .. src + 3*stride
+}
+
+static WEBP_INLINE void Load8x8x2(const uint8_t* const u,
+                                  const uint8_t* const v,
+                                  int stride,
+                                  uint8x16_t* const p3, uint8x16_t* const p2,
+                                  uint8x16_t* const p1, uint8x16_t* const p0,
+                                  uint8x16_t* const q0, uint8x16_t* const q1,
+                                  uint8x16_t* const q2, uint8x16_t* const q3) {  // 8 rows of 8 bytes from each of u and v
+  // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
+  // and the v-samples on the higher half.
+  *p3 = vcombine_u8(vld1_u8(u - 4 * stride), vld1_u8(v - 4 * stride));  // rows run from -4*stride ...
+  *p2 = vcombine_u8(vld1_u8(u - 3 * stride), vld1_u8(v - 3 * stride));
+  *p1 = vcombine_u8(vld1_u8(u - 2 * stride), vld1_u8(v - 2 * stride));
+  *p0 = vcombine_u8(vld1_u8(u - 1 * stride), vld1_u8(v - 1 * stride));
+  *q0 = vcombine_u8(vld1_u8(u + 0 * stride), vld1_u8(v + 0 * stride));
+  *q1 = vcombine_u8(vld1_u8(u + 1 * stride), vld1_u8(v + 1 * stride));
+  *q2 = vcombine_u8(vld1_u8(u + 2 * stride), vld1_u8(v + 2 * stride));
+  *q3 = vcombine_u8(vld1_u8(u + 3 * stride), vld1_u8(v + 3 * stride));  // ... to +3*stride
+}
+
+#if !defined(WORK_AROUND_GCC)
+
+#define LOAD_UV_8(ROW) \
+  vcombine_u8(vld1_u8(u - 4 + (ROW) * stride), vld1_u8(v - 4 + (ROW) * stride))  // 8 bytes from u-4 (low half) and v-4 (high half) of one row
+
+static WEBP_INLINE void Load8x8x2T(const uint8_t* const u,
+                                   const uint8_t* const v,
+                                   int stride,
+                                   uint8x16_t* const p3, uint8x16_t* const p2,
+                                   uint8x16_t* const p1, uint8x16_t* const p0,
+                                   uint8x16_t* const q0, uint8x16_t* const q1,
+                                   uint8x16_t* const q2, uint8x16_t* const q3) {  // loads 8 rows, then transposes so outputs are columns
+  // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
+  // and the v-samples on the higher half.
+  const uint8x16_t row0 = LOAD_UV_8(0);
+  const uint8x16_t row1 = LOAD_UV_8(1);
+  const uint8x16_t row2 = LOAD_UV_8(2);
+  const uint8x16_t row3 = LOAD_UV_8(3);
+  const uint8x16_t row4 = LOAD_UV_8(4);
+  const uint8x16_t row5 = LOAD_UV_8(5);
+  const uint8x16_t row6 = LOAD_UV_8(6);
+  const uint8x16_t row7 = LOAD_UV_8(7);
+  // Perform two side-by-side 8x8 transposes
+  // u00 u01 u02 u03 u04 u05 u06 u07 | v00 v01 v02 v03 v04 v05 v06 v07
+  // u10 u11 u12 u13 u14 u15 u16 u17 | v10 v11 v12 ...
+  // u20 u21 u22 u23 u24 u25 u26 u27 | v20 v21 ...
+  // u30 u31 u32 u33 u34 u35 u36 u37 | ...
+  // u40 u41 u42 u43 u44 u45 u46 u47 | ...
+  // u50 u51 u52 u53 u54 u55 u56 u57 | ...
+  // u60 u61 u62 u63 u64 u65 u66 u67 | v60 ...
+  // u70 u71 u72 u73 u74 u75 u76 u77 | v70 v71 v72 ...
+  const uint8x16x2_t row01 = vtrnq_u8(row0, row1);  // u00 u10 u02 u12 ...
+                                                    // u01 u11 u03 u13 ...
+  const uint8x16x2_t row23 = vtrnq_u8(row2, row3);  // u20 u30 u22 u32 ...
+                                                    // u21 u31 u23 u33 ...
+  const uint8x16x2_t row45 = vtrnq_u8(row4, row5);  // ...
+  const uint8x16x2_t row67 = vtrnq_u8(row6, row7);  // ...
+  const uint16x8x2_t row02 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[0]),
+                                       vreinterpretq_u16_u8(row23.val[0]));  // second stage: 16-bit element transposes
+  const uint16x8x2_t row13 = vtrnq_u16(vreinterpretq_u16_u8(row01.val[1]),
+                                       vreinterpretq_u16_u8(row23.val[1]));
+  const uint16x8x2_t row46 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[0]),
+                                       vreinterpretq_u16_u8(row67.val[0]));
+  const uint16x8x2_t row57 = vtrnq_u16(vreinterpretq_u16_u8(row45.val[1]),
+                                       vreinterpretq_u16_u8(row67.val[1]));
+  const uint32x4x2_t row04 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[0]),
+                                       vreinterpretq_u32_u16(row46.val[0]));  // third stage: 32-bit element transposes
+  const uint32x4x2_t row26 = vtrnq_u32(vreinterpretq_u32_u16(row02.val[1]),
+                                       vreinterpretq_u32_u16(row46.val[1]));
+  const uint32x4x2_t row15 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[0]),
+                                       vreinterpretq_u32_u16(row57.val[0]));
+  const uint32x4x2_t row37 = vtrnq_u32(vreinterpretq_u32_u16(row13.val[1]),
+                                       vreinterpretq_u32_u16(row57.val[1]));
+  *p3 = vreinterpretq_u8_u32(row04.val[0]);  // outputs in order p3, p2, p1, p0, q0, q1, q2, q3
+  *p2 = vreinterpretq_u8_u32(row15.val[0]);
+  *p1 = vreinterpretq_u8_u32(row26.val[0]);
+  *p0 = vreinterpretq_u8_u32(row37.val[0]);
+  *q0 = vreinterpretq_u8_u32(row04.val[1]);
+  *q1 = vreinterpretq_u8_u32(row15.val[1]);
+  *q2 = vreinterpretq_u8_u32(row26.val[1]);
+  *q3 = vreinterpretq_u8_u32(row37.val[1]);
+}
+#undef LOAD_UV_8
+
+#endif  // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Store2x8(const uint8x8x2_t v,
+                                 uint8_t* const dst, int stride) {
+  vst2_lane_u8(dst + 0 * stride, v, 0);
+  vst2_lane_u8(dst + 1 * stride, v, 1);
+  vst2_lane_u8(dst + 2 * stride, v, 2);
+  vst2_lane_u8(dst + 3 * stride, v, 3);
+  vst2_lane_u8(dst + 4 * stride, v, 4);
+  vst2_lane_u8(dst + 5 * stride, v, 5);
+  vst2_lane_u8(dst + 6 * stride, v, 6);
+  vst2_lane_u8(dst + 7 * stride, v, 7);
+}
 
-#define QRegs "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",                  \
+static WEBP_INLINE void Store2x16(const uint8x16_t p0, const uint8x16_t q0,
+                                  uint8_t* const dst, int stride) {
+  uint8x8x2_t lo, hi;
+  lo.val[0] = vget_low_u8(p0);
+  lo.val[1] = vget_low_u8(q0);
+  hi.val[0] = vget_high_u8(p0);
+  hi.val[1] = vget_high_u8(q0);
+  Store2x8(lo, dst - 1 + 0 * stride, stride);
+  Store2x8(hi, dst - 1 + 8 * stride, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+static WEBP_INLINE void Store4x8(const uint8x8x4_t v,
+                                 uint8_t* const dst, int stride) {
+  vst4_lane_u8(dst + 0 * stride, v, 0);
+  vst4_lane_u8(dst + 1 * stride, v, 1);
+  vst4_lane_u8(dst + 2 * stride, v, 2);
+  vst4_lane_u8(dst + 3 * stride, v, 3);
+  vst4_lane_u8(dst + 4 * stride, v, 4);
+  vst4_lane_u8(dst + 5 * stride, v, 5);
+  vst4_lane_u8(dst + 6 * stride, v, 6);
+  vst4_lane_u8(dst + 7 * stride, v, 7);
+}
+
+static WEBP_INLINE void Store4x16(const uint8x16_t p1, const uint8x16_t p0,
+                                  const uint8x16_t q0, const uint8x16_t q1,
+                                  uint8_t* const dst, int stride) {
+  uint8x8x4_t lo, hi;
+  INIT_VECTOR4(lo,
+               vget_low_u8(p1), vget_low_u8(p0),
+               vget_low_u8(q0), vget_low_u8(q1));
+  INIT_VECTOR4(hi,
+               vget_high_u8(p1), vget_high_u8(p0),
+               vget_high_u8(q0), vget_high_u8(q1));
+  Store4x8(lo, dst - 2 + 0 * stride, stride);
+  Store4x8(hi, dst - 2 + 8 * stride, stride);
+}
+#endif  // !WORK_AROUND_GCC
+
+static WEBP_INLINE void Store16x2(const uint8x16_t p0, const uint8x16_t q0,
+                                  uint8_t* const dst, int stride) {
+  vst1q_u8(dst - stride, p0);
+  vst1q_u8(dst, q0);
+}
+
+static WEBP_INLINE void Store16x4(const uint8x16_t p1, const uint8x16_t p0,
+                                  const uint8x16_t q0, const uint8x16_t q1,
+                                  uint8_t* const dst, int stride) {
+  Store16x2(p1, p0, dst - stride, stride);
+  Store16x2(q0, q1, dst + stride, stride);
+}
+
+static WEBP_INLINE void Store8x2x2(const uint8x16_t p0, const uint8x16_t q0,
+                                   uint8_t* const u, uint8_t* const v,
+                                   int stride) {
+  // p0 and q0 contain the u+v samples packed in low/high halves.
+  vst1_u8(u - stride, vget_low_u8(p0));
+  vst1_u8(u,          vget_low_u8(q0));
+  vst1_u8(v - stride, vget_high_u8(p0));
+  vst1_u8(v,          vget_high_u8(q0));
+}
+
+static WEBP_INLINE void Store8x4x2(const uint8x16_t p1, const uint8x16_t p0,
+                                   const uint8x16_t q0, const uint8x16_t q1,
+                                   uint8_t* const u, uint8_t* const v,
+                                   int stride) {
+  // The p1...q1 registers contain the u+v samples packed in low/high halves.
+  Store8x2x2(p1, p0, u - stride, v - stride, stride);
+  Store8x2x2(q0, q1, u + stride, v + stride, stride);
+}
+
+#if !defined(WORK_AROUND_GCC)
+
+#define STORE6_LANE(DST, VAL0, VAL1, LANE) do {   \
+  vst3_lane_u8((DST) - 3, (VAL0), (LANE));        \
+  vst3_lane_u8((DST) + 0, (VAL1), (LANE));        \
+  (DST) += stride;                                \
+} while (0)
+
+static WEBP_INLINE void Store6x8x2(const uint8x16_t p2, const uint8x16_t p1,
+                                   const uint8x16_t p0, const uint8x16_t q0,
+                                   const uint8x16_t q1, const uint8x16_t q2,
+                                   uint8_t* u, uint8_t* v,
+                                   int stride) {
+  uint8x8x3_t u0, u1, v0, v1;
+  INIT_VECTOR3(u0, vget_low_u8(p2), vget_low_u8(p1), vget_low_u8(p0));
+  INIT_VECTOR3(u1, vget_low_u8(q0), vget_low_u8(q1), vget_low_u8(q2));
+  INIT_VECTOR3(v0, vget_high_u8(p2), vget_high_u8(p1), vget_high_u8(p0));
+  INIT_VECTOR3(v1, vget_high_u8(q0), vget_high_u8(q1), vget_high_u8(q2));
+  STORE6_LANE(u, u0, u1, 0);
+  STORE6_LANE(u, u0, u1, 1);
+  STORE6_LANE(u, u0, u1, 2);
+  STORE6_LANE(u, u0, u1, 3);
+  STORE6_LANE(u, u0, u1, 4);
+  STORE6_LANE(u, u0, u1, 5);
+  STORE6_LANE(u, u0, u1, 6);
+  STORE6_LANE(u, u0, u1, 7);
+  STORE6_LANE(v, v0, v1, 0);
+  STORE6_LANE(v, v0, v1, 1);
+  STORE6_LANE(v, v0, v1, 2);
+  STORE6_LANE(v, v0, v1, 3);
+  STORE6_LANE(v, v0, v1, 4);
+  STORE6_LANE(v, v0, v1, 5);
+  STORE6_LANE(v, v0, v1, 6);
+  STORE6_LANE(v, v0, v1, 7);
+}
+#undef STORE6_LANE
+
+static WEBP_INLINE void Store4x8x2(const uint8x16_t p1, const uint8x16_t p0,
+                                   const uint8x16_t q0, const uint8x16_t q1,
+                                   uint8_t* const u, uint8_t* const v,
+                                   int stride) {
+  uint8x8x4_t u0, v0;
+  INIT_VECTOR4(u0,
+               vget_low_u8(p1), vget_low_u8(p0),
+               vget_low_u8(q0), vget_low_u8(q1));
+  INIT_VECTOR4(v0,
+               vget_high_u8(p1), vget_high_u8(p0),
+               vget_high_u8(q0), vget_high_u8(q1));
+  vst4_lane_u8(u - 2 + 0 * stride, u0, 0);
+  vst4_lane_u8(u - 2 + 1 * stride, u0, 1);
+  vst4_lane_u8(u - 2 + 2 * stride, u0, 2);
+  vst4_lane_u8(u - 2 + 3 * stride, u0, 3);
+  vst4_lane_u8(u - 2 + 4 * stride, u0, 4);
+  vst4_lane_u8(u - 2 + 5 * stride, u0, 5);
+  vst4_lane_u8(u - 2 + 6 * stride, u0, 6);
+  vst4_lane_u8(u - 2 + 7 * stride, u0, 7);
+  vst4_lane_u8(v - 2 + 0 * stride, v0, 0);
+  vst4_lane_u8(v - 2 + 1 * stride, v0, 1);
+  vst4_lane_u8(v - 2 + 2 * stride, v0, 2);
+  vst4_lane_u8(v - 2 + 3 * stride, v0, 3);
+  vst4_lane_u8(v - 2 + 4 * stride, v0, 4);
+  vst4_lane_u8(v - 2 + 5 * stride, v0, 5);
+  vst4_lane_u8(v - 2 + 6 * stride, v0, 6);
+  vst4_lane_u8(v - 2 + 7 * stride, v0, 7);
+}
+
+#endif  // !WORK_AROUND_GCC
+
+// Zero extend 'v' to an int16x8_t.
+static WEBP_INLINE int16x8_t ConvertU8ToS16(uint8x8_t v) {
+  return vreinterpretq_s16_u16(vmovl_u8(v));
+}
+
+// Performs unsigned 8b saturation on 'dst01' and 'dst23' storing the result
+// to the corresponding rows of 'dst'.
+static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
+                                            const int16x8_t dst01,
+                                            const int16x8_t dst23) {
+  // Unsigned saturate to 8b.
+  const uint8x8_t dst01_u8 = vqmovun_s16(dst01);
+  const uint8x8_t dst23_u8 = vqmovun_s16(dst23);
+
+  // Store the results.
+  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), vreinterpret_u32_u8(dst01_u8), 0);
+  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), vreinterpret_u32_u8(dst01_u8), 1);
+  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), vreinterpret_u32_u8(dst23_u8), 0);
+  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), vreinterpret_u32_u8(dst23_u8), 1);
+}
+
+static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
+                               uint8_t* const dst) {
+  uint32x2_t dst01 = vdup_n_u32(0);
+  uint32x2_t dst23 = vdup_n_u32(0);
+
+  // Load the source pixels.
+  dst01 = vld1_lane_u32((uint32_t*)(dst + 0 * BPS), dst01, 0);
+  dst23 = vld1_lane_u32((uint32_t*)(dst + 2 * BPS), dst23, 0);
+  dst01 = vld1_lane_u32((uint32_t*)(dst + 1 * BPS), dst01, 1);
+  dst23 = vld1_lane_u32((uint32_t*)(dst + 3 * BPS), dst23, 1);
+
+  {
+    // Convert to 16b.
+    const int16x8_t dst01_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst01));
+    const int16x8_t dst23_s16 = ConvertU8ToS16(vreinterpret_u8_u32(dst23));
+
+    // Descale with rounding and accumulate onto the source pixels.
+    const int16x8_t out01 = vrsraq_n_s16(dst01_s16, row01, 3);
+    const int16x8_t out23 = vrsraq_n_s16(dst23_s16, row23, 3);
+    // Saturate the sums to 8b and store them back to 'dst'.
+    SaturateAndStore4x4(dst, out01, out23);
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static uint8x16_t NeedsFilter(const uint8x16_t p1, const uint8x16_t p0,
+                              const uint8x16_t q0, const uint8x16_t q1,
+                              int thresh) {
+  const uint8x16_t thresh_v = vdupq_n_u8((uint8_t)thresh);
+  const uint8x16_t a_p0_q0 = vabdq_u8(p0, q0);               // abs(p0-q0)
+  const uint8x16_t a_p1_q1 = vabdq_u8(p1, q1);               // abs(p1-q1)
+  const uint8x16_t a_p0_q0_2 = vqaddq_u8(a_p0_q0, a_p0_q0);  // 2 * abs(p0-q0)
+  const uint8x16_t a_p1_q1_2 = vshrq_n_u8(a_p1_q1, 1);       // abs(p1-q1) / 2
+  const uint8x16_t sum = vqaddq_u8(a_p0_q0_2, a_p1_q1_2);
+  const uint8x16_t mask = vcgeq_u8(thresh_v, sum);
+  return mask;
+}
+
+static int8x16_t FlipSign(const uint8x16_t v) {
+  const uint8x16_t sign_bit = vdupq_n_u8(0x80);
+  return vreinterpretq_s8_u8(veorq_u8(v, sign_bit));
+}
+
+static uint8x16_t FlipSignBack(const int8x16_t v) {
+  const int8x16_t sign_bit = vdupq_n_s8(0x80);
+  return vreinterpretq_u8_s8(veorq_s8(v, sign_bit));
+}
+
+static int8x16_t GetBaseDelta(const int8x16_t p1, const int8x16_t p0,
+                              const int8x16_t q0, const int8x16_t q1) {
+  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
+  const int8x16_t p1_q1 = vqsubq_s8(p1, q1);      // (p1-q1)
+  const int8x16_t s1 = vqaddq_s8(p1_q1, q0_p0);   // (p1-q1) + 1 * (q0 - p0)
+  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // (p1-q1) + 2 * (q0 - p0)
+  const int8x16_t s3 = vqaddq_s8(q0_p0, s2);      // (p1-q1) + 3 * (q0 - p0)
+  return s3;
+}
+
+static int8x16_t GetBaseDelta0(const int8x16_t p0, const int8x16_t q0) {
+  const int8x16_t q0_p0 = vqsubq_s8(q0, p0);      // (q0-p0)
+  const int8x16_t s1 = vqaddq_s8(q0_p0, q0_p0);   // 2 * (q0 - p0)
+  const int8x16_t s2 = vqaddq_s8(q0_p0, s1);      // 3 * (q0 - p0)
+  return s2;
+}
+
+//------------------------------------------------------------------------------
+
+static void ApplyFilter2NoFlip(const int8x16_t p0s, const int8x16_t q0s,
+                               const int8x16_t delta,
+                               int8x16_t* const op0, int8x16_t* const oq0) {
+  const int8x16_t kCst3 = vdupq_n_s8(0x03);
+  const int8x16_t kCst4 = vdupq_n_s8(0x04);
+  const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
+  const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4);
+  const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3);
+  const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3);
+  *op0 = vqaddq_s8(p0s, delta3);
+  *oq0 = vqsubq_s8(q0s, delta4);
+}
+
+#if defined(WEBP_USE_INTRINSICS)
+
+static void ApplyFilter2(const int8x16_t p0s, const int8x16_t q0s,
+                         const int8x16_t delta,
+                         uint8x16_t* const op0, uint8x16_t* const oq0) {
+  const int8x16_t kCst3 = vdupq_n_s8(0x03);
+  const int8x16_t kCst4 = vdupq_n_s8(0x04);
+  const int8x16_t delta_p3 = vqaddq_s8(delta, kCst3);
+  const int8x16_t delta_p4 = vqaddq_s8(delta, kCst4);
+  const int8x16_t delta3 = vshrq_n_s8(delta_p3, 3);
+  const int8x16_t delta4 = vshrq_n_s8(delta_p4, 3);
+  const int8x16_t sp0 = vqaddq_s8(p0s, delta3);
+  const int8x16_t sq0 = vqsubq_s8(q0s, delta4);
+  *op0 = FlipSignBack(sp0);
+  *oq0 = FlipSignBack(sq0);
+}
+
+static void DoFilter2(const uint8x16_t p1, const uint8x16_t p0,
+                      const uint8x16_t q0, const uint8x16_t q1,
+                      const uint8x16_t mask,
+                      uint8x16_t* const op0, uint8x16_t* const oq0) {
+  const int8x16_t p1s = FlipSign(p1);
+  const int8x16_t p0s = FlipSign(p0);
+  const int8x16_t q0s = FlipSign(q0);
+  const int8x16_t q1s = FlipSign(q1);
+  const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
+  const int8x16_t delta1 = vandq_s8(delta0, vreinterpretq_s8_u8(mask));
+  ApplyFilter2(p0s, q0s, delta1, op0, oq0);
+}
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  uint8x16_t p1, p0, q0, q1, op0, oq0;
+  Load16x4(p, stride, &p1, &p0, &q0, &q1);
+  {
+    const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
+    DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
+  }
+  Store16x2(op0, oq0, p, stride);
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  uint8x16_t p1, p0, q0, q1, oq0, op0;
+  Load4x16(p, stride, &p1, &p0, &q0, &q1);
+  {
+    const uint8x16_t mask = NeedsFilter(p1, p0, q0, q1, thresh);
+    DoFilter2(p1, p0, q0, q1, mask, &op0, &oq0);
+  }
+  Store2x16(op0, oq0, p, stride);
+}
+
+#else
+
+#define QRegs "q0", "q1", "q2", "q3",                                          \
               "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
 
 #define FLIP_SIGN_BIT2(a, b, s)                                                \
@@ -68,40 +592,16 @@ extern "C" {
   DO_SIMPLE_FILTER(p0, q0, q9)                 /* apply filter */              \
   FLIP_SIGN_BIT2(p0, q0, q10)
 
-// Load/Store vertical edge
-#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
-  "vld4.8   {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
-  "vld4.8   {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
-  "vld4.8   {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
-  "vld4.8   {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
-  "vld4.8   {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
-  "vld4.8   {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
-  "vld4.8   {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
-  "vld4.8   {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
-
-#define STORE8x2(c1, c2, p,stride)                                             \
-  "vst2.8   {" #c1"[0], " #c2"[0]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[1], " #c2"[1]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[2], " #c2"[2]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[3], " #c2"[3]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[4], " #c2"[4]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[5], " #c2"[5]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[6], " #c2"[6]}," #p "," #stride " \n"                      \
-  "vst2.8   {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
-
-//-----------------------------------------------------------------------------
-// Simple In-loop filtering (Paragraph 15.2)
-
-static void SimpleVFilter16NEON(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
   __asm__ volatile (
     "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
 
     "vld1.u8    {q1}, [%[p]], %[stride]        \n"  // p1
     "vld1.u8    {q2}, [%[p]], %[stride]        \n"  // p0
     "vld1.u8    {q3}, [%[p]], %[stride]        \n"  // q0
-    "vld1.u8    {q4}, [%[p]]                   \n"  // q1
+    "vld1.u8    {q12}, [%[p]]                  \n"  // q1
 
-    DO_FILTER2(q1, q2, q3, q4, %[thresh])
+    DO_FILTER2(q1, q2, q3, q12, %[thresh])
 
     "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
 
@@ -113,25 +613,25 @@ static void SimpleVFilter16NEON(uint8_t* p, int stride, int thresh) {
   );
 }
 
-static void SimpleHFilter16NEON(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
   __asm__ volatile (
     "sub        r4, %[p], #2                   \n"  // base1 = p - 2
     "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
     "add        r5, r4, %[stride]              \n"  // base2 = base1 + stride
 
     LOAD8x4(d2, d3, d4, d5, [r4], [r5], r6)
-    LOAD8x4(d6, d7, d8, d9, [r4], [r5], r6)
-    "vswp       d3, d6                         \n"  // p1:q1 p0:q3
-    "vswp       d5, d8                         \n"  // q0:q2 q1:q4
-    "vswp       q2, q3                         \n"  // p1:q1 p0:q2 q0:q3 q1:q4
+    LOAD8x4(d24, d25, d26, d27, [r4], [r5], r6)
+    "vswp       d3, d24                        \n"  // p1:q1 p0:q3
+    "vswp       d5, d26                        \n"  // q0:q2 q1:q4
+    "vswp       q2, q12                        \n"  // p1:q1 p0:q2 q0:q3 q1:q4
 
-    DO_FILTER2(q1, q2, q3, q4, %[thresh])
+    DO_FILTER2(q1, q2, q12, q13, %[thresh])
 
     "sub        %[p], %[p], #1                 \n"  // p - 1
 
-    "vswp        d5, d6                        \n"
+    "vswp        d5, d24                       \n"
     STORE8x2(d4, d5, [%[p]], %[stride])
-    STORE8x2(d6, d7, [%[p]], %[stride])
+    STORE8x2(d24, d25, [%[p]], %[stride])
 
     : [p] "+r"(p)
     : [stride] "r"(stride), [thresh] "r"(thresh)
@@ -139,44 +639,408 @@ static void SimpleHFilter16NEON(uint8_t* p, int stride, int thresh) {
   );
 }
 
-static void SimpleVFilter16iNEON(uint8_t* p, int stride, int thresh) {
-  int k;
-  for (k = 3; k > 0; --k) {
+#endif    // WEBP_USE_INTRINSICS
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  uint32_t k;
+  for (k = 3; k != 0; --k) {
     p += 4 * stride;
-    SimpleVFilter16NEON(p, stride, thresh);
+    SimpleVFilter16(p, stride, thresh);
   }
 }
 
-static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
-  int k;
-  for (k = 3; k > 0; --k) {
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  uint32_t k;
+  for (k = 3; k != 0; --k) {
     p += 4;
-    SimpleHFilter16NEON(p, stride, thresh);
+    SimpleHFilter16(p, stride, thresh);
   }
 }
 
-static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
-  const int kBPS = BPS;
-  const int16_t constants[] = {20091, 17734, 0, 0};
-  /* kC1, kC2. Padded because vld1.16 loads 8 bytes
-   * Technically these are unsigned but vqdmulh is only available in signed.
-   * vqdmulh returns high half (effectively >> 16) but also doubles the value,
-   * changing the >> 16 to >> 15 and requiring an additional >> 1.
-   * We use this to our advantage with kC2. The canonical value is 35468.
-   * However, the high bit is set so treating it as signed will give incorrect
-   * results. We avoid this by down shifting by 1 here to clear the highest bit.
-   * Combined with the doubling effect of vqdmulh we get >> 16.
-   * This can not be applied to kC1 because the lowest bit is set. Down shifting
-   * the constant would reduce precision.
-   */
-
-  /* libwebp uses a trick to avoid some extra addition that libvpx does.
-   * Instead of:
-   * temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
-   * libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
-   * same issue with kC1 and vqdmulh that we work around by down shifting kC2
-   */
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+static uint8x16_t NeedsHev(const uint8x16_t p1, const uint8x16_t p0,
+                           const uint8x16_t q0, const uint8x16_t q1,
+                           int hev_thresh) {
+  const uint8x16_t hev_thresh_v = vdupq_n_u8((uint8_t)hev_thresh);
+  const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0);  // abs(p1 - p0)
+  const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0);  // abs(q1 - q0)
+  const uint8x16_t mask1 = vcgtq_u8(a_p1_p0, hev_thresh_v);
+  const uint8x16_t mask2 = vcgtq_u8(a_q1_q0, hev_thresh_v);
+  const uint8x16_t mask = vorrq_u8(mask1, mask2);
+  return mask;
+}
+
+static uint8x16_t NeedsFilter2(const uint8x16_t p3, const uint8x16_t p2,
+                               const uint8x16_t p1, const uint8x16_t p0,
+                               const uint8x16_t q0, const uint8x16_t q1,
+                               const uint8x16_t q2, const uint8x16_t q3,
+                               int ithresh, int thresh) {
+  const uint8x16_t ithresh_v = vdupq_n_u8((uint8_t)ithresh);
+  const uint8x16_t a_p3_p2 = vabdq_u8(p3, p2);  // abs(p3 - p2)
+  const uint8x16_t a_p2_p1 = vabdq_u8(p2, p1);  // abs(p2 - p1)
+  const uint8x16_t a_p1_p0 = vabdq_u8(p1, p0);  // abs(p1 - p0)
+  const uint8x16_t a_q3_q2 = vabdq_u8(q3, q2);  // abs(q3 - q2)
+  const uint8x16_t a_q2_q1 = vabdq_u8(q2, q1);  // abs(q2 - q1)
+  const uint8x16_t a_q1_q0 = vabdq_u8(q1, q0);  // abs(q1 - q0)
+  const uint8x16_t max1 = vmaxq_u8(a_p3_p2, a_p2_p1);
+  const uint8x16_t max2 = vmaxq_u8(a_p1_p0, a_q3_q2);
+  const uint8x16_t max3 = vmaxq_u8(a_q2_q1, a_q1_q0);
+  const uint8x16_t max12 = vmaxq_u8(max1, max2);
+  const uint8x16_t max123 = vmaxq_u8(max12, max3);
+  const uint8x16_t mask2 = vcgeq_u8(ithresh_v, max123);
+  const uint8x16_t mask1 = NeedsFilter(p1, p0, q0, q1, thresh);
+  const uint8x16_t mask = vandq_u8(mask1, mask2);
+  return mask;
+}
+
+//  4-points filter
+
+static void ApplyFilter4(
+    const int8x16_t p1, const int8x16_t p0,
+    const int8x16_t q0, const int8x16_t q1,
+    const int8x16_t delta0,
+    uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+  const int8x16_t kCst3 = vdupq_n_s8(0x03);
+  const int8x16_t kCst4 = vdupq_n_s8(0x04);
+  const int8x16_t delta1 = vqaddq_s8(delta0, kCst4);
+  const int8x16_t delta2 = vqaddq_s8(delta0, kCst3);
+  const int8x16_t a1 = vshrq_n_s8(delta1, 3);
+  const int8x16_t a2 = vshrq_n_s8(delta2, 3);
+  const int8x16_t a3 = vrshrq_n_s8(a1, 1);   // a3 = (a1 + 1) >> 1
+  *op0 = FlipSignBack(vqaddq_s8(p0, a2));  // clip(p0 + a2)
+  *oq0 = FlipSignBack(vqsubq_s8(q0, a1));  // clip(q0 - a1)
+  *op1 = FlipSignBack(vqaddq_s8(p1, a3));  // clip(p1 + a3)
+  *oq1 = FlipSignBack(vqsubq_s8(q1, a3));  // clip(q1 - a3)
+}
+
+static void DoFilter4(
+    const uint8x16_t p1, const uint8x16_t p0,
+    const uint8x16_t q0, const uint8x16_t q1,
+    const uint8x16_t mask, const uint8x16_t hev_mask,
+    uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1) {
+  // Fused version of DoFilter2() + 4-points filter, via ApplyFilter2NoFlip().
+  const int8x16_t p1s = FlipSign(p1);
+  int8x16_t p0s = FlipSign(p0);
+  int8x16_t q0s = FlipSign(q0);
+  const int8x16_t q1s = FlipSign(q1);
+  const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
+
+  // do_filter2 part (simple loopfilter on pixels with hev)
+  {
+    const int8x16_t delta = GetBaseDelta(p1s, p0s, q0s, q1s);
+    const int8x16_t simple_lf_delta =
+        vandq_s8(delta, vreinterpretq_s8_u8(simple_lf_mask));
+    ApplyFilter2NoFlip(p0s, q0s, simple_lf_delta, &p0s, &q0s);
+  }
+
+  // do_filter4 part (complex loopfilter on pixels without hev)
+  {
+    const int8x16_t delta0 = GetBaseDelta0(p0s, q0s);
+    // we use: (mask & hev_mask) ^ mask = mask & ~hev_mask
+    const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
+    const int8x16_t complex_lf_delta =
+        vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
+    ApplyFilter4(p1s, p0s, q0s, q1s, complex_lf_delta, op1, op0, oq0, oq1);
+  }
+}
+
+//  6-points filter
+
+static void ApplyFilter6(
+    const int8x16_t p2, const int8x16_t p1, const int8x16_t p0,
+    const int8x16_t q0, const int8x16_t q1, const int8x16_t q2,
+    const int8x16_t delta,
+    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+  const int16x8_t kCst63 = vdupq_n_s16(63);
+  const int8x8_t kCst27 = vdup_n_s8(27);
+  const int8x8_t kCst18 = vdup_n_s8(18);
+  const int8x8_t kCst9 = vdup_n_s8(9);
+  const int8x8_t delta_lo = vget_low_s8(delta);
+  const int8x8_t delta_hi = vget_high_s8(delta);
+  const int16x8_t s1_lo = vmlal_s8(kCst63, kCst27, delta_lo);  // 63 + 27 * a
+  const int16x8_t s1_hi = vmlal_s8(kCst63, kCst27, delta_hi);  // 63 + 27 * a
+  const int16x8_t s2_lo = vmlal_s8(kCst63, kCst18, delta_lo);  // 63 + 18 * a
+  const int16x8_t s2_hi = vmlal_s8(kCst63, kCst18, delta_hi);  // 63 + 18 * a
+  const int16x8_t s3_lo = vmlal_s8(kCst63, kCst9, delta_lo);   // 63 + 9 * a
+  const int16x8_t s3_hi = vmlal_s8(kCst63, kCst9, delta_hi);   // 63 + 9 * a
+  const int8x8_t a1_lo = vqshrn_n_s16(s1_lo, 7);
+  const int8x8_t a1_hi = vqshrn_n_s16(s1_hi, 7);
+  const int8x8_t a2_lo = vqshrn_n_s16(s2_lo, 7);
+  const int8x8_t a2_hi = vqshrn_n_s16(s2_hi, 7);
+  const int8x8_t a3_lo = vqshrn_n_s16(s3_lo, 7);
+  const int8x8_t a3_hi = vqshrn_n_s16(s3_hi, 7);
+  const int8x16_t a1 = vcombine_s8(a1_lo, a1_hi);
+  const int8x16_t a2 = vcombine_s8(a2_lo, a2_hi);
+  const int8x16_t a3 = vcombine_s8(a3_lo, a3_hi);
+
+  *op0 = FlipSignBack(vqaddq_s8(p0, a1));  // clip(p0 + a1)
+  *oq0 = FlipSignBack(vqsubq_s8(q0, a1));  // clip(q0 - a1)
+  *oq1 = FlipSignBack(vqsubq_s8(q1, a2));  // clip(q1 - a2)
+  *op1 = FlipSignBack(vqaddq_s8(p1, a2));  // clip(p1 + a2)
+  *oq2 = FlipSignBack(vqsubq_s8(q2, a3));  // clip(q2 - a3)
+  *op2 = FlipSignBack(vqaddq_s8(p2, a3));  // clip(p2 + a3)
+}
+
+static void DoFilter6(
+    const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0,
+    const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2,
+    const uint8x16_t mask, const uint8x16_t hev_mask,
+    uint8x16_t* const op2, uint8x16_t* const op1, uint8x16_t* const op0,
+    uint8x16_t* const oq0, uint8x16_t* const oq1, uint8x16_t* const oq2) {
+  // Fused version of DoFilter2() + 6-points filter, via ApplyFilter2NoFlip().
+  const int8x16_t p2s = FlipSign(p2);
+  const int8x16_t p1s = FlipSign(p1);
+  int8x16_t p0s = FlipSign(p0);
+  int8x16_t q0s = FlipSign(q0);
+  const int8x16_t q1s = FlipSign(q1);
+  const int8x16_t q2s = FlipSign(q2);
+  const uint8x16_t simple_lf_mask = vandq_u8(mask, hev_mask);
+  const int8x16_t delta0 = GetBaseDelta(p1s, p0s, q0s, q1s);
+
+  // do_filter2 part (simple loopfilter on pixels with hev)
+  {
+    const int8x16_t simple_lf_delta =
+        vandq_s8(delta0, vreinterpretq_s8_u8(simple_lf_mask));
+    ApplyFilter2NoFlip(p0s, q0s, simple_lf_delta, &p0s, &q0s);
+  }
+
+  // do_filter6 part (complex loopfilter on pixels without hev)
+  {
+    // we use: (mask & hev_mask) ^ mask = mask & ~hev_mask
+    const uint8x16_t complex_lf_mask = veorq_u8(simple_lf_mask, mask);
+    const int8x16_t complex_lf_delta =
+        vandq_s8(delta0, vreinterpretq_s8_u8(complex_lf_mask));
+    ApplyFilter6(p2s, p1s, p0s, q0s, q1s, q2s, complex_lf_delta,
+                 op2, op1, op0, oq0, oq1, oq2);
+  }
+}
+
+// on macroblock edges
+
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load16x8(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store16x2(op2, op1, p - 2 * stride, stride);
+    Store16x2(op0, oq0, p + 0 * stride, stride);
+    Store16x2(oq1, oq2, p + 2 * stride, stride);
+  }
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load8x16(p, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store2x16(op2, op1, p - 2, stride);
+    Store2x16(op0, oq0, p + 0, stride);
+    Store2x16(oq1, oq2, p + 2, stride);
+  }
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  uint32_t k;
+  uint8x16_t p3, p2, p1, p0;
+  Load16x4(p + 2  * stride, stride, &p3, &p2, &p1, &p0);
+  for (k = 3; k != 0; --k) {
+    uint8x16_t q0, q1, q2, q3;
+    p += 4 * stride;
+    Load16x4(p + 2  * stride, stride, &q0, &q1, &q2, &q3);
+    {
+      const uint8x16_t mask =
+          NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+      const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+      // p3 and p2 are not just temporary variables here: they will be
+      // re-used for next span. And q2/q3 will become p1/p0 accordingly.
+      DoFilter4(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
+      Store16x4(p1, p0, p3, p2, p, stride);
+      p1 = q2;
+      p0 = q3;
+    }
+  }
+}
+
+#if !defined(WORK_AROUND_GCC)
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  uint32_t k;
+  uint8x16_t p3, p2, p1, p0;
+  Load4x16(p + 2, stride, &p3, &p2, &p1, &p0);
+  for (k = 3; k != 0; --k) {
+    uint8x16_t q0, q1, q2, q3;
+    p += 4;
+    Load4x16(p + 2, stride, &q0, &q1, &q2, &q3);
+    {
+      const uint8x16_t mask =
+          NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3, ithresh, thresh);
+      const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+      DoFilter4(p1, p0, q0, q1, mask, hev_mask, &p1, &p0, &p3, &p2);
+      Store4x16(p1, p0, p3, p2, p, stride);
+      p1 = q2;
+      p0 = q3;
+    }
+  }
+}
+#endif  // !WORK_AROUND_GCC
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store8x2x2(op2, op1, u - 2 * stride, v - 2 * stride, stride);
+    Store8x2x2(op0, oq0, u + 0 * stride, v + 0 * stride, stride);
+    Store8x2x2(oq1, oq2, u + 2 * stride, v + 2 * stride, stride);
+  }
+}
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  u += 4 * stride;
+  v += 4 * stride;
+  Load8x8x2(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op1, op0, oq0, oq1;
+    DoFilter4(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
+    Store8x4x2(op1, op0, oq0, oq1, u, v, stride);
+  }
+}
+
+#if !defined(WORK_AROUND_GCC)
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op2, op1, op0, oq0, oq1, oq2;
+    DoFilter6(p2, p1, p0, q0, q1, q2, mask, hev_mask,
+              &op2, &op1, &op0, &oq0, &oq1, &oq2);
+    Store6x8x2(op2, op1, op0, oq0, oq1, oq2, u, v, stride);
+  }
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  uint8x16_t p3, p2, p1, p0, q0, q1, q2, q3;
+  u += 4;
+  v += 4;
+  Load8x8x2T(u, v, stride, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+  {
+    const uint8x16_t mask = NeedsFilter2(p3, p2, p1, p0, q0, q1, q2, q3,
+                                         ithresh, thresh);
+    const uint8x16_t hev_mask = NeedsHev(p1, p0, q0, q1, hev_thresh);
+    uint8x16_t op1, op0, oq0, oq1;
+    DoFilter4(p1, p0, q0, q1, mask, hev_mask, &op1, &op0, &oq0, &oq1);
+    Store4x8x2(op1, op0, oq0, oq1, u, v, stride);
+  }
+}
+#endif  // !WORK_AROUND_GCC
+
+//-----------------------------------------------------------------------------
+// Inverse transforms (Paragraph 14.4)
+
+// Technically these are unsigned but vqdmulh is only available in signed.
+// vqdmulh returns high half (effectively >> 16) but also doubles the value,
+// changing the >> 16 to >> 15 and requiring an additional >> 1.
+// We use this to our advantage with kC2. The canonical value is 35468.
+// However, the high bit is set so treating it as signed will give incorrect
+// results. We avoid this by down shifting by 1 here to clear the highest bit.
+// Combined with the doubling effect of vqdmulh we get >> 16.
+// This can not be applied to kC1 because the lowest bit is set. Down shifting
+// the constant would reduce precision.
+
+// libwebp uses a trick to avoid some extra addition that libvpx does.
+// Instead of:
+// temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+// libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
+// same issue with kC1 and vqdmulh that we work around by down shifting kC2.
 
+static const int16_t kC1 = 20091;
+static const int16_t kC2 = 17734;  // half of 35468, actually. See comment above.
+
+#if defined(WEBP_USE_INTRINSICS)
+static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
+                                     int16x8x2_t* const out) {
+  // a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
+  // c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
+  const int16x8x2_t tmp0 = vzipq_s16(in0, in1);   // a0 c0 a1 c1 a2 c2 ...
+                                                  // b0 d0 b1 d1 b2 d2 ...
+  *out = vzipq_s16(tmp0.val[0], tmp0.val[1]);
+}
+
+static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+  // {rows} = in0 | in4
+  //          in8 | in12
+  // B1 = in4 | in12
+  const int16x8_t B1 =
+      vcombine_s16(vget_high_s16(rows->val[0]), vget_high_s16(rows->val[1]));
+  // C0 = kC1 * in4 | kC1 * in12
+  // C1 = kC2 * in4 | kC2 * in12
+  const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
+  const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
+  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 + in8
+  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 - in8
+  // c = kC2 * in4 - kC1 * in12
+  // d = kC1 * in4 + kC2 * in12
+  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
+  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
+  const int16x8_t D0 = vcombine_s16(a, b);      // D0 = a | b
+  const int16x8_t D1 = vcombine_s16(d, c);      // D1 = d | c
+  const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
+  const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
+  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
+  Transpose8x2(E0, E1, rows);
+}
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  int16x8x2_t rows;
+  INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
+  TransformPass(&rows);
+  TransformPass(&rows);
+  Add4x4(rows.val[0], rows.val[1], dst);
+}
+
+#else
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  const int kBPS = BPS;
+  // kC1, kC2. Padded because vld1.16 loads 8 bytes
+  const int16_t constants[4] = { kC1, kC2, 0, 0 };
   /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
   __asm__ volatile (
     "vld1.16         {q1, q2}, [%[in]]           \n"
@@ -304,26 +1168,472 @@ static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
   );
 }
 
-static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
-  TransformOneNEON(in, dst);
+#endif    // WEBP_USE_INTRINSICS
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne(in, dst);
   if (do_two) {
-    TransformOneNEON(in + 16, dst + 4);
+    TransformOne(in + 16, dst + 4);
   }
 }
 
-extern void VP8DspInitNEON(void);
+static void TransformDC(const int16_t* in, uint8_t* dst) {
+  const int16x8_t DC = vdupq_n_s16(in[0]);
+  Add4x4(DC, DC, dst);
+}
+
+//------------------------------------------------------------------------------
+
+#define STORE_WHT(dst, col, rows) do {                  \
+  *dst = vgetq_lane_s32(rows.val[0], col); (dst) += 16; \
+  *dst = vgetq_lane_s32(rows.val[1], col); (dst) += 16; \
+  *dst = vgetq_lane_s32(rows.val[2], col); (dst) += 16; \
+  *dst = vgetq_lane_s32(rows.val[3], col); (dst) += 16; \
+} while (0)
+
+static void TransformWHT(const int16_t* in, int16_t* out) {
+  int32x4x4_t tmp;
 
-void VP8DspInitNEON(void) {
-  VP8Transform = TransformTwoNEON;
+  {
+    // Load the source.
+    const int16x4_t in00_03 = vld1_s16(in + 0);
+    const int16x4_t in04_07 = vld1_s16(in + 4);
+    const int16x4_t in08_11 = vld1_s16(in + 8);
+    const int16x4_t in12_15 = vld1_s16(in + 12);
+    const int32x4_t a0 = vaddl_s16(in00_03, in12_15);  // in[0..3] + in[12..15]
+    const int32x4_t a1 = vaddl_s16(in04_07, in08_11);  // in[4..7] + in[8..11]
+    const int32x4_t a2 = vsubl_s16(in04_07, in08_11);  // in[4..7] - in[8..11]
+    const int32x4_t a3 = vsubl_s16(in00_03, in12_15);  // in[0..3] - in[12..15]
+    tmp.val[0] = vaddq_s32(a0, a1);
+    tmp.val[1] = vaddq_s32(a3, a2);
+    tmp.val[2] = vsubq_s32(a0, a1);
+    tmp.val[3] = vsubq_s32(a3, a2);
+    // Arrange the temporary results column-wise.
+    tmp = Transpose4x4(tmp);
+  }
+
+  {
+    const int32x4_t kCst3 = vdupq_n_s32(3);
+    const int32x4_t dc = vaddq_s32(tmp.val[0], kCst3);  // add rounder
+    const int32x4_t a0 = vaddq_s32(dc, tmp.val[3]);
+    const int32x4_t a1 = vaddq_s32(tmp.val[1], tmp.val[2]);
+    const int32x4_t a2 = vsubq_s32(tmp.val[1], tmp.val[2]);
+    const int32x4_t a3 = vsubq_s32(dc, tmp.val[3]);
+
+    tmp.val[0] = vaddq_s32(a0, a1);
+    tmp.val[1] = vaddq_s32(a3, a2);
+    tmp.val[2] = vsubq_s32(a0, a1);
+    tmp.val[3] = vsubq_s32(a3, a2);
+
+    // right shift the results by 3.
+    tmp.val[0] = vshrq_n_s32(tmp.val[0], 3);
+    tmp.val[1] = vshrq_n_s32(tmp.val[1], 3);
+    tmp.val[2] = vshrq_n_s32(tmp.val[2], 3);
+    tmp.val[3] = vshrq_n_s32(tmp.val[3], 3);
 
-  VP8SimpleVFilter16 = SimpleVFilter16NEON;
-  VP8SimpleHFilter16 = SimpleHFilter16NEON;
-  VP8SimpleVFilter16i = SimpleVFilter16iNEON;
-  VP8SimpleHFilter16i = SimpleHFilter16iNEON;
+    STORE_WHT(out, 0, tmp);
+    STORE_WHT(out, 1, tmp);
+    STORE_WHT(out, 2, tmp);
+    STORE_WHT(out, 3, tmp);
+  }
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
+#undef STORE_WHT
+
+//------------------------------------------------------------------------------
+
+#define MUL(a, b) (((a) * (b)) >> 16)
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+  static const int kC1_full = 20091 + (1 << 16);
+  static const int kC2_full = 35468;
+  const int16x4_t A = vld1_dup_s16(in);
+  const int16x4_t c4 = vdup_n_s16(MUL(in[4], kC2_full));
+  const int16x4_t d4 = vdup_n_s16(MUL(in[4], kC1_full));
+  const int c1 = MUL(in[1], kC2_full);
+  const int d1 = MUL(in[1], kC1_full);
+  const uint64_t cd = (uint64_t)( d1 & 0xffff) <<  0 |
+                      (uint64_t)( c1 & 0xffff) << 16 |
+                      (uint64_t)(-c1 & 0xffff) << 32 |
+                      (uint64_t)(-d1 & 0xffff) << 48;
+  const int16x4_t CD = vcreate_s16(cd);
+  const int16x4_t B = vqadd_s16(A, CD);
+  const int16x8_t m0_m1 = vcombine_s16(vqadd_s16(B, d4), vqadd_s16(B, c4));
+  const int16x8_t m2_m3 = vcombine_s16(vqsub_s16(B, c4), vqsub_s16(B, d4));
+  Add4x4(m0_m1, m2_m3, dst);
+}
+#undef MUL
+
+//------------------------------------------------------------------------------
+// 4x4
+
+static void DC4(uint8_t* dst) {    // DC
+  const uint8x8_t A = vld1_u8(dst - BPS);  // top row
+  const uint16x4_t p0 = vpaddl_u8(A);  // cascading summation of the top
+  const uint16x4_t p1 = vpadd_u16(p0, p0);
+  const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1));
+  const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1));
+  const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1));
+  const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1));
+  const uint16x8_t s0 = vaddq_u16(L0, L1);
+  const uint16x8_t s1 = vaddq_u16(L2, L3);
+  const uint16x8_t s01 = vaddq_u16(s0, s1);
+  const uint16x8_t sum = vaddq_u16(s01, vcombine_u16(p1, p1));
+  const uint8x8_t dc0 = vrshrn_n_u16(sum, 3);  // (sum + 4) >> 3
+  const uint8x8_t dc = vdup_lane_u8(dc0, 0);
+  int i;
+  for (i = 0; i < 4; ++i) {
+    vst1_lane_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc), 0);
+  }
+}
+
+// TrueMotion (4x4 + 8x8)
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+  const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
+  const uint8x8_t T = vld1_u8(dst - BPS);  // top row 'A[0..3]'
+  const int16x8_t d = vreinterpretq_s16_u16(vsubl_u8(T, TL));  // A[c] - A[-1]
+  int y;
+  for (y = 0; y < size; y += 4) {
+    // left edge
+    const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
+    const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
+    const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
+    const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
+    const int16x8_t r0 = vaddq_s16(L0, d);  // L[r] + A[c] - A[-1]
+    const int16x8_t r1 = vaddq_s16(L1, d);
+    const int16x8_t r2 = vaddq_s16(L2, d);
+    const int16x8_t r3 = vaddq_s16(L3, d);
+    // Saturate and store the result.
+    const uint32x2_t r0_u32 = vreinterpret_u32_u8(vqmovun_s16(r0));
+    const uint32x2_t r1_u32 = vreinterpret_u32_u8(vqmovun_s16(r1));
+    const uint32x2_t r2_u32 = vreinterpret_u32_u8(vqmovun_s16(r2));
+    const uint32x2_t r3_u32 = vreinterpret_u32_u8(vqmovun_s16(r3));
+    if (size == 4) {
+      vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0_u32, 0);
+      vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1_u32, 0);
+      vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2_u32, 0);
+      vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3_u32, 0);
+    } else {
+      vst1_u32((uint32_t*)(dst + 0 * BPS), r0_u32);
+      vst1_u32((uint32_t*)(dst + 1 * BPS), r1_u32);
+      vst1_u32((uint32_t*)(dst + 2 * BPS), r2_u32);
+      vst1_u32((uint32_t*)(dst + 3 * BPS), r3_u32);
+    }
+    dst += 4 * BPS;
+  }
+}
+
+static void TM4(uint8_t* dst) { TrueMotion(dst, 4); }
+
+static void VE4(uint8_t* dst) {    // vertical
+  // NB: avoid vld1_u64 here as an alignment hint may be added -> SIGBUS.
+  const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(dst - BPS - 1));  // top row
+  const uint64x1_t A1 = vshr_n_u64(A0, 8);
+  const uint64x1_t A2 = vshr_n_u64(A0, 16);
+  const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0);
+  const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1);
+  const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2);
+  const uint8x8_t b = vhadd_u8(ABCDEFGH, CDEFGH00);
+  const uint8x8_t avg = vrhadd_u8(b, BCDEFGH0);
+  int i;
+  for (i = 0; i < 4; ++i) {
+    vst1_lane_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(avg), 0);
+  }
+}
+
+static void RD4(uint8_t* dst) {   // Down-right
+  const uint8x8_t XABCD_u8 = vld1_u8(dst - BPS - 1);
+  const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
+  const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
+  const uint32_t I = dst[-1 + 0 * BPS];
+  const uint32_t J = dst[-1 + 1 * BPS];
+  const uint32_t K = dst[-1 + 2 * BPS];
+  const uint32_t L = dst[-1 + 3 * BPS];
+  const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24));
+  const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
+  const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
+  const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
+  const uint8_t D = vget_lane_u8(XABCD_u8, 4);
+  const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
+  const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
+  const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
+  const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
+  const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+  const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
+  const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+  const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+  const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0);
+  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0);
+  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0);
+  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
+}
+
+static void LD4(uint8_t* dst) {    // Down-left
+  // Note using the same shift trick as VE4() is slower here.
+  const uint8x8_t ABCDEFGH = vld1_u8(dst - BPS + 0);
+  const uint8x8_t BCDEFGH0 = vld1_u8(dst - BPS + 1);
+  const uint8x8_t CDEFGH00 = vld1_u8(dst - BPS + 2);
+  const uint8x8_t CDEFGHH0 = vset_lane_u8(dst[-BPS + 7], CDEFGH00, 6);
+  const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGHH0);
+  const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0);
+  const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+  const uint32x2_t r0 = vreinterpret_u32_u8(avg2);
+  const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+  const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+  const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), r0, 0);
+  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), r1, 0);
+  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), r2, 0);
+  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), r3, 0);
+}
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void VE8uv(uint8_t* dst) {    // vertical
+  const uint8x8_t top = vld1_u8(dst - BPS);
+  int j;
+  for (j = 0; j < 8; ++j) {
+    vst1_u8(dst + j * BPS, top);
+  }
+}
+
+static void HE8uv(uint8_t* dst) {    // horizontal
+  int j;
+  for (j = 0; j < 8; ++j) {
+    const uint8x8_t left = vld1_dup_u8(dst - 1);
+    vst1_u8(dst, left);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void DC8(uint8_t* dst, int do_top, int do_left) {
+  uint16x8_t sum_top;
+  uint16x8_t sum_left;
+  uint8x8_t dc0;
+
+  if (do_top) {
+    const uint8x8_t A = vld1_u8(dst - BPS);  // top row
+    const uint16x4_t p0 = vpaddl_u8(A);  // cascading summation of the top
+    const uint16x4_t p1 = vpadd_u16(p0, p0);
+    const uint16x4_t p2 = vpadd_u16(p1, p1);
+    sum_top = vcombine_u16(p2, p2);
+  }
+
+  if (do_left) {
+    const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + 0 * BPS - 1));
+    const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + 1 * BPS - 1));
+    const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + 2 * BPS - 1));
+    const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + 3 * BPS - 1));
+    const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + 4 * BPS - 1));
+    const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + 5 * BPS - 1));
+    const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + 6 * BPS - 1));
+    const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + 7 * BPS - 1));
+    const uint16x8_t s0 = vaddq_u16(L0, L1);
+    const uint16x8_t s1 = vaddq_u16(L2, L3);
+    const uint16x8_t s2 = vaddq_u16(L4, L5);
+    const uint16x8_t s3 = vaddq_u16(L6, L7);
+    const uint16x8_t s01 = vaddq_u16(s0, s1);
+    const uint16x8_t s23 = vaddq_u16(s2, s3);
+    sum_left = vaddq_u16(s01, s23);
+  }
+
+  if (do_top && do_left) {
+    const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+    dc0 = vrshrn_n_u16(sum, 4);
+  } else if (do_top) {
+    dc0 = vrshrn_n_u16(sum_top, 3);
+  } else if (do_left) {
+    dc0 = vrshrn_n_u16(sum_left, 3);
+  } else {
+    dc0 = vdup_n_u8(0x80);
+  }
+
+  {
+    const uint8x8_t dc = vdup_lane_u8(dc0, 0);
+    int i;
+    for (i = 0; i < 8; ++i) {
+      vst1_u32((uint32_t*)(dst + i * BPS), vreinterpret_u32_u8(dc));
+    }
+  }
+}
+
+static void DC8uv(uint8_t* dst) { DC8(dst, 1, 1); }
+static void DC8uvNoTop(uint8_t* dst) { DC8(dst, 0, 1); }
+static void DC8uvNoLeft(uint8_t* dst) { DC8(dst, 1, 0); }
+static void DC8uvNoTopLeft(uint8_t* dst) { DC8(dst, 0, 0); }
+
+static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
+
+//------------------------------------------------------------------------------
+// 16x16
+
+static void VE16(uint8_t* dst) {     // vertical
+  const uint8x16_t top = vld1q_u8(dst - BPS);
+  int j;
+  for (j = 0; j < 16; ++j) {
+    vst1q_u8(dst + j * BPS, top);
+  }
+}
+
+static void HE16(uint8_t* dst) {     // horizontal
+  int j;
+  for (j = 0; j < 16; ++j) {
+    const uint8x16_t left = vld1q_dup_u8(dst - 1);
+    vst1q_u8(dst, left);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void DC16(uint8_t* dst, int do_top, int do_left) {
+  uint16x8_t sum_top;
+  uint16x8_t sum_left;
+  uint8x8_t dc0;
+
+  if (do_top) {
+    const uint8x16_t A = vld1q_u8(dst - BPS);  // top row
+    const uint16x8_t p0 = vpaddlq_u8(A);  // cascading summation of the top
+    const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
+    const uint16x4_t p2 = vpadd_u16(p1, p1);
+    const uint16x4_t p3 = vpadd_u16(p2, p2);
+    sum_top = vcombine_u16(p3, p3);
+  }
+
+  if (do_left) {
+    int i;
+    sum_left = vdupq_n_u16(0);
+    for (i = 0; i < 16; i += 8) {
+      const uint16x8_t L0 = vmovl_u8(vld1_u8(dst + (i + 0) * BPS - 1));
+      const uint16x8_t L1 = vmovl_u8(vld1_u8(dst + (i + 1) * BPS - 1));
+      const uint16x8_t L2 = vmovl_u8(vld1_u8(dst + (i + 2) * BPS - 1));
+      const uint16x8_t L3 = vmovl_u8(vld1_u8(dst + (i + 3) * BPS - 1));
+      const uint16x8_t L4 = vmovl_u8(vld1_u8(dst + (i + 4) * BPS - 1));
+      const uint16x8_t L5 = vmovl_u8(vld1_u8(dst + (i + 5) * BPS - 1));
+      const uint16x8_t L6 = vmovl_u8(vld1_u8(dst + (i + 6) * BPS - 1));
+      const uint16x8_t L7 = vmovl_u8(vld1_u8(dst + (i + 7) * BPS - 1));
+      const uint16x8_t s0 = vaddq_u16(L0, L1);
+      const uint16x8_t s1 = vaddq_u16(L2, L3);
+      const uint16x8_t s2 = vaddq_u16(L4, L5);
+      const uint16x8_t s3 = vaddq_u16(L6, L7);
+      const uint16x8_t s01 = vaddq_u16(s0, s1);
+      const uint16x8_t s23 = vaddq_u16(s2, s3);
+      const uint16x8_t sum = vaddq_u16(s01, s23);
+      sum_left = vaddq_u16(sum_left, sum);
+    }
+  }
+
+  if (do_top && do_left) {
+    const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+    dc0 = vrshrn_n_u16(sum, 5);
+  } else if (do_top) {
+    dc0 = vrshrn_n_u16(sum_top, 4);
+  } else if (do_left) {
+    dc0 = vrshrn_n_u16(sum_left, 4);
+  } else {
+    dc0 = vdup_n_u8(0x80);
+  }
+
+  {
+    const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
+    int i;
+    for (i = 0; i < 16; ++i) {
+      vst1q_u8(dst + i * BPS, dc);
+    }
+  }
+}
+
+static void DC16TopLeft(uint8_t* dst) { DC16(dst, 1, 1); }
+static void DC16NoTop(uint8_t* dst) { DC16(dst, 0, 1); }
+static void DC16NoLeft(uint8_t* dst) { DC16(dst, 1, 0); }
+static void DC16NoTopLeft(uint8_t* dst) { DC16(dst, 0, 0); }
+
+static void TM16(uint8_t* dst) {
+  const uint8x8_t TL = vld1_dup_u8(dst - BPS - 1);  // top-left pixel 'A[-1]'
+  const uint8x16_t T = vld1q_u8(dst - BPS);  // top row 'A[0..15]'
+  // A[c] - A[-1]
+  const int16x8_t d_lo = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(T), TL));
+  const int16x8_t d_hi = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(T), TL));
+  int y;
+  for (y = 0; y < 16; y += 4) {
+    // left edge
+    const int16x8_t L0 = ConvertU8ToS16(vld1_dup_u8(dst + 0 * BPS - 1));
+    const int16x8_t L1 = ConvertU8ToS16(vld1_dup_u8(dst + 1 * BPS - 1));
+    const int16x8_t L2 = ConvertU8ToS16(vld1_dup_u8(dst + 2 * BPS - 1));
+    const int16x8_t L3 = ConvertU8ToS16(vld1_dup_u8(dst + 3 * BPS - 1));
+    const int16x8_t r0_lo = vaddq_s16(L0, d_lo);  // L[r] + A[c] - A[-1]
+    const int16x8_t r1_lo = vaddq_s16(L1, d_lo);
+    const int16x8_t r2_lo = vaddq_s16(L2, d_lo);
+    const int16x8_t r3_lo = vaddq_s16(L3, d_lo);
+    const int16x8_t r0_hi = vaddq_s16(L0, d_hi);
+    const int16x8_t r1_hi = vaddq_s16(L1, d_hi);
+    const int16x8_t r2_hi = vaddq_s16(L2, d_hi);
+    const int16x8_t r3_hi = vaddq_s16(L3, d_hi);
+    // Saturate and store the result.
+    const uint8x16_t row0 = vcombine_u8(vqmovun_s16(r0_lo), vqmovun_s16(r0_hi));
+    const uint8x16_t row1 = vcombine_u8(vqmovun_s16(r1_lo), vqmovun_s16(r1_hi));
+    const uint8x16_t row2 = vcombine_u8(vqmovun_s16(r2_lo), vqmovun_s16(r2_hi));
+    const uint8x16_t row3 = vcombine_u8(vqmovun_s16(r3_lo), vqmovun_s16(r3_hi));
+    vst1q_u8(dst + 0 * BPS, row0);
+    vst1q_u8(dst + 1 * BPS, row1);
+    vst1q_u8(dst + 2 * BPS, row2);
+    vst1q_u8(dst + 3 * BPS, row3);
+    dst += 4 * BPS;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitNEON(void) {
+  VP8Transform = TransformTwo;
+  VP8TransformAC3 = TransformAC3;
+  VP8TransformDC = TransformDC;
+  VP8TransformWHT = TransformWHT;
+
+  VP8VFilter16 = VFilter16;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16 = HFilter16;
+#if !defined(WORK_AROUND_GCC)
+  VP8HFilter16i = HFilter16i;
 #endif
+  VP8VFilter8 = VFilter8;
+  VP8VFilter8i = VFilter8i;
+#if !defined(WORK_AROUND_GCC)
+  VP8HFilter8 = HFilter8;
+  VP8HFilter8i = HFilter8i;
+#endif
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+
+  VP8PredLuma4[0] = DC4;
+  VP8PredLuma4[1] = TM4;
+  VP8PredLuma4[2] = VE4;
+  VP8PredLuma4[4] = RD4;
+  VP8PredLuma4[6] = LD4;
+
+  VP8PredLuma16[0] = DC16TopLeft;
+  VP8PredLuma16[1] = TM16;
+  VP8PredLuma16[2] = VE16;
+  VP8PredLuma16[3] = HE16;
+  VP8PredLuma16[4] = DC16NoTop;
+  VP8PredLuma16[5] = DC16NoLeft;
+  VP8PredLuma16[6] = DC16NoTopLeft;
+
+  VP8PredChroma8[0] = DC8uv;
+  VP8PredChroma8[1] = TM8uv;
+  VP8PredChroma8[2] = VE8uv;
+  VP8PredChroma8[3] = HE8uv;
+  VP8PredChroma8[4] = DC8uvNoTop;
+  VP8PredChroma8[5] = DC8uvNoLeft;
+  VP8PredChroma8[6] = DC8uvNoTopLeft;
+}
+
+#else  // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8DspInitNEON)
 
-#endif   // WEBP_USE_NEON
+#endif  // WEBP_USE_NEON
diff --git a/drivers/webp/dsp/dec_sse2.c b/drivers/webp/dsp/dec_sse2.c
index 472b68ecb8..d4838b9210 100644
--- a/drivers/webp/dsp/dec_sse2.c
+++ b/drivers/webp/dsp/dec_sse2.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // SSE2 version of some decoding functions (idct, loop filtering).
@@ -14,17 +16,17 @@
 
 #if defined(WEBP_USE_SSE2)
 
+// The 3-coeff sparse transform in SSE2 is not really faster than the plain-C
+// one it seems => disable it by default. Uncomment the following to enable:
+// #define USE_TRANSFORM_AC3
+
 #include <emmintrin.h>
 #include "../dec/vp8i.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
 
-static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
+static void Transform(const int16_t* in, uint8_t* dst, int do_two) {
   // This implementation makes use of 16-bit fixed point versions of two
   // multiply constants:
   //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@@ -50,19 +52,19 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
   // vectors will just contain random value we'll never use nor store.
   __m128i in0, in1, in2, in3;
   {
-    in0 = _mm_loadl_epi64((__m128i*)&in[0]);
-    in1 = _mm_loadl_epi64((__m128i*)&in[4]);
-    in2 = _mm_loadl_epi64((__m128i*)&in[8]);
-    in3 = _mm_loadl_epi64((__m128i*)&in[12]);
+    in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
+    in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
+    in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
+    in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
     // a00 a10 a20 a30   x x x x
     // a01 a11 a21 a31   x x x x
     // a02 a12 a22 a32   x x x x
     // a03 a13 a23 a33   x x x x
     if (do_two) {
-      const __m128i inB0 = _mm_loadl_epi64((__m128i*)&in[16]);
-      const __m128i inB1 = _mm_loadl_epi64((__m128i*)&in[20]);
-      const __m128i inB2 = _mm_loadl_epi64((__m128i*)&in[24]);
-      const __m128i inB3 = _mm_loadl_epi64((__m128i*)&in[28]);
+      const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]);
+      const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
+      const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
+      const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
       in0 = _mm_unpacklo_epi64(in0, inB0);
       in1 = _mm_unpacklo_epi64(in1, inB1);
       in2 = _mm_unpacklo_epi64(in2, inB2);
@@ -194,21 +196,21 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
 
   // Add inverse transform to 'dst' and store.
   {
-    const __m128i zero = _mm_set1_epi16(0);
+    const __m128i zero = _mm_setzero_si128();
     // Load the reference(s).
     __m128i dst0, dst1, dst2, dst3;
     if (do_two) {
       // Load eight bytes/pixels per line.
-      dst0 = _mm_loadl_epi64((__m128i*)&dst[0 * BPS]);
-      dst1 = _mm_loadl_epi64((__m128i*)&dst[1 * BPS]);
-      dst2 = _mm_loadl_epi64((__m128i*)&dst[2 * BPS]);
-      dst3 = _mm_loadl_epi64((__m128i*)&dst[3 * BPS]);
+      dst0 = _mm_loadl_epi64((__m128i*)(dst + 0 * BPS));
+      dst1 = _mm_loadl_epi64((__m128i*)(dst + 1 * BPS));
+      dst2 = _mm_loadl_epi64((__m128i*)(dst + 2 * BPS));
+      dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS));
     } else {
       // Load four bytes/pixels per line.
-      dst0 = _mm_cvtsi32_si128(*(int*)&dst[0 * BPS]);
-      dst1 = _mm_cvtsi32_si128(*(int*)&dst[1 * BPS]);
-      dst2 = _mm_cvtsi32_si128(*(int*)&dst[2 * BPS]);
-      dst3 = _mm_cvtsi32_si128(*(int*)&dst[3 * BPS]);
+      dst0 = _mm_cvtsi32_si128(*(int*)(dst + 0 * BPS));
+      dst1 = _mm_cvtsi32_si128(*(int*)(dst + 1 * BPS));
+      dst2 = _mm_cvtsi32_si128(*(int*)(dst + 2 * BPS));
+      dst3 = _mm_cvtsi32_si128(*(int*)(dst + 3 * BPS));
     }
     // Convert to 16b.
     dst0 = _mm_unpacklo_epi8(dst0, zero);
@@ -228,20 +230,66 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
     // Store the results.
     if (do_two) {
       // Store eight bytes/pixels per line.
-      _mm_storel_epi64((__m128i*)&dst[0 * BPS], dst0);
-      _mm_storel_epi64((__m128i*)&dst[1 * BPS], dst1);
-      _mm_storel_epi64((__m128i*)&dst[2 * BPS], dst2);
-      _mm_storel_epi64((__m128i*)&dst[3 * BPS], dst3);
+      _mm_storel_epi64((__m128i*)(dst + 0 * BPS), dst0);
+      _mm_storel_epi64((__m128i*)(dst + 1 * BPS), dst1);
+      _mm_storel_epi64((__m128i*)(dst + 2 * BPS), dst2);
+      _mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3);
     } else {
       // Store four bytes/pixels per line.
-      *((int32_t *)&dst[0 * BPS]) = _mm_cvtsi128_si32(dst0);
-      *((int32_t *)&dst[1 * BPS]) = _mm_cvtsi128_si32(dst1);
-      *((int32_t *)&dst[2 * BPS]) = _mm_cvtsi128_si32(dst2);
-      *((int32_t *)&dst[3 * BPS]) = _mm_cvtsi128_si32(dst3);
+      *(int*)(dst + 0 * BPS) = _mm_cvtsi128_si32(dst0);
+      *(int*)(dst + 1 * BPS) = _mm_cvtsi128_si32(dst1);
+      *(int*)(dst + 2 * BPS) = _mm_cvtsi128_si32(dst2);
+      *(int*)(dst + 3 * BPS) = _mm_cvtsi128_si32(dst3);
     }
   }
 }
 
+#if defined(USE_TRANSFORM_AC3)
+#define MUL(a, b) (((a) * (b)) >> 16)
+static void TransformAC3(const int16_t* in, uint8_t* dst) {
+  static const int kC1 = 20091 + (1 << 16);
+  static const int kC2 = 35468;
+  const __m128i A = _mm_set1_epi16(in[0] + 4);
+  const __m128i c4 = _mm_set1_epi16(MUL(in[4], kC2));
+  const __m128i d4 = _mm_set1_epi16(MUL(in[4], kC1));
+  const int c1 = MUL(in[1], kC2);
+  const int d1 = MUL(in[1], kC1);
+  const __m128i CD = _mm_set_epi16(0, 0, 0, 0, -d1, -c1, c1, d1);
+  const __m128i B = _mm_adds_epi16(A, CD);
+  const __m128i m0 = _mm_adds_epi16(B, d4);
+  const __m128i m1 = _mm_adds_epi16(B, c4);
+  const __m128i m2 = _mm_subs_epi16(B, c4);
+  const __m128i m3 = _mm_subs_epi16(B, d4);
+  const __m128i zero = _mm_setzero_si128();
+  // Load the source pixels.
+  __m128i dst0 = _mm_cvtsi32_si128(*(int*)(dst + 0 * BPS));
+  __m128i dst1 = _mm_cvtsi32_si128(*(int*)(dst + 1 * BPS));
+  __m128i dst2 = _mm_cvtsi32_si128(*(int*)(dst + 2 * BPS));
+  __m128i dst3 = _mm_cvtsi32_si128(*(int*)(dst + 3 * BPS));
+  // Convert to 16b.
+  dst0 = _mm_unpacklo_epi8(dst0, zero);
+  dst1 = _mm_unpacklo_epi8(dst1, zero);
+  dst2 = _mm_unpacklo_epi8(dst2, zero);
+  dst3 = _mm_unpacklo_epi8(dst3, zero);
+  // Add the inverse transform.
+  dst0 = _mm_adds_epi16(dst0, _mm_srai_epi16(m0, 3));
+  dst1 = _mm_adds_epi16(dst1, _mm_srai_epi16(m1, 3));
+  dst2 = _mm_adds_epi16(dst2, _mm_srai_epi16(m2, 3));
+  dst3 = _mm_adds_epi16(dst3, _mm_srai_epi16(m3, 3));
+  // Unsigned saturate to 8b.
+  dst0 = _mm_packus_epi16(dst0, dst0);
+  dst1 = _mm_packus_epi16(dst1, dst1);
+  dst2 = _mm_packus_epi16(dst2, dst2);
+  dst3 = _mm_packus_epi16(dst3, dst3);
+  // Store the results.
+  *(int*)(dst + 0 * BPS) = _mm_cvtsi128_si32(dst0);
+  *(int*)(dst + 1 * BPS) = _mm_cvtsi128_si32(dst1);
+  *(int*)(dst + 2 * BPS) = _mm_cvtsi128_si32(dst2);
+  *(int*)(dst + 3 * BPS) = _mm_cvtsi128_si32(dst3);
+}
+#undef MUL
+#endif   // USE_TRANSFORM_AC3
+
 //------------------------------------------------------------------------------
 // Loop Filter (Paragraph 15)
 
@@ -250,20 +298,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
     _mm_subs_epu8((q), (p)),                                                   \
     _mm_subs_epu8((p), (q)))
 
-// Shift each byte of "a" by N bits while preserving by the sign bit.
-//
-// It first shifts the lower bytes of the words and then the upper bytes and
-// then merges the results together.
-#define SIGNED_SHIFT_N(a, N) {                                                 \
-  __m128i t = a;                                                               \
-  t = _mm_slli_epi16(t, 8);                                                    \
-  t = _mm_srai_epi16(t, N);                                                    \
-  t = _mm_srli_epi16(t, 8);                                                    \
-                                                                               \
-  a = _mm_srai_epi16(a, N + 8);                                                \
-  a = _mm_slli_epi16(a, 8);                                                    \
-                                                                               \
-  a = _mm_or_si128(t, a);                                                      \
+// Shift each byte of "x" by 3 bits while preserving the sign bit.
+static WEBP_INLINE void SignedShift8b(__m128i* const x) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i lo_0 = _mm_unpacklo_epi8(zero, *x);
+  const __m128i hi_0 = _mm_unpackhi_epi8(zero, *x);
+  const __m128i lo_1 = _mm_srai_epi16(lo_0, 3 + 8);
+  const __m128i hi_1 = _mm_srai_epi16(hi_0, 3 + 8);
+  *x = _mm_packs_epi16(lo_1, hi_1);
 }
 
 #define FLIP_SIGN_BIT2(a, b) {                                                 \
@@ -276,103 +318,124 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
   FLIP_SIGN_BIT2(c, d);                                                        \
 }
 
-#define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) {                      \
-  const __m128i zero = _mm_setzero_si128();                                    \
-  const __m128i t1 = MM_ABS(p1, p0);                                           \
-  const __m128i t2 = MM_ABS(q1, q0);                                           \
-                                                                               \
-  const __m128i h = _mm_set1_epi8(hev_thresh);                                 \
-  const __m128i t3 = _mm_subs_epu8(t1, h);  /* abs(p1 - p0) - hev_tresh */     \
-  const __m128i t4 = _mm_subs_epu8(t2, h);  /* abs(q1 - q0) - hev_tresh */     \
-                                                                               \
-  not_hev = _mm_or_si128(t3, t4);                                              \
-  not_hev = _mm_cmpeq_epi8(not_hev, zero); /* not_hev <= t1 && not_hev <= t2 */\
-}
-
-#define GET_BASE_DELTA(p1, p0, q0, q1, o) {                                    \
-  const __m128i qp0 = _mm_subs_epi8(q0, p0);  /* q0 - p0 */                    \
-  o = _mm_subs_epi8(p1, q1);            /* p1 - q1 */                          \
-  o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 1 * (q0 - p0) */          \
-  o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 2 * (q0 - p0) */          \
-  o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 3 * (q0 - p0) */          \
-}
-
-#define DO_SIMPLE_FILTER(p0, q0, fl) {                                         \
-  const __m128i three = _mm_set1_epi8(3);                                      \
-  const __m128i four = _mm_set1_epi8(4);                                       \
-  __m128i v3 = _mm_adds_epi8(fl, three);                                       \
-  __m128i v4 = _mm_adds_epi8(fl, four);                                        \
-                                                                               \
-  /* Do +4 side */                                                             \
-  SIGNED_SHIFT_N(v4, 3);                /* v4 >> 3  */                         \
-  q0 = _mm_subs_epi8(q0, v4);           /* q0 -= v4 */                         \
-                                                                               \
-  /* Now do +3 side */                                                         \
-  SIGNED_SHIFT_N(v3, 3);                /* v3 >> 3  */                         \
-  p0 = _mm_adds_epi8(p0, v3);           /* p0 += v3 */                         \
+// input/output is uint8_t
+static WEBP_INLINE void GetNotHEV(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  int hev_thresh, __m128i* const not_hev) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i t_1 = MM_ABS(*p1, *p0);
+  const __m128i t_2 = MM_ABS(*q1, *q0);
+
+  const __m128i h = _mm_set1_epi8(hev_thresh);
+  const __m128i t_max = _mm_max_epu8(t_1, t_2);
+
+  const __m128i t_max_h = _mm_subs_epu8(t_max, h);
+  *not_hev = _mm_cmpeq_epi8(t_max_h, zero);  // t_1 <= h && t_2 <= h
 }
 
-// Updates values of 2 pixels at MB edge during complex filtering.
-// Update operations:
-// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)]
-#define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) {                                   \
-  const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7);                               \
-  const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7);                               \
-  const __m128i a = _mm_packs_epi16(a_lo7, a_hi7);                             \
-  pi = _mm_adds_epi8(pi, a);                                                   \
-  qi = _mm_subs_epi8(qi, a);                                                   \
+// input pixels are int8_t
+static WEBP_INLINE void GetBaseDelta(const __m128i* const p1,
+                                     const __m128i* const p0,
+                                     const __m128i* const q0,
+                                     const __m128i* const q1,
+                                     __m128i* const delta) {
+  // beware of addition order, for saturation!
+  const __m128i p1_q1 = _mm_subs_epi8(*p1, *q1);   // p1 - q1
+  const __m128i q0_p0 = _mm_subs_epi8(*q0, *p0);   // q0 - p0
+  const __m128i s1 = _mm_adds_epi8(p1_q1, q0_p0);  // p1 - q1 + 1 * (q0 - p0)
+  const __m128i s2 = _mm_adds_epi8(q0_p0, s1);     // p1 - q1 + 2 * (q0 - p0)
+  const __m128i s3 = _mm_adds_epi8(q0_p0, s2);     // p1 - q1 + 3 * (q0 - p0)
+  *delta = s3;
 }
 
-static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
-                        const __m128i* q1, int thresh, __m128i *mask) {
-  __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
-  *mask = _mm_set1_epi8(0xFE);
-  t1 = _mm_and_si128(t1, *mask);        // set lsb of each byte to zero
-  t1 = _mm_srli_epi16(t1, 1);           // abs(p1 - q1) / 2
+// input and output are int8_t
+static WEBP_INLINE void DoSimpleFilter(__m128i* const p0, __m128i* const q0,
+                                       const __m128i* const fl) {
+  const __m128i k3 = _mm_set1_epi8(3);
+  const __m128i k4 = _mm_set1_epi8(4);
+  __m128i v3 = _mm_adds_epi8(*fl, k3);
+  __m128i v4 = _mm_adds_epi8(*fl, k4);
+
+  SignedShift8b(&v4);                  // v4 >> 3
+  SignedShift8b(&v3);                  // v3 >> 3
+  *q0 = _mm_subs_epi8(*q0, v4);        // q0 -= v4
+  *p0 = _mm_adds_epi8(*p0, v3);        // p0 += v3
+}
 
-  *mask = MM_ABS(*p0, *q0);             // abs(p0 - q0)
-  *mask = _mm_adds_epu8(*mask, *mask);  // abs(p0 - q0) * 2
-  *mask = _mm_adds_epu8(*mask, t1);     // abs(p0 - q0) * 2 + abs(p1 - q1) / 2
+// Updates values of 2 pixels at MB edge during complex filtering.
+// Update operations:
+// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
+// Pixels 'pi' and 'qi' are int8_t on input, uint8_t on output (sign flip).
+static WEBP_INLINE void Update2Pixels(__m128i* const pi, __m128i* const qi,
+                                      const __m128i* const a0_lo,
+                                      const __m128i* const a0_hi) {
+  const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
+  const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
+  const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  *pi = _mm_adds_epi8(*pi, delta);
+  *qi = _mm_subs_epi8(*qi, delta);
+  FLIP_SIGN_BIT2(*pi, *qi);
+}
 
-  t1 = _mm_set1_epi8(thresh);
-  *mask = _mm_subs_epu8(*mask, t1);     // mask <= thresh
-  *mask = _mm_cmpeq_epi8(*mask, _mm_setzero_si128());
+// input pixels are uint8_t
+static WEBP_INLINE void NeedsFilter(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh, __m128i* const mask) {
+  const __m128i m_thresh = _mm_set1_epi8(thresh);
+  const __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
+  const __m128i kFE = _mm_set1_epi8(0xFE);
+  const __m128i t2 = _mm_and_si128(t1, kFE);  // set lsb of each byte to zero
+  const __m128i t3 = _mm_srli_epi16(t2, 1);   // abs(p1 - q1) / 2
+
+  const __m128i t4 = MM_ABS(*p0, *q0);        // abs(p0 - q0)
+  const __m128i t5 = _mm_adds_epu8(t4, t4);   // abs(p0 - q0) * 2
+  const __m128i t6 = _mm_adds_epu8(t5, t3);   // abs(p0-q0)*2 + abs(p1-q1)/2
+
+  const __m128i t7 = _mm_subs_epu8(t6, m_thresh);  // zero iff t6 <= m_thresh
+  *mask = _mm_cmpeq_epi8(t7, _mm_setzero_si128());
 }
 
 //------------------------------------------------------------------------------
 // Edge filtering functions
 
 // Applies filter on 2 pixels (p0 and q0)
-static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
-                                  const __m128i* q1, int thresh) {
+static WEBP_INLINE void DoFilter2(__m128i* const p1, __m128i* const p0,
+                                  __m128i* const q0, __m128i* const q1,
+                                  int thresh) {
   __m128i a, mask;
   const __m128i sign_bit = _mm_set1_epi8(0x80);
+  // convert p1/q1 to int8_t (for GetBaseDelta)
   const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
   const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
 
   NeedsFilter(p1, p0, q0, q1, thresh, &mask);
 
-  // convert to signed values
   FLIP_SIGN_BIT2(*p0, *q0);
-
-  GET_BASE_DELTA(p1s, *p0, *q0, q1s, a);
+  GetBaseDelta(&p1s, p0, q0, &q1s, &a);
   a = _mm_and_si128(a, mask);     // mask filter values we don't care about
-  DO_SIMPLE_FILTER(*p0, *q0, a);
-
-  // unoffset
+  DoSimpleFilter(p0, q0, &a);
   FLIP_SIGN_BIT2(*p0, *q0);
 }
 
 // Applies filter on 4 pixels (p1, p0, q0 and q1)
-static WEBP_INLINE void DoFilter4(__m128i* p1, __m128i *p0,
-                                  __m128i* q0, __m128i* q1,
-                                  const __m128i* mask, int hev_thresh) {
+static WEBP_INLINE void DoFilter4(__m128i* const p1, __m128i* const p0,
+                                  __m128i* const q0, __m128i* const q1,
+                                  const __m128i* const mask, int hev_thresh) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  const __m128i k64 = _mm_set1_epi8(64);
+  const __m128i k3 = _mm_set1_epi8(3);
+  const __m128i k4 = _mm_set1_epi8(4);
   __m128i not_hev;
   __m128i t1, t2, t3;
-  const __m128i sign_bit = _mm_set1_epi8(0x80);
 
   // compute hev mask
-  GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
+  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
 
   // convert to signed values
   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
@@ -385,135 +448,115 @@ static WEBP_INLINE void DoFilter4(__m128i* p1, __m128i *p0,
   t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 3 * (q0 - p0)
   t1 = _mm_and_si128(t1, *mask);       // mask filter values we don't care about
 
-  // Do +4 side
-  t2 = _mm_set1_epi8(4);
-  t2 = _mm_adds_epi8(t1, t2);        // 3 * (q0 - p0) + (p1 - q1) + 4
-  SIGNED_SHIFT_N(t2, 3);             // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
-  t3 = t2;                           // save t2
-  *q0 = _mm_subs_epi8(*q0, t2);      // q0 -= t2
-
-  // Now do +3 side
-  t2 = _mm_set1_epi8(3);
-  t2 = _mm_adds_epi8(t1, t2);        // +3 instead of +4
-  SIGNED_SHIFT_N(t2, 3);             // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  t2 = _mm_adds_epi8(t1, k3);        // 3 * (q0 - p0) + hev(p1 - q1) + 3
+  t3 = _mm_adds_epi8(t1, k4);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
+  SignedShift8b(&t2);                // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  SignedShift8b(&t3);                // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
   *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
+  *q0 = _mm_subs_epi8(*q0, t3);      // q0 -= t3
+  FLIP_SIGN_BIT2(*p0, *q0);
 
-  t2 = _mm_set1_epi8(1);
-  t3 = _mm_adds_epi8(t3, t2);
-  SIGNED_SHIFT_N(t3, 1);             // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 4
+  // this is equivalent to signed (a + 1) >> 1 calculation
+  t2 = _mm_add_epi8(t3, sign_bit);
+  t3 = _mm_avg_epu8(t2, zero);
+  t3 = _mm_sub_epi8(t3, k64);
 
   t3 = _mm_and_si128(not_hev, t3);   // if !hev
   *q1 = _mm_subs_epi8(*q1, t3);      // q1 -= t3
   *p1 = _mm_adds_epi8(*p1, t3);      // p1 += t3
-
-  // unoffset
-  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+  FLIP_SIGN_BIT2(*p1, *q1);
 }
 
 // Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
-static WEBP_INLINE void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0,
-                                  __m128i* q0, __m128i* q1, __m128i *q2,
-                                  const __m128i* mask, int hev_thresh) {
-  __m128i a, not_hev;
+static WEBP_INLINE void DoFilter6(__m128i* const p2, __m128i* const p1,
+                                  __m128i* const p0, __m128i* const q0,
+                                  __m128i* const q1, __m128i* const q2,
+                                  const __m128i* const mask, int hev_thresh) {
+  const __m128i zero = _mm_setzero_si128();
   const __m128i sign_bit = _mm_set1_epi8(0x80);
+  __m128i a, not_hev;
 
   // compute hev mask
-  GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
+  GetNotHEV(p1, p0, q0, q1, hev_thresh, &not_hev);
 
-  // convert to signed values
   FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
   FLIP_SIGN_BIT2(*p2, *q2);
-
-  GET_BASE_DELTA(*p1, *p0, *q0, *q1, a);
+  GetBaseDelta(p1, p0, q0, q1, &a);
 
   { // do simple filter on pixels with hev
     const __m128i m = _mm_andnot_si128(not_hev, *mask);
     const __m128i f = _mm_and_si128(a, m);
-    DO_SIMPLE_FILTER(*p0, *q0, f);
+    DoSimpleFilter(p0, q0, &f);
   }
+
   { // do strong filter on pixels with not hev
-    const __m128i zero = _mm_setzero_si128();
-    const __m128i nine = _mm_set1_epi16(0x0900);
-    const __m128i sixty_three = _mm_set1_epi16(63);
+    const __m128i k9 = _mm_set1_epi16(0x0900);
+    const __m128i k63 = _mm_set1_epi16(63);
 
     const __m128i m = _mm_and_si128(not_hev, *mask);
     const __m128i f = _mm_and_si128(a, m);
+
     const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
     const __m128i f_hi = _mm_unpackhi_epi8(zero, f);
 
-    const __m128i f9_lo = _mm_mulhi_epi16(f_lo, nine);   // Filter (lo) * 9
-    const __m128i f9_hi = _mm_mulhi_epi16(f_hi, nine);   // Filter (hi) * 9
-    const __m128i f18_lo = _mm_add_epi16(f9_lo, f9_lo);  // Filter (lo) * 18
-    const __m128i f18_hi = _mm_add_epi16(f9_hi, f9_hi);  // Filter (hi) * 18
+    const __m128i f9_lo = _mm_mulhi_epi16(f_lo, k9);    // Filter (lo) * 9
+    const __m128i f9_hi = _mm_mulhi_epi16(f_hi, k9);    // Filter (hi) * 9
 
-    const __m128i a2_lo = _mm_add_epi16(f9_lo, sixty_three);  // Filter * 9 + 63
-    const __m128i a2_hi = _mm_add_epi16(f9_hi, sixty_three);  // Filter * 9 + 63
+    const __m128i a2_lo = _mm_add_epi16(f9_lo, k63);    // Filter * 9 + 63
+    const __m128i a2_hi = _mm_add_epi16(f9_hi, k63);    // Filter * 9 + 63
 
-    const __m128i a1_lo = _mm_add_epi16(f18_lo, sixty_three);  // F... * 18 + 63
-    const __m128i a1_hi = _mm_add_epi16(f18_hi, sixty_three);  // F... * 18 + 63
+    const __m128i a1_lo = _mm_add_epi16(a2_lo, f9_lo);  // Filter * 18 + 63
+    const __m128i a1_hi = _mm_add_epi16(a2_hi, f9_hi);  // Filter * 18 + 63
 
-    const __m128i a0_lo = _mm_add_epi16(f18_lo, a2_lo);  // Filter * 27 + 63
-    const __m128i a0_hi = _mm_add_epi16(f18_hi, a2_hi);  // Filter * 27 + 63
+    const __m128i a0_lo = _mm_add_epi16(a1_lo, f9_lo);  // Filter * 27 + 63
+    const __m128i a0_hi = _mm_add_epi16(a1_hi, f9_hi);  // Filter * 27 + 63
 
-    UPDATE_2PIXELS(*p2, *q2, a2_lo, a2_hi);
-    UPDATE_2PIXELS(*p1, *q1, a1_lo, a1_hi);
-    UPDATE_2PIXELS(*p0, *q0, a0_lo, a0_hi);
+    Update2Pixels(p2, q2, &a2_lo, &a2_hi);
+    Update2Pixels(p1, q1, &a1_lo, &a1_hi);
+    Update2Pixels(p0, q0, &a0_lo, &a0_hi);
   }
+}
 
-  // unoffset
-  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
-  FLIP_SIGN_BIT2(*p2, *q2);
+// memcpy() is the safe way of moving potentially unaligned 32b memory.
+static WEBP_INLINE uint32_t MemToUint32(const uint8_t* const ptr) {
+  uint32_t A;
+  memcpy(&A, (const int*)ptr, sizeof(A));
+  return A;
 }
 
 // reads 8 rows across a vertical edge.
-//
-// TODO(somnath): Investigate _mm_shuffle* also see if it can be broken into
-// two Load4x4() to avoid code duplication.
-static WEBP_INLINE void Load8x4(const uint8_t* b, int stride,
-                                __m128i* p, __m128i* q) {
-  __m128i t1, t2;
-
-  // Load 0th, 1st, 4th and 5th rows
-  __m128i r0 =  _mm_cvtsi32_si128(*((int*)&b[0 * stride]));  // 03 02 01 00
-  __m128i r1 =  _mm_cvtsi32_si128(*((int*)&b[1 * stride]));  // 13 12 11 10
-  __m128i r4 =  _mm_cvtsi32_si128(*((int*)&b[4 * stride]));  // 43 42 41 40
-  __m128i r5 =  _mm_cvtsi32_si128(*((int*)&b[5 * stride]));  // 53 52 51 50
-
-  r0 = _mm_unpacklo_epi32(r0, r4);               // 43 42 41 40 03 02 01 00
-  r1 = _mm_unpacklo_epi32(r1, r5);               // 53 52 51 50 13 12 11 10
-
-  // t1 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
-  t1 = _mm_unpacklo_epi8(r0, r1);
-
-  // Load 2nd, 3rd, 6th and 7th rows
-  r0 =  _mm_cvtsi32_si128(*((int*)&b[2 * stride]));          // 23 22 21 22
-  r1 =  _mm_cvtsi32_si128(*((int*)&b[3 * stride]));          // 33 32 31 30
-  r4 =  _mm_cvtsi32_si128(*((int*)&b[6 * stride]));          // 63 62 61 60
-  r5 =  _mm_cvtsi32_si128(*((int*)&b[7 * stride]));          // 73 72 71 70
-
-  r0 = _mm_unpacklo_epi32(r0, r4);               // 63 62 61 60 23 22 21 20
-  r1 = _mm_unpacklo_epi32(r1, r5);               // 73 72 71 70 33 32 31 30
-
-  // t2 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
-  t2 = _mm_unpacklo_epi8(r0, r1);
-
-  // t1 = 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-  // t2 = 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
-  r0 = t1;
-  t1 = _mm_unpacklo_epi16(t1, t2);
-  t2 = _mm_unpackhi_epi16(r0, t2);
+static WEBP_INLINE void Load8x4(const uint8_t* const b, int stride,
+                                __m128i* const p, __m128i* const q) {
+  // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00
+  // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10
+  const __m128i A0 = _mm_set_epi32(
+      MemToUint32(&b[6 * stride]), MemToUint32(&b[2 * stride]),
+      MemToUint32(&b[4 * stride]), MemToUint32(&b[0 * stride]));
+  const __m128i A1 = _mm_set_epi32(
+      MemToUint32(&b[7 * stride]), MemToUint32(&b[3 * stride]),
+      MemToUint32(&b[5 * stride]), MemToUint32(&b[1 * stride]));
+
+  // B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
+  // B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
+  const __m128i B0 = _mm_unpacklo_epi8(A0, A1);
+  const __m128i B1 = _mm_unpackhi_epi8(A0, A1);
+
+  // C0 = 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+  // C1 = 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+  const __m128i C0 = _mm_unpacklo_epi16(B0, B1);
+  const __m128i C1 = _mm_unpackhi_epi16(B0, B1);
 
   // *p = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
   // *q = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-  *p = _mm_unpacklo_epi32(t1, t2);
-  *q = _mm_unpackhi_epi32(t1, t2);
+  *p = _mm_unpacklo_epi32(C0, C1);
+  *q = _mm_unpackhi_epi32(C0, C1);
 }
 
-static WEBP_INLINE void Load16x4(const uint8_t* r0, const uint8_t* r8,
+static WEBP_INLINE void Load16x4(const uint8_t* const r0,
+                                 const uint8_t* const r8,
                                  int stride,
-                                 __m128i* p1, __m128i* p0,
-                                 __m128i* q0, __m128i* q1) {
-  __m128i t1, t2;
+                                 __m128i* const p1, __m128i* const p0,
+                                 __m128i* const q0, __m128i* const q1) {
   // Assume the pixels around the edge (|) are numbered as follows
   //                00 01 | 02 03
   //                10 11 | 12 13
@@ -532,19 +575,21 @@ static WEBP_INLINE void Load16x4(const uint8_t* r0, const uint8_t* r8,
   Load8x4(r0, stride, p1, q0);
   Load8x4(r8, stride, p0, q1);
 
-  t1 = *p1;
-  t2 = *q0;
-  // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-  // p0 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-  // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-  // q1 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-  *p1 = _mm_unpacklo_epi64(t1, *p0);
-  *p0 = _mm_unpackhi_epi64(t1, *p0);
-  *q0 = _mm_unpacklo_epi64(t2, *q1);
-  *q1 = _mm_unpackhi_epi64(t2, *q1);
+  {
+    // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+    // p0 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+    // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+    // q1 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+    const __m128i t1 = *p1;
+    const __m128i t2 = *q0;
+    *p1 = _mm_unpacklo_epi64(t1, *p0);
+    *p0 = _mm_unpackhi_epi64(t1, *p0);
+    *q0 = _mm_unpacklo_epi64(t2, *q1);
+    *q1 = _mm_unpackhi_epi64(t2, *q1);
+  }
 }
 
-static WEBP_INLINE void Store4x4(__m128i* x, uint8_t* dst, int stride) {
+static WEBP_INLINE void Store4x4(__m128i* const x, uint8_t* dst, int stride) {
   int i;
   for (i = 0; i < 4; ++i, dst += stride) {
     *((int32_t*)dst) = _mm_cvtsi128_si32(*x);
@@ -553,48 +598,51 @@ static WEBP_INLINE void Store4x4(__m128i* x, uint8_t* dst, int stride) {
 }
 
 // Transpose back and store
-static WEBP_INLINE void Store16x4(uint8_t* r0, uint8_t* r8, int stride,
-                                  __m128i* p1, __m128i* p0,
-                                  __m128i* q0, __m128i* q1) {
-  __m128i t1;
+static WEBP_INLINE void Store16x4(const __m128i* const p1,
+                                  const __m128i* const p0,
+                                  const __m128i* const q0,
+                                  const __m128i* const q1,
+                                  uint8_t* r0, uint8_t* r8,
+                                  int stride) {
+  __m128i t1, p1_s, p0_s, q0_s, q1_s;
 
   // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
   // p1 = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
   t1 = *p0;
-  *p0 = _mm_unpacklo_epi8(*p1, t1);
-  *p1 = _mm_unpackhi_epi8(*p1, t1);
+  p0_s = _mm_unpacklo_epi8(*p1, t1);
+  p1_s = _mm_unpackhi_epi8(*p1, t1);
 
   // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
   // q1 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
   t1 = *q0;
-  *q0 = _mm_unpacklo_epi8(t1, *q1);
-  *q1 = _mm_unpackhi_epi8(t1, *q1);
+  q0_s = _mm_unpacklo_epi8(t1, *q1);
+  q1_s = _mm_unpackhi_epi8(t1, *q1);
 
   // p0 = 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
   // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
-  t1 = *p0;
-  *p0 = _mm_unpacklo_epi16(t1, *q0);
-  *q0 = _mm_unpackhi_epi16(t1, *q0);
+  t1 = p0_s;
+  p0_s = _mm_unpacklo_epi16(t1, q0_s);
+  q0_s = _mm_unpackhi_epi16(t1, q0_s);
 
   // p1 = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
   // q1 = f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
-  t1 = *p1;
-  *p1 = _mm_unpacklo_epi16(t1, *q1);
-  *q1 = _mm_unpackhi_epi16(t1, *q1);
+  t1 = p1_s;
+  p1_s = _mm_unpacklo_epi16(t1, q1_s);
+  q1_s = _mm_unpackhi_epi16(t1, q1_s);
 
-  Store4x4(p0, r0, stride);
+  Store4x4(&p0_s, r0, stride);
   r0 += 4 * stride;
-  Store4x4(q0, r0, stride);
+  Store4x4(&q0_s, r0, stride);
 
-  Store4x4(p1, r8, stride);
+  Store4x4(&p1_s, r8, stride);
   r8 += 4 * stride;
-  Store4x4(q1, r8, stride);
+  Store4x4(&q1_s, r8, stride);
 }
 
 //------------------------------------------------------------------------------
 // Simple In-loop filtering (Paragraph 15.2)
 
-static void SimpleVFilter16SSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
   // Load
   __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
   __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
@@ -605,49 +653,49 @@ static void SimpleVFilter16SSE2(uint8_t* p, int stride, int thresh) {
 
   // Store
   _mm_storeu_si128((__m128i*)&p[-stride], p0);
-  _mm_storeu_si128((__m128i*)p, q0);
+  _mm_storeu_si128((__m128i*)&p[0], q0);
 }
 
-static void SimpleHFilter16SSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
   __m128i p1, p0, q0, q1;
 
   p -= 2;  // beginning of p1
 
-  Load16x4(p, p + 8 * stride,  stride, &p1, &p0, &q0, &q1);
+  Load16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
   DoFilter2(&p1, &p0, &q0, &q1, thresh);
-  Store16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
+  Store16x4(&p1, &p0, &q0, &q1, p, p + 8 * stride, stride);
 }
 
-static void SimpleVFilter16iSSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4 * stride;
-    SimpleVFilter16SSE2(p, stride, thresh);
+    SimpleVFilter16(p, stride, thresh);
   }
 }
 
-static void SimpleHFilter16iSSE2(uint8_t* p, int stride, int thresh) {
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
   int k;
   for (k = 3; k > 0; --k) {
     p += 4;
-    SimpleHFilter16SSE2(p, stride, thresh);
+    SimpleHFilter16(p, stride, thresh);
   }
 }
 
 //------------------------------------------------------------------------------
 // Complex In-loop filtering (Paragraph 15.3)
 
-#define MAX_DIFF1(p3, p2, p1, p0, m) {                                         \
-  m = MM_ABS(p3, p2);                                                          \
+#define MAX_DIFF1(p3, p2, p1, p0, m) do {                                      \
+  m = MM_ABS(p1, p0);                                                          \
+  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
   m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
-  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
-}
+} while (0)
 
-#define MAX_DIFF2(p3, p2, p1, p0, m) {                                         \
+#define MAX_DIFF2(p3, p2, p1, p0, m) do {                                      \
+  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
   m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
   m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
-  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
-}
+} while (0)
 
 #define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) {                             \
   e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]);                            \
@@ -656,10 +704,11 @@ static void SimpleHFilter16iSSE2(uint8_t* p, int stride, int thresh) {
   e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]);                            \
 }
 
-#define LOADUV_H_EDGE(p, u, v, stride) {                                       \
-  p = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                               \
-  p = _mm_unpacklo_epi64(p, _mm_loadl_epi64((__m128i*)&(v)[(stride)]));        \
-}
+#define LOADUV_H_EDGE(p, u, v, stride) do {                                    \
+  const __m128i U = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                 \
+  const __m128i V = _mm_loadl_epi64((__m128i*)&(v)[(stride)]);                 \
+  p = _mm_unpacklo_epi64(U, V);                                                \
+} while (0)
 
 #define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) {                        \
   LOADUV_H_EDGE(e1, u, v, 0 * stride);                                         \
@@ -674,18 +723,23 @@ static void SimpleHFilter16iSSE2(uint8_t* p, int stride, int thresh) {
   _mm_storel_epi64((__m128i*)&v[(stride)], p);                                 \
 }
 
-#define COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask) {               \
-  __m128i fl_yes;                                                              \
-  const __m128i it = _mm_set1_epi8(ithresh);                                   \
-  mask = _mm_subs_epu8(mask, it);                                              \
-  mask = _mm_cmpeq_epi8(mask, _mm_setzero_si128());                            \
-  NeedsFilter(&p1, &p0, &q0, &q1, thresh, &fl_yes);                            \
-  mask = _mm_and_si128(mask, fl_yes);                                          \
+static WEBP_INLINE void ComplexMask(const __m128i* const p1,
+                                    const __m128i* const p0,
+                                    const __m128i* const q0,
+                                    const __m128i* const q1,
+                                    int thresh, int ithresh,
+                                    __m128i* const mask) {
+  const __m128i it = _mm_set1_epi8(ithresh);
+  const __m128i diff = _mm_subs_epu8(*mask, it);
+  const __m128i thresh_mask = _mm_cmpeq_epi8(diff, _mm_setzero_si128());
+  __m128i filter_mask;
+  NeedsFilter(p1, p0, q0, q1, thresh, &filter_mask);
+  *mask = _mm_and_si128(thresh_mask, filter_mask);
 }
 
 // on macroblock edges
-static void VFilter16SSE2(uint8_t* p, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
   __m128i t1;
   __m128i mask;
   __m128i p2, p1, p0, q0, q1, q2;
@@ -698,20 +752,20 @@ static void VFilter16SSE2(uint8_t* p, int stride,
   LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
   MAX_DIFF2(t1, q2, q1, q0, mask);
 
-  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
   // Store
   _mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
   _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
   _mm_storeu_si128((__m128i*)&p[-1 * stride], p0);
-  _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
-  _mm_storeu_si128((__m128i*)&p[1 * stride], q1);
-  _mm_storeu_si128((__m128i*)&p[2 * stride], q2);
+  _mm_storeu_si128((__m128i*)&p[+0 * stride], q0);
+  _mm_storeu_si128((__m128i*)&p[+1 * stride], q1);
+  _mm_storeu_si128((__m128i*)&p[+2 * stride], q2);
 }
 
-static void HFilter16SSE2(uint8_t* p, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i p3, p2, p1, p0, q0, q1, q2, q3;
 
@@ -722,71 +776,78 @@ static void HFilter16SSE2(uint8_t* p, int stride,
   Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);  // q0, q1, q2, q3
   MAX_DIFF2(q3, q2, q1, q0, mask);
 
-  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
-  Store16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
-  Store16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
+  Store16x4(&p3, &p2, &p1, &p0, b, b + 8 * stride, stride);
+  Store16x4(&q0, &q1, &q2, &q3, p, p + 8 * stride, stride);
 }
 
 // on three inner edges
-static void VFilter16iSSE2(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
   int k;
-  __m128i mask;
-  __m128i t1, t2, p1, p0, q0, q1;
+  __m128i p3, p2, p1, p0;   // carried across loop iterations
 
-  for (k = 3; k > 0; --k) {
-    // Load p3, p2, p1, p0
-    LOAD_H_EDGES4(p, stride, t2, t1, p1, p0);
-    MAX_DIFF1(t2, t1, p1, p0, mask);
+  LOAD_H_EDGES4(p, stride, p3, p2, p1, p0);  // prologue
 
+  for (k = 3; k > 0; --k) {
+    __m128i mask, tmp1, tmp2;
+    uint8_t* const b = p + 2 * stride;   // beginning of p1
     p += 4 * stride;
 
-    // Load q0, q1, q2, q3
-    LOAD_H_EDGES4(p, stride, q0, q1, t1, t2);
-    MAX_DIFF2(t2, t1, q1, q0, mask);
+    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
+    LOAD_H_EDGES4(p, stride, p3, p2, tmp1, tmp2);
+    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
 
-    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
-    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+    // p3 and p2 are not just temporary variables here: they will be
+    // re-used for next span. And q2/q3 will become p1/p0 accordingly.
+    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
 
     // Store
-    _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
-    _mm_storeu_si128((__m128i*)&p[-1 * stride], p0);
-    _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
-    _mm_storeu_si128((__m128i*)&p[1 * stride], q1);
+    _mm_storeu_si128((__m128i*)&b[0 * stride], p1);
+    _mm_storeu_si128((__m128i*)&b[1 * stride], p0);
+    _mm_storeu_si128((__m128i*)&b[2 * stride], p3);
+    _mm_storeu_si128((__m128i*)&b[3 * stride], p2);
+
+    // rotate samples
+    p1 = tmp1;
+    p0 = tmp2;
   }
 }
 
-static void HFilter16iSSE2(uint8_t* p, int stride,
-                           int thresh, int ithresh, int hev_thresh) {
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
   int k;
-  uint8_t* b;
-  __m128i mask;
-  __m128i t1, t2, p1, p0, q0, q1;
+  __m128i p3, p2, p1, p0;   // carried across loop iterations
+
+  Load16x4(p, p + 8 * stride, stride, &p3, &p2, &p1, &p0);  // prologue
 
   for (k = 3; k > 0; --k) {
-    b = p;
-    Load16x4(b, b + 8 * stride, stride, &t2, &t1, &p1, &p0);  // p3, p2, p1, p0
-    MAX_DIFF1(t2, t1, p1, p0, mask);
+    __m128i mask, tmp1, tmp2;
+    uint8_t* const b = p + 2;   // beginning of p1
 
-    b += 4;  // beginning of q0
-    Load16x4(b, b + 8 * stride, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
-    MAX_DIFF2(t2, t1, q1, q0, mask);
+    p += 4;  // beginning of q0 (and next span)
 
-    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
-    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+    MAX_DIFF1(p3, p2, p1, p0, mask);   // compute partial mask
+    Load16x4(p, p + 8 * stride, stride, &p3, &p2, &tmp1, &tmp2);
+    MAX_DIFF2(p3, p2, tmp1, tmp2, mask);
 
-    b -= 2;  // beginning of p1
-    Store16x4(b, b + 8 * stride, stride, &p1, &p0, &q0, &q1);
+    ComplexMask(&p1, &p0, &p3, &p2, thresh, ithresh, &mask);
+    DoFilter4(&p1, &p0, &p3, &p2, &mask, hev_thresh);
 
-    p += 4;
+    Store16x4(&p1, &p0, &p3, &p2, b, b + 8 * stride, stride);
+
+    // rotate samples
+    p1 = tmp1;
+    p0 = tmp2;
   }
 }
 
 // 8-pixels wide variant, for chroma filtering
-static void VFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
-                         int thresh, int ithresh, int hev_thresh) {
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i t1, p2, p1, p0, q0, q1, q2;
 
@@ -798,7 +859,7 @@ static void VFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
   LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
   MAX_DIFF2(t1, q2, q1, q0, mask);
 
-  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
   // Store
@@ -810,8 +871,8 @@ static void VFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
   STOREUV(q2, u, v, 2 * stride);
 }
 
-static void HFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
-                         int thresh, int ithresh, int hev_thresh) {
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i p3, p2, p1, p0, q0, q1, q2, q3;
 
@@ -823,15 +884,15 @@ static void HFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
   Load16x4(u, v, stride, &q0, &q1, &q2, &q3);    // q0, q1, q2, q3
   MAX_DIFF2(q3, q2, q1, q0, mask);
 
-  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
   DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
 
-  Store16x4(tu, tv, stride, &p3, &p2, &p1, &p0);
-  Store16x4(u, v, stride, &q0, &q1, &q2, &q3);
+  Store16x4(&p3, &p2, &p1, &p0, tu, tv, stride);
+  Store16x4(&q0, &q1, &q2, &q3, u, v, stride);
 }
 
-static void VFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i t1, t2, p1, p0, q0, q1;
 
@@ -846,7 +907,7 @@ static void VFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
   LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
   MAX_DIFF2(t2, t1, q1, q0, mask);
 
-  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
   DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
 
   // Store
@@ -856,8 +917,8 @@ static void VFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
   STOREUV(q1, u, v, 1 * stride);
 }
 
-static void HFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
-                          int thresh, int ithresh, int hev_thresh) {
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
   __m128i mask;
   __m128i t1, t2, p1, p0, q0, q1;
   Load16x4(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
@@ -868,36 +929,361 @@ static void HFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
   Load16x4(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
   MAX_DIFF2(t2, t1, q1, q0, mask);
 
-  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  ComplexMask(&p1, &p0, &q0, &q1, thresh, ithresh, &mask);
   DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
 
   u -= 2;  // beginning of p1
   v -= 2;
-  Store16x4(u, v, stride, &p1, &p0, &q0, &q1);
+  Store16x4(&p1, &p0, &q0, &q1, u, v, stride);
 }
 
-extern void VP8DspInitSSE2(void);
+//------------------------------------------------------------------------------
+// 4x4 predictions
+
+#define DST(x, y) dst[(x) + (y) * BPS]
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+
+// We use the following 8b-arithmetic tricks:
+//     (a + 2 * b + c + 2) >> 2 = (AC + b + 1) >> 1
+//   where: AC = (a + c) >> 1 = [(a + c + 1) >> 1] - [(a^c) & 1]
+// and:
+//     (a + 2 * b + c + 2) >> 2 = (AB + BC + 1) >> 1 - (ab|bc)&lsb
+//   where: AB = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
+//   and ab = a ^ b, bc = b ^ c, lsb = (AB^BC)&1
+
+static void VE4(uint8_t* dst) {    // vertical
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
+  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+  const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+  const __m128i a = _mm_avg_epu8(ABCDEFGH, CDEFGH00);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
+  const __m128i b = _mm_subs_epu8(a, lsb);
+  const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
+  const uint32_t vals = _mm_cvtsi128_si32(avg);
+  int i;
+  for (i = 0; i < 4; ++i) {
+    *(uint32_t*)(dst + i * BPS) = vals;
+  }
+}
+
+static void LD4(uint8_t* dst) {   // Down-Left
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
+  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+  const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+  const __m128i CDEFGHH0 = _mm_insert_epi16(CDEFGH00, dst[-BPS + 7], 3);
+  const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, CDEFGHH0);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
+  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+  const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(               abcdefg    );
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1));
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2));
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3));
+}
+
+static void VR4(uint8_t* dst) {   // Vertical-Right
+  const __m128i one = _mm_set1_epi8(1);
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int X = dst[-1 - BPS];
+  const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
+  const __m128i ABCD0 = _mm_srli_si128(XABCD, 1);
+  const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0);
+  const __m128i _XABCD = _mm_slli_si128(XABCD, 1);
+  const __m128i IXABCD = _mm_insert_epi16(_XABCD, I | (X << 8), 0);
+  const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
+  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+  const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(               abcd    );
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(               efgh    );
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1));
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1));
+
+  // these two are hard to implement in SSE2, so we keep the C-version:
+  DST(0, 2) = AVG3(J, I, X);
+  DST(0, 3) = AVG3(K, J, I);
+}
 
-void VP8DspInitSSE2(void) {
-  VP8Transform = TransformSSE2;
+static void VL4(uint8_t* dst) {   // Vertical-Left
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(dst - BPS));
+  const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
+  const __m128i CDEFGH__ = _mm_srli_si128(ABCDEFGH, 2);
+  const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, BCDEFGH_);
+  const __m128i avg2 = _mm_avg_epu8(CDEFGH__, BCDEFGH_);
+  const __m128i avg3 = _mm_avg_epu8(avg1, avg2);
+  const __m128i lsb1 = _mm_and_si128(_mm_xor_si128(avg1, avg2), one);
+  const __m128i ab = _mm_xor_si128(ABCDEFGH, BCDEFGH_);
+  const __m128i bc = _mm_xor_si128(CDEFGH__, BCDEFGH_);
+  const __m128i abbc = _mm_or_si128(ab, bc);
+  const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
+  const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
+  const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(               avg1    );
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(               avg4    );
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1));
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1));
+
+  // these two are hard to get and irregular
+  DST(3, 2) = (extra_out >> 0) & 0xff;
+  DST(3, 3) = (extra_out >> 8) & 0xff;
+}
+
+static void RD4(uint8_t* dst) {   // Down-right
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i XABCD = _mm_loadl_epi64((__m128i*)(dst - BPS - 1));
+  const __m128i ____XABCD = _mm_slli_si128(XABCD, 4);
+  const uint32_t I = dst[-1 + 0 * BPS];
+  const uint32_t J = dst[-1 + 1 * BPS];
+  const uint32_t K = dst[-1 + 2 * BPS];
+  const uint32_t L = dst[-1 + 3 * BPS];
+  const __m128i LKJI_____ =
+      _mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24));
+  const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD);
+  const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
+  const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
+  const __m128i avg1 = _mm_avg_epu8(JIXABCD__, LKJIXABCD);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
+  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+  const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(               abcdefg    );
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1));
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2));
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3));
+}
+
+#undef DST
+#undef AVG3
+
+//------------------------------------------------------------------------------
+// Luma 16x16
+
+static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
+  const uint8_t* top = dst - BPS;
+  const __m128i zero = _mm_setzero_si128();
+  int y;
+  if (size == 4) {
+    const __m128i top_values = _mm_cvtsi32_si128(MemToUint32(top));
+    const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
+    for (y = 0; y < 4; ++y, dst += BPS) {
+      const int val = dst[-1] - top[-1];
+      const __m128i base = _mm_set1_epi16(val);
+      const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
+      *(int*)dst = _mm_cvtsi128_si32(out);
+    }
+  } else if (size == 8) {
+    const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
+    const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
+    for (y = 0; y < 8; ++y, dst += BPS) {
+      const int val = dst[-1] - top[-1];
+      const __m128i base = _mm_set1_epi16(val);
+      const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
+      _mm_storel_epi64((__m128i*)dst, out);
+    }
+  } else {
+    const __m128i top_values = _mm_loadu_si128((const __m128i*)top);
+    const __m128i top_base_0 = _mm_unpacklo_epi8(top_values, zero);
+    const __m128i top_base_1 = _mm_unpackhi_epi8(top_values, zero);
+    for (y = 0; y < 16; ++y, dst += BPS) {
+      const int val = dst[-1] - top[-1];
+      const __m128i base = _mm_set1_epi16(val);
+      const __m128i out_0 = _mm_add_epi16(base, top_base_0);
+      const __m128i out_1 = _mm_add_epi16(base, top_base_1);
+      const __m128i out = _mm_packus_epi16(out_0, out_1);
+      _mm_storeu_si128((__m128i*)dst, out);
+    }
+  }
+}
+
+static void TM4(uint8_t* dst)   { TrueMotion(dst, 4); }
+static void TM8uv(uint8_t* dst) { TrueMotion(dst, 8); }
+static void TM16(uint8_t* dst)  { TrueMotion(dst, 16); }
+
+static void VE16(uint8_t* dst) {
+  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
+  int j;
+  for (j = 0; j < 16; ++j) {
+    _mm_storeu_si128((__m128i*)(dst + j * BPS), top);
+  }
+}
+
+static void HE16(uint8_t* dst) {     // horizontal
+  int j;
+  for (j = 16; j > 0; --j) {
+    const __m128i values = _mm_set1_epi8(dst[-1]);
+    _mm_storeu_si128((__m128i*)dst, values);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
+  int j;
+  const __m128i values = _mm_set1_epi8(v);
+  for (j = 0; j < 16; ++j) {
+    _mm_storeu_si128((__m128i*)(dst + j * BPS), values);
+  }
+}
+
+static void DC16(uint8_t* dst) {    // DC
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
+  const __m128i sad8x2 = _mm_sad_epu8(top, zero);
+  // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
+  const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
+  int left = 0;
+  int j;
+  for (j = 0; j < 16; ++j) {
+    left += dst[-1 + j * BPS];
+  }
+  {
+    const int DC = _mm_cvtsi128_si32(sum) + left + 16;
+    Put16(DC >> 5, dst);
+  }
+}
+
+static void DC16NoTop(uint8_t* dst) {   // DC with top samples not available
+  int DC = 8;
+  int j;
+  for (j = 0; j < 16; ++j) {
+    DC += dst[-1 + j * BPS];
+  }
+  Put16(DC >> 4, dst);
+}
+
+static void DC16NoLeft(uint8_t* dst) {  // DC with left samples not available
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top = _mm_loadu_si128((const __m128i*)(dst - BPS));
+  const __m128i sad8x2 = _mm_sad_epu8(top, zero);
+  // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
+  const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
+  const int DC = _mm_cvtsi128_si32(sum) + 8;
+  Put16(DC >> 4, dst);
+}
 
-  VP8VFilter16 = VFilter16SSE2;
-  VP8HFilter16 = HFilter16SSE2;
-  VP8VFilter8 = VFilter8SSE2;
-  VP8HFilter8 = HFilter8SSE2;
-  VP8VFilter16i = VFilter16iSSE2;
-  VP8HFilter16i = HFilter16iSSE2;
-  VP8VFilter8i = VFilter8iSSE2;
-  VP8HFilter8i = HFilter8iSSE2;
+static void DC16NoTopLeft(uint8_t* dst) {  // DC with no top and left samples
+  Put16(0x80, dst);
+}
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void VE8uv(uint8_t* dst) {    // vertical
+  int j;
+  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
+  for (j = 0; j < 8; ++j) {
+    _mm_storel_epi64((__m128i*)(dst + j * BPS), top);
+  }
+}
+
+static void HE8uv(uint8_t* dst) {    // horizontal
+  int j;
+  for (j = 0; j < 8; ++j) {
+    const __m128i values = _mm_set1_epi8(dst[-1]);
+    _mm_storel_epi64((__m128i*)dst, values);
+    dst += BPS;
+  }
+}
+
+// helper for chroma-DC predictions
+static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
+  int j;
+  const __m128i values = _mm_set1_epi8(v);
+  for (j = 0; j < 8; ++j) {
+    _mm_storel_epi64((__m128i*)(dst + j * BPS), values);
+  }
+}
 
-  VP8SimpleVFilter16 = SimpleVFilter16SSE2;
-  VP8SimpleHFilter16 = SimpleHFilter16SSE2;
-  VP8SimpleVFilter16i = SimpleVFilter16iSSE2;
-  VP8SimpleHFilter16i = SimpleHFilter16iSSE2;
+static void DC8uv(uint8_t* dst) {     // DC
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
+  const __m128i sum = _mm_sad_epu8(top, zero);
+  int left = 0;
+  int j;
+  for (j = 0; j < 8; ++j) {
+    left += dst[-1 + j * BPS];
+  }
+  {
+    const int DC = _mm_cvtsi128_si32(sum) + left + 8;
+    Put8x8uv(DC >> 4, dst);
+  }
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
+static void DC8uvNoLeft(uint8_t* dst) {   // DC with no left samples
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top = _mm_loadl_epi64((const __m128i*)(dst - BPS));
+  const __m128i sum = _mm_sad_epu8(top, zero);
+  const int DC = _mm_cvtsi128_si32(sum) + 4;
+  Put8x8uv(DC >> 3, dst);
+}
+
+static void DC8uvNoTop(uint8_t* dst) {  // DC with no top samples
+  int dc0 = 4;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[-1 + i * BPS];
+  }
+  Put8x8uv(dc0 >> 3, dst);
+}
+
+static void DC8uvNoTopLeft(uint8_t* dst) {    // DC with nothing
+  Put8x8uv(0x80, dst);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE2(void) {
+  VP8Transform = Transform;
+#if defined(USE_TRANSFORM_AC3)
+  VP8TransformAC3 = TransformAC3;
 #endif
 
-#endif   // WEBP_USE_SSE2
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+
+  VP8PredLuma4[1] = TM4;
+  VP8PredLuma4[2] = VE4;
+  VP8PredLuma4[4] = RD4;
+  VP8PredLuma4[5] = VR4;
+  VP8PredLuma4[6] = LD4;
+  VP8PredLuma4[7] = VL4;
+
+  VP8PredLuma16[0] = DC16;
+  VP8PredLuma16[1] = TM16;
+  VP8PredLuma16[2] = VE16;
+  VP8PredLuma16[3] = HE16;
+  VP8PredLuma16[4] = DC16NoTop;
+  VP8PredLuma16[5] = DC16NoLeft;
+  VP8PredLuma16[6] = DC16NoTopLeft;
+
+  VP8PredChroma8[0] = DC8uv;
+  VP8PredChroma8[1] = TM8uv;
+  VP8PredChroma8[2] = VE8uv;
+  VP8PredChroma8[3] = HE8uv;
+  VP8PredChroma8[4] = DC8uvNoTop;
+  VP8PredChroma8[5] = DC8uvNoLeft;
+  VP8PredChroma8[6] = DC8uvNoTopLeft;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8DspInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/dec_sse41.c b/drivers/webp/dsp/dec_sse41.c
new file mode 100644
index 0000000000..dc1e70428d
--- /dev/null
+++ b/drivers/webp/dsp/dec_sse41.c
@@ -0,0 +1,45 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE4 version of some decoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+
+#include <smmintrin.h>
+#include "../dec/vp8i.h"
+
+static void HE16(uint8_t* dst) {     // horizontal
+  int j;
+  const __m128i kShuffle3 = _mm_set1_epi8(3);
+  for (j = 16; j > 0; --j) {
+    const __m128i in = _mm_cvtsi32_si128(*(int*)(dst - 4));
+    const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
+    _mm_storeu_si128((__m128i*)dst, values);
+    dst += BPS;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8DspInitSSE41(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
+  VP8PredLuma16[3] = HE16;
+}
+
+#else  // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(VP8DspInitSSE41)
+
+#endif  // WEBP_USE_SSE41
diff --git a/drivers/webp/dsp/dsp.h b/drivers/webp/dsp/dsp.h
index fd686a8532..4613d9c3ff 100644
--- a/drivers/webp/dsp/dsp.h
+++ b/drivers/webp/dsp/dsp.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   Speed-critical functions.
@@ -12,44 +14,121 @@
 #ifndef WEBP_DSP_DSP_H_
 #define WEBP_DSP_DSP_H_
 
-#include "../types.h"
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
+#define BPS 32   // this is the common stride for enc/dec
+
 //------------------------------------------------------------------------------
 // CPU detection
 
-#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#if defined(__GNUC__)
+# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
+# define LOCAL_GCC_PREREQ(maj, min) \
+    (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
+#else
+# define LOCAL_GCC_VERSION 0
+# define LOCAL_GCC_PREREQ(maj, min) 0
+#endif
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER > 1310 && \
+    (defined(_M_X64) || defined(_M_IX86))
 #define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
 #endif
 
-#if defined(__SSE2__) || defined(WEBP_MSC_SSE2)
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
+    (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE41  // Visual C++ SSE4.1 targets
+#endif
+
+// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
+// files without intrinsics, allowing the corresponding Init() to be called.
+// Files containing intrinsics will need to be built targeting the instruction
+// set so should succeed on one of the earlier tests.
+#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
 #define WEBP_USE_SSE2
 #endif
 
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__)
+#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
+#define WEBP_USE_SSE41
+#endif
+
+#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
+#define WEBP_USE_AVX2
+#endif
+
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
 #define WEBP_ANDROID_NEON  // Android targets that might support NEON
 #endif
 
-#if ( (defined(__ARM_NEON__) && !defined(__aarch64__)) || defined(WEBP_ANDROID_NEON)) && !defined(PSP2_ENABLED)
+// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
+// inline assembly would need to be modified for use with Native Client.
+#if (defined(__ARM_NEON__) || defined(WEBP_ANDROID_NEON) || \
+     defined(__aarch64__)) && !defined(__native_client__)
+#define WEBP_USE_NEON
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
 #define WEBP_USE_NEON
+#define WEBP_USE_INTRINSICS
+#endif
+
+#if defined(__mips__) && !defined(__mips64) && \
+    defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
+#define WEBP_USE_MIPS32
+#if (__mips_isa_rev >= 2)
+#define WEBP_USE_MIPS32_R2
+#if defined(__mips_dspr2) || (__mips_dsp_rev >= 2)
+#define WEBP_USE_MIPS_DSP_R2
+#endif
+#endif
+#endif
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#define WEBP_TSAN_IGNORE_FUNCTION
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#undef WEBP_TSAN_IGNORE_FUNCTION
+#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
+#endif
 #endif
 
 typedef enum {
   kSSE2,
   kSSE3,
-  kNEON
+  kSSE4_1,
+  kAVX,
+  kAVX2,
+  kNEON,
+  kMIPS32,
+  kMIPSdspR2
 } CPUFeature;
 // returns true if the CPU supports the feature.
 typedef int (*VP8CPUInfo)(CPUFeature feature);
-extern VP8CPUInfo VP8GetCPUInfo;
+WEBP_EXTERN(VP8CPUInfo) VP8GetCPUInfo;
 
 //------------------------------------------------------------------------------
-// Encoding
+// Init stub generator
 
-int VP8GetAlpha(const int histo[]);
+// Defines an init function stub to ensure each module exposes a symbol,
+// avoiding a compiler warning.
+#define WEBP_DSP_INIT_STUB(func) \
+  extern void func(void); \
+  WEBP_TSAN_IGNORE_FUNCTION void func(void) {}
+
+//------------------------------------------------------------------------------
+// Encoding
 
 // Transforms
 // VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
@@ -60,7 +139,7 @@ typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
 typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
 extern VP8Idct VP8ITransform;
 extern VP8Fdct VP8FTransform;
-extern VP8WHT VP8ITransformWHT;
+extern VP8Fdct VP8FTransform2;   // performs two transforms at a time
 extern VP8WHT VP8FTransformWHT;
 // Predictions
 // *dst is the destination block. *top and *left can be NULL.
@@ -79,20 +158,63 @@ extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
 
 typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
 extern VP8BlockCopy VP8Copy4x4;
+extern VP8BlockCopy VP8Copy16x8;
 // Quantization
 struct VP8Matrix;   // forward declaration
 typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
-                                int n, const struct VP8Matrix* const mtx);
+                                const struct VP8Matrix* const mtx);
+// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
+typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
+                                  const struct VP8Matrix* const mtx);
+
 extern VP8QuantizeBlock VP8EncQuantizeBlock;
+extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
+
+// specific to 2nd transform:
+typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
+                                   const struct VP8Matrix* const mtx);
+extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
 
-// Compute susceptibility based on DCT-coeff histograms:
-// the higher, the "easier" the macroblock is to compress.
-typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
-                         int start_block, int end_block);
 extern const int VP8DspScan[16 + 4 + 4];
+
+// Collect histogram for susceptibility calculation.
+#define MAX_COEFF_THRESH   31   // size of histogram used by CollectHistogram.
+typedef struct {
+  // We only need to store max_value and last_non_zero, not the distribution.
+  int max_value;
+  int last_non_zero;
+} VP8Histogram;
+typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
+                          int start_block, int end_block,
+                          VP8Histogram* const histo);
 extern VP8CHisto VP8CollectHistogram;
+// General-purpose util function to help VP8CollectHistogram().
+void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
+                         VP8Histogram* const histo);
+
+// must be called before using any of the above
+void VP8EncDspInit(void);
+
+//------------------------------------------------------------------------------
+// cost functions (encoding)
+
+extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
+// approximate cost per level:
+extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
+extern const uint8_t VP8EncBands[16 + 1];
+
+struct VP8Residual;
+typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
+                                         struct VP8Residual* const res);
+extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
 
-void VP8EncDspInit(void);   // must be called before using any of the above
+// Cost calculation function.
+typedef int (*VP8GetResidualCostFunc)(int ctx0,
+                                      const struct VP8Residual* const res);
+extern VP8GetResidualCostFunc VP8GetResidualCost;
+
+// must be called before anything using the above
+void VP8EncDspCostInit(void);
 
 //------------------------------------------------------------------------------
 // Decoding
@@ -101,17 +223,26 @@ typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
 // when doing two transforms, coeffs is actually int16_t[2][16].
 typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
 extern VP8DecIdct2 VP8Transform;
+extern VP8DecIdct VP8TransformAC3;
 extern VP8DecIdct VP8TransformUV;
 extern VP8DecIdct VP8TransformDC;
 extern VP8DecIdct VP8TransformDCUV;
-extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
+extern VP8WHT VP8TransformWHT;
 
 // *dst is the destination block, with stride BPS. Boundary samples are
 // assumed accessible when needed.
 typedef void (*VP8PredFunc)(uint8_t* dst);
-extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
-extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
-extern const VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
+extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
+extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
+extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
+
+// clipping tables (for filtering)
+extern const int8_t* const VP8ksclip1;  // clips [-1020, 1020] to [-128, 127]
+extern const int8_t* const VP8ksclip2;  // clips [-112, 112] to [-16, 15]
+extern const uint8_t* const VP8kclip1;  // clips [-255,511] to [0,255]
+extern const uint8_t* const VP8kabs0;   // abs(x) for x in [-255,255]
+// must be called first
+void VP8InitClipTables(void);
 
 // simple filter (only for luma)
 typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
@@ -145,6 +276,8 @@ void VP8DspInit(void);
 
 #define FANCY_UPSAMPLING   // undefined to remove fancy upsampling support
 
+// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
+// bottom_y can be NULL if only one line of output is needed (at top/bottom).
 typedef void (*WebPUpsampleLinePairFunc)(
     const uint8_t* top_y, const uint8_t* bottom_y,
     const uint8_t* top_u, const uint8_t* top_v,
@@ -156,18 +289,20 @@ typedef void (*WebPUpsampleLinePairFunc)(
 // Fancy upsampling functions to convert YUV to RGB(A) modes
 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
 
-// Initializes SSE2 version of the fancy upsamplers.
-void WebPInitUpsamplersSSE2(void);
-
 #endif    // FANCY_UPSAMPLING
 
-// Point-sampling methods.
-typedef void (*WebPSampleLinePairFunc)(
-    const uint8_t* top_y, const uint8_t* bottom_y,
-    const uint8_t* u, const uint8_t* v,
-    uint8_t* top_dst, uint8_t* bottom_dst, int len);
+// Per-row point-sampling methods.
+typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
+                                   const uint8_t* u, const uint8_t* v,
+                                   uint8_t* dst, int len);
+// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
+void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
+                             const uint8_t* u, const uint8_t* v, int uv_stride,
+                             uint8_t* dst, int dst_stride,
+                             int width, int height, WebPSamplerRowFunc func);
 
-extern const WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */];
+// Sampling functions to convert rows of YUV to RGB(A)
+extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
 
 // General function for converting two lines of ARGB or RGBA.
 // 'alpha_is_last' should be true if 0xff000000 is stored in memory as
@@ -179,13 +314,84 @@ typedef void (*WebPYUV444Converter)(const uint8_t* y,
                                     const uint8_t* u, const uint8_t* v,
                                     uint8_t* dst, int len);
 
-extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
 
-// Main function to be called
+// Must be called before using the WebPUpsamplers[] (and for premultiplied
+// colorspaces like rgbA, rgbA4444, etc)
 void WebPInitUpsamplers(void);
+// Must be called before using WebPSamplers[]
+void WebPInitSamplers(void);
+// Must be called before using WebPYUV444Converters[]
+void WebPInitYUV444Converters(void);
 
 //------------------------------------------------------------------------------
-// Pre-multiply planes with alpha values
+// ARGB -> YUV converters
+
+// Convert ARGB samples to luma Y.
+extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
+// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
+// even lines and '0' for odd ones. 'src_width' is the original width, not
+// the U/V one.
+extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
+                                   int src_width, int do_store);
+
+// Convert a row of accumulated (four-values) rgba32 samples to U/V.
+extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
+                                     uint8_t* u, uint8_t* v, int width);
+
+// Convert RGB or BGR to Y
+extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
+extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
+
+// used for plain-C fallback.
+extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
+                                  int src_width, int do_store);
+extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
+                                    uint8_t* u, uint8_t* v, int width);
+
+// Must be called before using the above.
+void WebPInitConvertARGBToYUV(void);
+
+//------------------------------------------------------------------------------
+// Rescaler
+
+struct WebPRescaler;
+
+// Import a row of data and save its contribution in the rescaler.
+// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
+// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
+typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
+                                          const uint8_t* src);
+
+extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
+extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
+
+// Export one row (starting at x_out position) from rescaler.
+// 'Expand' corresponds to the wrk->y_expand case.
+// Otherwise 'Shrink' is to be used
+typedef void (*WebPRescalerExportRowFunc)(struct WebPRescaler* const wrk);
+extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
+extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
+
+// Plain-C implementation, as fall-back.
+extern void WebPRescalerImportRowExpandC(struct WebPRescaler* const wrk,
+                                         const uint8_t* src);
+extern void WebPRescalerImportRowShrinkC(struct WebPRescaler* const wrk,
+                                         const uint8_t* src);
+extern void WebPRescalerExportRowExpandC(struct WebPRescaler* const wrk);
+extern void WebPRescalerExportRowShrinkC(struct WebPRescaler* const wrk);
+
+// Main entry calls:
+extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
+                                  const uint8_t* src);
+// Export one row (starting at x_out position) from rescaler.
+extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
+
+// Must be called first before using the above.
+void WebPRescalerDspInit(void);
+
+//------------------------------------------------------------------------------
+// Utilities for processing transparent channel.
 
 // Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
 // alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
@@ -196,14 +402,98 @@ extern void (*WebPApplyAlphaMultiply)(
 extern void (*WebPApplyAlphaMultiply4444)(
     uint8_t* rgba4444, int w, int h, int stride);
 
+// Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
+// Returns true if alpha[] plane has non-trivial values different from 0xff.
+extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
+                                int width, int height,
+                                uint8_t* dst, int dst_stride);
+
+// Transfer packed 8b alpha[] values to green channel in dst[], zero'ing the
+// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
+extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
+                                        int width, int height,
+                                        uint32_t* dst, int dst_stride);
+
+// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
+// (this is the opposite of WebPDispatchAlpha).
+// Returns true if there's only trivial 0xff alpha values.
+extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
+                               int width, int height,
+                               uint8_t* alpha, int alpha_stride);
+
+// Pre-Multiply operation transforms x into x * A / 255  (where x=Y,R,G or B).
+// Un-Multiply operation transforms x into x * 255 / A.
+
+// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
+extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
+
+// Same as WebPMultARGBRow(), but for several rows.
+void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
+                      int inverse);
+
+// Same for a row of single values, with side alpha values.
+extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
+                           int width, int inverse);
+
+// Same as WebPMultRow(), but for several 'num_rows' rows.
+void WebPMultRows(uint8_t* ptr, int stride,
+                  const uint8_t* alpha, int alpha_stride,
+                  int width, int num_rows, int inverse);
+
+// Plain-C versions, used as fallback by some implementations.
+void WebPMultRowC(uint8_t* const ptr, const uint8_t* const alpha,
+                  int width, int inverse);
+void WebPMultARGBRowC(uint32_t* const ptr, int width, int inverse);
+
 // To be called first before using the above.
-void WebPInitPremultiply(void);
+void WebPInitAlphaProcessing(void);
+
+// ARGB packing function: a/r/g/b input is rgba or bgra order.
+extern void (*VP8PackARGB)(const uint8_t* a, const uint8_t* r,
+                           const uint8_t* g, const uint8_t* b, int len,
+                           uint32_t* out);
+
+// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
+extern void (*VP8PackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
+                          int len, int step, uint32_t* out);
 
-void WebPInitPremultiplySSE2(void);   // should not be called directly.
+// To be called first before using the above.
+void VP8EncDspARGBInit(void);
 
 //------------------------------------------------------------------------------
+// Filter functions
+
+typedef enum {     // Filter types.
+  WEBP_FILTER_NONE = 0,
+  WEBP_FILTER_HORIZONTAL,
+  WEBP_FILTER_VERTICAL,
+  WEBP_FILTER_GRADIENT,
+  WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1,  // end marker
+  WEBP_FILTER_BEST,    // meta-types
+  WEBP_FILTER_FAST
+} WEBP_FILTER_TYPE;
+
+typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
+                               int stride, uint8_t* out);
+typedef void (*WebPUnfilterFunc)(int width, int height, int stride,
+                                 int row, int num_rows, uint8_t* data);
+
+// Filter the given data using the given predictor.
+// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
+// in raster order.
+// 'stride' is number of bytes per scan line (with possible padding).
+// 'out' should be pre-allocated.
+extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+
+// In-place reconstruct the original data from the given filtered data.
+// The reconstruction will be done for 'num_rows' rows starting from 'row'
+// (assuming rows up to 'row - 1' are already reconstructed).
+extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+// To be called first before using the above.
+void VP8FiltersInit(void);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/dsp/enc.c b/drivers/webp/dsp/enc.c
index 02234564be..95e63f89ab 100644
--- a/drivers/webp/dsp/enc.c
+++ b/drivers/webp/dsp/enc.c
@@ -1,47 +1,34 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Speed-critical encoding functions.
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
+#include <assert.h>
 #include <stdlib.h>  // for abs()
+
 #include "./dsp.h"
 #include "../enc/vp8enci.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int clip_max(int v, int max) {
+  return (v > max) ? max : v;
+}
 
 //------------------------------------------------------------------------------
 // Compute susceptibility based on DCT-coeff histograms:
 // the higher, the "easier" the macroblock is to compress.
 
-static int ClipAlpha(int alpha) {
-  return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
-}
-
-int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
-  int num = 0, den = 0, val = 0;
-  int k;
-  int alpha;
-  // note: changing this loop to avoid the numerous "k + 1" slows things down.
-  for (k = 0; k < MAX_COEFF_THRESH; ++k) {
-    if (histo[k + 1]) {
-      val += histo[k + 1];
-      num += val * (k + 1);
-      den += (k + 1) * (k + 1);
-    }
-  }
-  // we scale the value to a usable [0..255] range
-  alpha = den ? 10 * num / den - 5 : 0;
-  return ClipAlpha(alpha);
-}
-
 const int VP8DspScan[16 + 4 + 4] = {
   // Luma
   0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
@@ -53,27 +40,41 @@ const int VP8DspScan[16 + 4 + 4] = {
   8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
 };
 
-static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
-                            int start_block, int end_block) {
-  int histo[MAX_COEFF_THRESH + 1] = { 0 };
-  int16_t out[16];
-  int j, k;
+// general-purpose util function
+void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
+                         VP8Histogram* const histo) {
+  int max_value = 0, last_non_zero = 1;
+  int k;
+  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
+    const int value = distribution[k];
+    if (value > 0) {
+      if (value > max_value) max_value = value;
+      last_non_zero = k;
+    }
+  }
+  histo->max_value = max_value;
+  histo->last_non_zero = last_non_zero;
+}
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
   for (j = start_block; j < end_block; ++j) {
-    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+    int k;
+    int16_t out[16];
 
-    // Convert coefficients to bin (within out[]).
-    for (k = 0; k < 16; ++k) {
-      const int v = abs(out[k]) >> 2;
-      out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;
-    }
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
 
-    // Use bin to update histogram.
+    // Convert coefficients to bin.
     for (k = 0; k < 16; ++k) {
-      histo[out[k]]++;
+      const int v = abs(out[k]) >> 3;  // TODO(skal): add rounding?
+      const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
+      ++distribution[clipped_value];
     }
   }
-
-  return VP8GetAlpha(histo);
+  VP8SetHistogramData(distribution, histo);
 }
 
 //------------------------------------------------------------------------------
@@ -85,19 +86,16 @@ static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
 // and make sure it's set to true _last_ (so as to be thread-safe)
 static volatile int tables_ok = 0;
 
-static void InitTables(void) {
+static WEBP_TSAN_IGNORE_FUNCTION void InitTables(void) {
   if (!tables_ok) {
     int i;
     for (i = -255; i <= 255 + 255; ++i) {
-      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+      clip1[255 + i] = clip_8b(i);
     }
     tables_ok = 1;
   }
 }
 
-static WEBP_INLINE uint8_t clip_8b(int v) {
-  return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
-}
 
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
@@ -154,84 +152,63 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
   int i;
   int tmp[16];
   for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
-    const int d0 = src[0] - ref[0];
+    const int d0 = src[0] - ref[0];   // 9bit dynamic range ([-255,255])
     const int d1 = src[1] - ref[1];
     const int d2 = src[2] - ref[2];
     const int d3 = src[3] - ref[3];
-    const int a0 = (d0 + d3) << 3;
-    const int a1 = (d1 + d2) << 3;
-    const int a2 = (d1 - d2) << 3;
-    const int a3 = (d0 - d3) << 3;
-    tmp[0 + i * 4] = (a0 + a1);
-    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;
-    tmp[2 + i * 4] = (a0 - a1);
-    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  7500) >> 12;
+    const int a0 = (d0 + d3);         // 10b                      [-510,510]
+    const int a1 = (d1 + d2);
+    const int a2 = (d1 - d2);
+    const int a3 = (d0 - d3);
+    tmp[0 + i * 4] = (a0 + a1) * 8;   // 14b                      [-8160,8160]
+    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9;      // [-7536,7542]
+    tmp[2 + i * 4] = (a0 - a1) * 8;
+    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  937) >> 9;
   }
   for (i = 0; i < 4; ++i) {
-    const int a0 = (tmp[0 + i] + tmp[12 + i]);
+    const int a0 = (tmp[0 + i] + tmp[12 + i]);  // 15b
     const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
     const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
     const int a3 = (tmp[0 + i] - tmp[12 + i]);
-    out[0 + i] = (a0 + a1 + 7) >> 4;
+    out[0 + i] = (a0 + a1 + 7) >> 4;            // 12b
     out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
     out[8 + i] = (a0 - a1 + 7) >> 4;
     out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
   }
 }
 
-static void ITransformWHT(const int16_t* in, int16_t* out) {
-  int tmp[16];
-  int i;
-  for (i = 0; i < 4; ++i) {
-    const int a0 = in[0 + i] + in[12 + i];
-    const int a1 = in[4 + i] + in[ 8 + i];
-    const int a2 = in[4 + i] - in[ 8 + i];
-    const int a3 = in[0 + i] - in[12 + i];
-    tmp[0  + i] = a0 + a1;
-    tmp[8  + i] = a0 - a1;
-    tmp[4  + i] = a3 + a2;
-    tmp[12 + i] = a3 - a2;
-  }
-  for (i = 0; i < 4; ++i) {
-    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
-    const int a0 = dc             + tmp[3 + i * 4];
-    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
-    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
-    const int a3 = dc             - tmp[3 + i * 4];
-    out[ 0] = (a0 + a1) >> 3;
-    out[16] = (a3 + a2) >> 3;
-    out[32] = (a0 - a1) >> 3;
-    out[48] = (a3 - a2) >> 3;
-    out += 64;
-  }
+static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  VP8FTransform(src, ref, out);
+  VP8FTransform(src + 4, ref + 4, out + 16);
 }
 
 static void FTransformWHT(const int16_t* in, int16_t* out) {
-  int tmp[16];
+  // input is 12b signed
+  int32_t tmp[16];
   int i;
   for (i = 0; i < 4; ++i, in += 64) {
-    const int a0 = (in[0 * 16] + in[2 * 16]) << 2;
-    const int a1 = (in[1 * 16] + in[3 * 16]) << 2;
-    const int a2 = (in[1 * 16] - in[3 * 16]) << 2;
-    const int a3 = (in[0 * 16] - in[2 * 16]) << 2;
-    tmp[0 + i * 4] = (a0 + a1) + (a0 != 0);
+    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
+    const int a1 = (in[1 * 16] + in[3 * 16]);
+    const int a2 = (in[1 * 16] - in[3 * 16]);
+    const int a3 = (in[0 * 16] - in[2 * 16]);
+    tmp[0 + i * 4] = a0 + a1;   // 14b
     tmp[1 + i * 4] = a3 + a2;
     tmp[2 + i * 4] = a3 - a2;
     tmp[3 + i * 4] = a0 - a1;
   }
   for (i = 0; i < 4; ++i) {
-    const int a0 = (tmp[0 + i] + tmp[8 + i]);
+    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
     const int a1 = (tmp[4 + i] + tmp[12+ i]);
     const int a2 = (tmp[4 + i] - tmp[12+ i]);
     const int a3 = (tmp[0 + i] - tmp[8 + i]);
-    const int b0 = a0 + a1;
+    const int b0 = a0 + a1;    // 16b
     const int b1 = a3 + a2;
     const int b2 = a3 - a2;
     const int b3 = a0 - a1;
-    out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3;
-    out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3;
-    out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3;
-    out[12 + i] = (b3 + (b3 > 0) + 3) >> 3;
+    out[ 0 + i] = b0 >> 1;     // 15b
+    out[ 4 + i] = b1 >> 1;
+    out[ 8 + i] = b2 >> 1;
+    out[12 + i] = b3 >> 1;
   }
 }
 
@@ -241,8 +218,6 @@ static void FTransformWHT(const int16_t* in, int16_t* out) {
 //------------------------------------------------------------------------------
 // Intra predictions
 
-#define DST(x, y) dst[(x) + (y) * BPS]
-
 static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
   int j;
   for (j = 0; j < size; ++j) {
@@ -253,7 +228,7 @@ static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
 static WEBP_INLINE void VerticalPred(uint8_t* dst,
                                      const uint8_t* top, int size) {
   int j;
-  if (top) {
+  if (top != NULL) {
     for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);
   } else {
     Fill(dst, 127, size);
@@ -262,7 +237,7 @@ static WEBP_INLINE void VerticalPred(uint8_t* dst,
 
 static WEBP_INLINE void HorizontalPred(uint8_t* dst,
                                        const uint8_t* left, int size) {
-  if (left) {
+  if (left != NULL) {
     int j;
     for (j = 0; j < size; ++j) {
       memset(dst + j * BPS, left[j], size);
@@ -275,8 +250,8 @@ static WEBP_INLINE void HorizontalPred(uint8_t* dst,
 static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
                                    const uint8_t* top, int size) {
   int y;
-  if (left) {
-    if (top) {
+  if (left != NULL) {
+    if (top != NULL) {
       const uint8_t* const clip = clip1 + 255 - left[-1];
       for (y = 0; y < size; ++y) {
         const uint8_t* const clip_table = clip + left[y];
@@ -294,7 +269,7 @@ static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
     // is equivalent to VE prediction where you just copy the top samples.
     // Note that if top samples are not available, the default value is
     // then 129, and not 127 as in the VerticalPred case.
-    if (top) {
+    if (top != NULL) {
       VerticalPred(dst, top, size);
     } else {
       Fill(dst, 129, size);
@@ -307,15 +282,15 @@ static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
                                int size, int round, int shift) {
   int DC = 0;
   int j;
-  if (top) {
+  if (top != NULL) {
     for (j = 0; j < size; ++j) DC += top[j];
-    if (left) {   // top and left present
+    if (left != NULL) {   // top and left present
       for (j = 0; j < size; ++j) DC += left[j];
     } else {      // top, but no left
       DC += DC;
     }
     DC = (DC + round) >> shift;
-  } else if (left) {   // left but no top
+  } else if (left != NULL) {   // left but no top
     for (j = 0; j < size; ++j) DC += left[j];
     DC += DC;
     DC = (DC + round) >> shift;
@@ -337,8 +312,8 @@ static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
   TrueMotion(C8TM8 + dst, left, top, 8);
   // V block
   dst += 8;
-  if (top) top += 8;
-  if (left) left += 16;
+  if (top != NULL) top += 8;
+  if (left != NULL) left += 16;
   DCMode(C8DC8 + dst, left, top, 8, 8, 4);
   VerticalPred(C8VE8 + dst, top, 8);
   HorizontalPred(C8HE8 + dst, left, 8);
@@ -359,6 +334,7 @@ static void Intra16Preds(uint8_t* dst,
 //------------------------------------------------------------------------------
 // luma 4x4 prediction
 
+#define DST(x, y) dst[(x) + (y) * BPS]
 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
 
@@ -589,30 +565,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
   int i;
   // horizontal pass
   for (i = 0; i < 4; ++i, in += BPS) {
-    const int a0 = (in[0] + in[2]) << 2;
-    const int a1 = (in[1] + in[3]) << 2;
-    const int a2 = (in[1] - in[3]) << 2;
-    const int a3 = (in[0] - in[2]) << 2;
-    tmp[0 + i * 4] = a0 + a1 + (a0 != 0);
+    const int a0 = in[0] + in[2];
+    const int a1 = in[1] + in[3];
+    const int a2 = in[1] - in[3];
+    const int a3 = in[0] - in[2];
+    tmp[0 + i * 4] = a0 + a1;
     tmp[1 + i * 4] = a3 + a2;
     tmp[2 + i * 4] = a3 - a2;
     tmp[3 + i * 4] = a0 - a1;
   }
   // vertical pass
   for (i = 0; i < 4; ++i, ++w) {
-    const int a0 = (tmp[0 + i] + tmp[8 + i]);
-    const int a1 = (tmp[4 + i] + tmp[12+ i]);
-    const int a2 = (tmp[4 + i] - tmp[12+ i]);
-    const int a3 = (tmp[0 + i] - tmp[8 + i]);
+    const int a0 = tmp[0 + i] + tmp[8 + i];
+    const int a1 = tmp[4 + i] + tmp[12+ i];
+    const int a2 = tmp[4 + i] - tmp[12+ i];
+    const int a3 = tmp[0 + i] - tmp[8 + i];
     const int b0 = a0 + a1;
     const int b1 = a3 + a2;
     const int b2 = a3 - a2;
     const int b3 = a0 - a1;
-    // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
-    sum += w[ 0] * ((abs(b0) + 3) >> 3);
-    sum += w[ 4] * ((abs(b1) + 3) >> 3);
-    sum += w[ 8] * ((abs(b2) + 3) >> 3);
-    sum += w[12] * ((abs(b3) + 3) >> 3);
+
+    sum += w[ 0] * abs(b0);
+    sum += w[ 4] * abs(b1);
+    sum += w[ 8] * abs(b2);
+    sum += w[12] * abs(b3);
   }
   return sum;
 }
@@ -621,7 +597,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                     const uint16_t* const w) {
   const int sum1 = TTransform(a, w);
   const int sum2 = TTransform(b, w);
-  return (abs(sum2 - sum1) + 8) >> 4;
+  return abs(sum2 - sum1) >> 5;
 }
 
 static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
@@ -646,21 +622,57 @@ static const uint8_t kZigzag[16] = {
 
 // Simple quantization
 static int QuantizeBlock(int16_t in[16], int16_t out[16],
-                         int n, const VP8Matrix* const mtx) {
+                         const VP8Matrix* const mtx) {
   int last = -1;
-  for (; n < 16; ++n) {
+  int n;
+  for (n = 0; n < 16; ++n) {
+    const int j = kZigzag[n];
+    const int sign = (in[j] < 0);
+    const uint32_t coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    if (coeff > mtx->zthresh_[j]) {
+      const uint32_t Q = mtx->q_[j];
+      const uint32_t iQ = mtx->iq_[j];
+      const uint32_t B = mtx->bias_[j];
+      int level = QUANTDIV(coeff, iQ, B);
+      if (level > MAX_LEVEL) level = MAX_LEVEL;
+      if (sign) level = -level;
+      in[j] = level * Q;
+      out[n] = level;
+      if (level) last = n;
+    } else {
+      out[n] = 0;
+      in[j] = 0;
+    }
+  }
+  return (last >= 0);
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  return nz;
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
+                            const VP8Matrix* const mtx) {
+  int n, last = -1;
+  for (n = 0; n < 16; ++n) {
     const int j = kZigzag[n];
     const int sign = (in[j] < 0);
-    int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
-    if (coeff > 2047) coeff = 2047;
+    const uint32_t coeff = sign ? -in[j] : in[j];
+    assert(mtx->sharpen_[j] == 0);
     if (coeff > mtx->zthresh_[j]) {
-      const int Q = mtx->q_[j];
-      const int iQ = mtx->iq_[j];
-      const int B = mtx->bias_[j];
-      out[n] = QUANTDIV(coeff, iQ, B);
-      if (sign) out[n] = -out[n];
-      in[j] = out[n] * Q;
-      if (out[n]) last = n;
+      const uint32_t Q = mtx->q_[j];
+      const uint32_t iQ = mtx->iq_[j];
+      const uint32_t B = mtx->bias_[j];
+      int level = QUANTDIV(coeff, iQ, B);
+      if (level > MAX_LEVEL) level = MAX_LEVEL;
+      if (sign) level = -level;
+      in[j] = level * Q;
+      out[n] = level;
+      if (level) last = n;
     } else {
       out[n] = 0;
       in[j] = 0;
@@ -672,16 +684,22 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
 //------------------------------------------------------------------------------
 // Block copy
 
-static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {
+static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int w, int h) {
   int y;
-  for (y = 0; y < size; ++y) {
-    memcpy(dst, src, size);
+  for (y = 0; y < h; ++y) {
+    memcpy(dst, src, w);
     src += BPS;
     dst += BPS;
   }
 }
 
-static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
+static void Copy4x4(const uint8_t* src, uint8_t* dst) {
+  Copy(src, dst, 4, 4);
+}
+
+static void Copy16x8(const uint8_t* src, uint8_t* dst) {
+  Copy(src, dst, 16, 8);
+}
 
 //------------------------------------------------------------------------------
 // Initialization
@@ -691,7 +709,7 @@ static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
 VP8CHisto VP8CollectHistogram;
 VP8Idct VP8ITransform;
 VP8Fdct VP8FTransform;
-VP8WHT VP8ITransformWHT;
+VP8Fdct VP8FTransform2;
 VP8WHT VP8FTransformWHT;
 VP8Intra4Preds VP8EncPredLuma4;
 VP8IntraPreds VP8EncPredLuma16;
@@ -703,18 +721,32 @@ VP8Metric VP8SSE4x4;
 VP8WMetric VP8TDisto4x4;
 VP8WMetric VP8TDisto16x16;
 VP8QuantizeBlock VP8EncQuantizeBlock;
+VP8Quantize2Blocks VP8EncQuantize2Blocks;
+VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
 VP8BlockCopy VP8Copy4x4;
+VP8BlockCopy VP8Copy16x8;
 
 extern void VP8EncDspInitSSE2(void);
+extern void VP8EncDspInitSSE41(void);
+extern void VP8EncDspInitAVX2(void);
+extern void VP8EncDspInitNEON(void);
+extern void VP8EncDspInitMIPS32(void);
+extern void VP8EncDspInitMIPSdspR2(void);
+
+static volatile VP8CPUInfo enc_last_cpuinfo_used =
+    (VP8CPUInfo)&enc_last_cpuinfo_used;
 
-void VP8EncDspInit(void) {
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInit(void) {
+  if (enc_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+  VP8DspInit();  // common inverse transforms
   InitTables();
 
   // default C implementations
   VP8CollectHistogram = CollectHistogram;
   VP8ITransform = ITransform;
   VP8FTransform = FTransform;
-  VP8ITransformWHT = ITransformWHT;
+  VP8FTransform2 = FTransform2;
   VP8FTransformWHT = FTransformWHT;
   VP8EncPredLuma4 = Intra4Preds;
   VP8EncPredLuma16 = Intra16Preds;
@@ -726,18 +758,43 @@ void VP8EncDspInit(void) {
   VP8TDisto4x4 = Disto4x4;
   VP8TDisto16x16 = Disto16x16;
   VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
   VP8Copy4x4 = Copy4x4;
+  VP8Copy16x8 = Copy16x8;
 
   // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-  if (VP8GetCPUInfo) {
+  if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
     if (VP8GetCPUInfo(kSSE2)) {
       VP8EncDspInitSSE2();
+#if defined(WEBP_USE_SSE41)
+      if (VP8GetCPUInfo(kSSE4_1)) {
+        VP8EncDspInitSSE41();
+      }
+#endif
+    }
+#endif
+#if defined(WEBP_USE_AVX2)
+    if (VP8GetCPUInfo(kAVX2)) {
+      VP8EncDspInitAVX2();
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8EncDspInitNEON();
+    }
+#endif
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8EncDspInitMIPS32();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8EncDspInitMIPSdspR2();
     }
 #endif
   }
+  enc_last_cpuinfo_used = VP8GetCPUInfo;
 }
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/dsp/enc_avx2.c b/drivers/webp/dsp/enc_avx2.c
new file mode 100644
index 0000000000..93efb30b10
--- /dev/null
+++ b/drivers/webp/dsp/enc_avx2.c
@@ -0,0 +1,21 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// AVX2 version of speed-critical encoding functions.
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_AVX2)
+
+#endif  // WEBP_USE_AVX2
+
+//------------------------------------------------------------------------------
+// Entry point
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitAVX2)
diff --git a/drivers/webp/dsp/enc_mips32.c b/drivers/webp/dsp/enc_mips32.c
new file mode 100644
index 0000000000..fd10143de9
--- /dev/null
+++ b/drivers/webp/dsp/enc_mips32.c
@@ -0,0 +1,672 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of speed-critical encoding functions.
+//
+// Author(s): Djordje Pesut    (djordje.pesut@imgtec.com)
+//            Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+//            Slobodan Prijic  (slobodan.prijic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "./mips_macro.h"
+#include "../enc/vp8enci.h"
+#include "../enc/cost.h"
+
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+
+// macro for one vertical pass in ITransformOne (MUL macro inlined)
+// temp0..temp15 holds tmp[0]..tmp[15], temp16..temp19 are scratch registers
+// temp20 - must hold the 'in' pointer when the macro starts
+// A..D - offsets in bytes to load from in buffer
+// TEMP0..TEMP3 - registers for corresponding tmp elements
+// TEMP4 - temporary register (may be temp20: written only after the loads)
+#define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3)        \
+  "lh      %[temp16],      " #A "(%[temp20])                 \n\t"          \
+  "lh      %[temp18],      " #B "(%[temp20])                 \n\t"          \
+  "lh      %[temp17],      " #C "(%[temp20])                 \n\t"          \
+  "lh      %[temp19],      " #D "(%[temp20])                 \n\t"          \
+  "addu    %[" #TEMP4 "],    %[temp16],      %[temp18]       \n\t"          \
+  "subu    %[temp16],      %[temp16],      %[temp18]         \n\t"          \
+  "mul     %[" #TEMP0 "],    %[temp17],      %[kC2]          \n\t"          \
+  "mul     %[temp18],      %[temp19],      %[kC1]            \n\t"          \
+  "mul     %[temp17],      %[temp17],      %[kC1]            \n\t"          \
+  "mul     %[temp19],      %[temp19],      %[kC2]            \n\t"          \
+  "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    16            \n\t"          \
+  "sra     %[temp18],      %[temp18],      16                \n\t"          \
+  "sra     %[temp17],      %[temp17],      16                \n\t"          \
+  "sra     %[temp19],      %[temp19],      16                \n\t"          \
+  "subu    %[" #TEMP2 "],    %[" #TEMP0 "],    %[temp18]     \n\t"          \
+  "addu    %[" #TEMP3 "],    %[temp17],      %[temp19]       \n\t"          \
+  "addu    %[" #TEMP0 "],    %[" #TEMP4 "],    %[" #TEMP3 "] \n\t"          \
+  "addu    %[" #TEMP1 "],    %[temp16],      %[" #TEMP2 "]   \n\t"          \
+  "subu    %[" #TEMP2 "],    %[temp16],      %[" #TEMP2 "]   \n\t"          \
+  "subu    %[" #TEMP3 "],    %[" #TEMP4 "],    %[" #TEMP3 "] \n\t"
+
+// macro for one horizontal pass in ITransformOne
+// MUL and STORE macros inlined
+// a = clip_8b(a) is replaced with: a = max(a, 0); a = min(a, 255)
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A - row index (scaled by BPS) to load from ref and store to dst buffer
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+#define HORIZONTAL_PASS(A, TEMP0, TEMP4, TEMP8, TEMP12)                       \
+  "addiu   %[" #TEMP0 "],    %[" #TEMP0 "],    4               \n\t"          \
+  "addu    %[temp16],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"          \
+  "subu    %[temp17],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"          \
+  "mul     %[" #TEMP0 "],    %[" #TEMP4 "],    %[kC2]          \n\t"          \
+  "mul     %[" #TEMP8 "],    %[" #TEMP12 "],   %[kC1]          \n\t"          \
+  "mul     %[" #TEMP4 "],    %[" #TEMP4 "],    %[kC1]          \n\t"          \
+  "mul     %[" #TEMP12 "],   %[" #TEMP12 "],   %[kC2]          \n\t"          \
+  "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    16              \n\t"          \
+  "sra     %[" #TEMP8 "],    %[" #TEMP8 "],    16              \n\t"          \
+  "sra     %[" #TEMP4 "],    %[" #TEMP4 "],    16              \n\t"          \
+  "sra     %[" #TEMP12 "],   %[" #TEMP12 "],   16              \n\t"          \
+  "subu    %[temp18],      %[" #TEMP0 "],    %[" #TEMP8 "]     \n\t"          \
+  "addu    %[temp19],      %[" #TEMP4 "],    %[" #TEMP12 "]    \n\t"          \
+  "addu    %[" #TEMP0 "],    %[temp16],      %[temp19]         \n\t"          \
+  "addu    %[" #TEMP4 "],    %[temp17],      %[temp18]         \n\t"          \
+  "subu    %[" #TEMP8 "],    %[temp17],      %[temp18]         \n\t"          \
+  "subu    %[" #TEMP12 "],   %[temp16],      %[temp19]         \n\t"          \
+  "lw      %[temp20],      0(%[args])                          \n\t"          \
+  "sra     %[" #TEMP0 "],    %[" #TEMP0 "],    3               \n\t"          \
+  "sra     %[" #TEMP4 "],    %[" #TEMP4 "],    3               \n\t"          \
+  "sra     %[" #TEMP8 "],    %[" #TEMP8 "],    3               \n\t"          \
+  "sra     %[" #TEMP12 "],   %[" #TEMP12 "],   3               \n\t"          \
+  "lbu     %[temp16],      0+" XSTR(BPS) "*" #A "(%[temp20])   \n\t"          \
+  "lbu     %[temp17],      1+" XSTR(BPS) "*" #A "(%[temp20])   \n\t"          \
+  "lbu     %[temp18],      2+" XSTR(BPS) "*" #A "(%[temp20])   \n\t"          \
+  "lbu     %[temp19],      3+" XSTR(BPS) "*" #A "(%[temp20])   \n\t"          \
+  "addu    %[" #TEMP0 "],    %[temp16],      %[" #TEMP0 "]     \n\t"          \
+  "addu    %[" #TEMP4 "],    %[temp17],      %[" #TEMP4 "]     \n\t"          \
+  "addu    %[" #TEMP8 "],    %[temp18],      %[" #TEMP8 "]     \n\t"          \
+  "addu    %[" #TEMP12 "],   %[temp19],      %[" #TEMP12 "]    \n\t"          \
+  "slt     %[temp16],      %[" #TEMP0 "],    $zero             \n\t"          \
+  "slt     %[temp17],      %[" #TEMP4 "],    $zero             \n\t"          \
+  "slt     %[temp18],      %[" #TEMP8 "],    $zero             \n\t"          \
+  "slt     %[temp19],      %[" #TEMP12 "],   $zero             \n\t"          \
+  "movn    %[" #TEMP0 "],    $zero,          %[temp16]         \n\t"          \
+  "movn    %[" #TEMP4 "],    $zero,          %[temp17]         \n\t"          \
+  "movn    %[" #TEMP8 "],    $zero,          %[temp18]         \n\t"          \
+  "movn    %[" #TEMP12 "],   $zero,          %[temp19]         \n\t"          \
+  "addiu   %[temp20],      $zero,          255                 \n\t"          \
+  "slt     %[temp16],      %[" #TEMP0 "],    %[temp20]         \n\t"          \
+  "slt     %[temp17],      %[" #TEMP4 "],    %[temp20]         \n\t"          \
+  "slt     %[temp18],      %[" #TEMP8 "],    %[temp20]         \n\t"          \
+  "slt     %[temp19],      %[" #TEMP12 "],   %[temp20]         \n\t"          \
+  "movz    %[" #TEMP0 "],    %[temp20],      %[temp16]         \n\t"          \
+  "movz    %[" #TEMP4 "],    %[temp20],      %[temp17]         \n\t"          \
+  "lw      %[temp16],      8(%[args])                          \n\t"          \
+  "movz    %[" #TEMP8 "],    %[temp20],      %[temp18]         \n\t"          \
+  "movz    %[" #TEMP12 "],   %[temp20],      %[temp19]         \n\t"          \
+  "sb      %[" #TEMP0 "],    0+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
+  "sb      %[" #TEMP4 "],    1+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
+  "sb      %[" #TEMP8 "],    2+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"          \
+  "sb      %[" #TEMP12 "],   3+" XSTR(BPS) "*" #A "(%[temp16]) \n\t"
+
+// Does one inverse transform of 'in', adds 'ref' and stores clipped to 'dst'.
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+                                      uint8_t* dst) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+  int temp7, temp8, temp9, temp10, temp11, temp12, temp13;
+  int temp14, temp15, temp16, temp17, temp18, temp19, temp20;
+  const int* args[3] = {(const int*)ref, (const int*)in, (const int*)dst};
+  // The asm fetches the pointers from args[]: 0(args) = ref, 4 = in, 8 = dst.
+  __asm__ volatile(
+    "lw      %[temp20],      4(%[args])                      \n\t"
+    VERTICAL_PASS(0, 16,  8, 24, temp4,  temp0,  temp1,  temp2,  temp3)
+    VERTICAL_PASS(2, 18, 10, 26, temp8,  temp4,  temp5,  temp6,  temp7)
+    VERTICAL_PASS(4, 20, 12, 28, temp12, temp8,  temp9,  temp10, temp11)
+    VERTICAL_PASS(6, 22, 14, 30, temp20, temp12, temp13, temp14, temp15)
+    // temp0..temp15 now hold tmp[0]..tmp[15] for the horizontal passes.
+    HORIZONTAL_PASS(0, temp0, temp4, temp8,  temp12)
+    HORIZONTAL_PASS(1, temp1, temp5, temp9,  temp13)
+    HORIZONTAL_PASS(2, temp2, temp6, temp10, temp14)
+    HORIZONTAL_PASS(3, temp3, temp7, temp11, temp15)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
+    : [args]"r"(args), [kC1]"r"(kC1), [kC2]"r"(kC2)
+    : "memory", "hi", "lo"
+  );
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in,
+                       uint8_t* dst, int do_two) {
+  // The first block is always processed.
+  ITransformOne(ref, in, dst);
+  // Optional second block: 4 bytes further in ref/dst, 16 coeffs further in 'in'.
+  if (do_two != 0) ITransformOne(&ref[4], &in[16], &dst[4]);
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+// macro for one pass through for loop in QuantizeBlock
+// QUANTDIV macro inlined
+// J - offset in bytes (kZigzag[n] * 2)
+// K - offset in bytes (kZigzag[n] * 4)
+// N - offset in bytes (n * 2)
+#define QUANTIZE_ONE(J, K, N)                                               \
+  "lh           %[temp0],       " #J "(%[ppin])                     \n\t"   \
+  "lhu          %[temp1],       " #J "(%[ppsharpen])                \n\t"   \
+  "lw           %[temp2],       " #K "(%[ppzthresh])                \n\t"   \
+  "sra          %[sign],        %[temp0],           15              \n\t"   \
+  "xor          %[coeff],       %[temp0],           %[sign]         \n\t"   \
+  "subu         %[coeff],       %[coeff],           %[sign]         \n\t"   \
+  "addu         %[coeff],       %[coeff],           %[temp1]        \n\t"   \
+  "slt          %[temp4],       %[temp2],           %[coeff]        \n\t"   \
+  "addiu        %[temp5],       $zero,              0               \n\t"   \
+  "addiu        %[level],       $zero,              0               \n\t"   \
+  "beqz         %[temp4],       2f                                  \n\t"   \
+  "lhu          %[temp1],       " #J "(%[ppiq])                     \n\t"   \
+  "lw           %[temp2],       " #K "(%[ppbias])                   \n\t"   \
+  "lhu          %[temp3],       " #J "(%[ppq])                      \n\t"   \
+  "mul          %[level],       %[coeff],           %[temp1]        \n\t"   \
+  "addu         %[level],       %[level],           %[temp2]        \n\t"   \
+  "sra          %[level],       %[level],           17              \n\t"   \
+  "slt          %[temp4],       %[max_level],       %[level]        \n\t"   \
+  "movn         %[level],       %[max_level],       %[temp4]        \n\t"   \
+  "xor          %[level],       %[level],           %[sign]         \n\t"   \
+  "subu         %[level],       %[level],           %[sign]         \n\t"   \
+  "mul          %[temp5],       %[level],           %[temp3]        \n\t"   \
+"2:                                                                 \n\t"   \
+  "sh           %[temp5],       " #J "(%[ppin])                     \n\t"   \
+  "sh           %[level],       " #N "(%[pout])                     \n\t"
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;
+  int sign, coeff, level, i;
+  int max_level = MAX_LEVEL;
+  // Base pointers for the asm; QUANTIZE_ONE adds constant byte offsets to them.
+  int16_t* ppin             = &in[0];
+  int16_t* pout             = &out[0];
+  const uint16_t* ppsharpen = &mtx->sharpen_[0];
+  const uint32_t* ppzthresh = &mtx->zthresh_[0];
+  const uint16_t* ppq       = &mtx->q_[0];
+  const uint16_t* ppiq      = &mtx->iq_[0];
+  const uint32_t* ppbias    = &mtx->bias_[0];
+  // One expansion per coefficient n: (kZigzag[n] * 2, kZigzag[n] * 4, n * 2).
+  __asm__ volatile(
+    QUANTIZE_ONE( 0,  0,  0)
+    QUANTIZE_ONE( 2,  4,  2)
+    QUANTIZE_ONE( 8, 16,  4)
+    QUANTIZE_ONE(16, 32,  6)
+    QUANTIZE_ONE(10, 20,  8)
+    QUANTIZE_ONE( 4,  8, 10)
+    QUANTIZE_ONE( 6, 12, 12)
+    QUANTIZE_ONE(12, 24, 14)
+    QUANTIZE_ONE(18, 36, 16)
+    QUANTIZE_ONE(24, 48, 18)
+    QUANTIZE_ONE(26, 52, 20)
+    QUANTIZE_ONE(20, 40, 22)
+    QUANTIZE_ONE(14, 28, 24)
+    QUANTIZE_ONE(22, 44, 26)
+    QUANTIZE_ONE(28, 56, 28)
+    QUANTIZE_ONE(30, 60, 30)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
+      [level]"=&r"(level)
+    : [pout]"r"(pout), [ppin]"r"(ppin),
+      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
+      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
+      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
+    : "memory", "hi", "lo"
+  );
+  // The asm also rewrote in[] with the dequantized values (level * q).
+  // Non-zero test is done out of the macro so the scan can stop early.
+  for (i = 15; i >= 0; i--) {
+    if (out[i]) return 1;
+  }
+  return 0;
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  // Bit 0 / bit 1 of the result flag non-zero levels in block 0 / block 1.
+  const int nz_first = QuantizeBlock(&in[0], &out[0], mtx);
+  const int nz_second = QuantizeBlock(&in[16], &out[16], mtx);
+  return nz_first | (nz_second << 1);
+}
+
+#undef QUANTIZE_ONE
+
+// macro for one horizontal pass in Disto4x4 (TTransform)
+// two calls of function TTransform are merged into single one
+// A - row index (scaled by BPS) to load from a and b buffers
+// E..H - offsets in bytes to store first results to tmp buffer
+// E1..H1 - offsets in bytes to store second results to tmp buffer
+#define HORIZONTAL_PASS(A, E, F, G, H, E1, F1, G1, H1)                  \
+  "lbu    %[temp0],  0+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
+  "lbu    %[temp1],  1+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
+  "lbu    %[temp2],  2+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
+  "lbu    %[temp3],  3+" XSTR(BPS) "*" #A "(%[a])  \n\t"                \
+  "lbu    %[temp4],  0+" XSTR(BPS) "*" #A "(%[b])  \n\t"                \
+  "lbu    %[temp5],  1+" XSTR(BPS) "*" #A "(%[b])  \n\t"                \
+  "lbu    %[temp6],  2+" XSTR(BPS) "*" #A "(%[b])  \n\t"                \
+  "lbu    %[temp7],  3+" XSTR(BPS) "*" #A "(%[b])  \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp2]         \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp2]         \n\t"                \
+  "addu   %[temp2],  %[temp1],    %[temp3]         \n\t"                \
+  "subu   %[temp1],  %[temp1],    %[temp3]         \n\t"                \
+  "addu   %[temp3],  %[temp4],    %[temp6]         \n\t"                \
+  "subu   %[temp4],  %[temp4],    %[temp6]         \n\t"                \
+  "addu   %[temp6],  %[temp5],    %[temp7]         \n\t"                \
+  "subu   %[temp5],  %[temp5],    %[temp7]         \n\t"                \
+  "addu   %[temp7],  %[temp8],    %[temp2]         \n\t"                \
+  "subu   %[temp2],  %[temp8],    %[temp2]         \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp1]         \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp1]         \n\t"                \
+  "addu   %[temp1],  %[temp3],    %[temp6]         \n\t"                \
+  "subu   %[temp3],  %[temp3],    %[temp6]         \n\t"                \
+  "addu   %[temp6],  %[temp4],    %[temp5]         \n\t"                \
+  "subu   %[temp4],  %[temp4],    %[temp5]         \n\t"                \
+  "sw     %[temp7],  " #E "(%[tmp])                \n\t"                \
+  "sw     %[temp2],  " #H "(%[tmp])                \n\t"                \
+  "sw     %[temp8],  " #F "(%[tmp])                \n\t"                \
+  "sw     %[temp0],  " #G "(%[tmp])                \n\t"                \
+  "sw     %[temp1],  " #E1 "(%[tmp])               \n\t"                \
+  "sw     %[temp3],  " #H1 "(%[tmp])               \n\t"                \
+  "sw     %[temp6],  " #F1 "(%[tmp])               \n\t"                \
+  "sw     %[temp4],  " #G1 "(%[tmp])               \n\t"
+
+// macro for one vertical pass in Disto4x4 (TTransform)
+// two calls of function TTransform are merged into single one
+// since only one accumulator is available in the mips32r1 instruction set,
+//   the second call of function TTransform is done first, followed by
+//   the first one.
+//   const int sum1 = TTransform(a, w);
+//   const int sum2 = TTransform(b, w);
+//   return abs(sum2 - sum1) >> 5;
+//   (sum2 - sum1) is calculated with madds (sum2) and msubs (sum1)
+// A..D - offsets in bytes to load first results from tmp buffer
+// A1..D1 - offsets in bytes to load second results from tmp buffer
+// E..H - offsets in bytes to load from w buffer
+#define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H)     \
+  "lw     %[temp0],  " #A1 "(%[tmp])         \n\t"                \
+  "lw     %[temp1],  " #C1 "(%[tmp])         \n\t"                \
+  "lw     %[temp2],  " #B1 "(%[tmp])         \n\t"                \
+  "lw     %[temp3],  " #D1 "(%[tmp])         \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"                \
+  "subu   %[temp2],  %[temp2],    %[temp3]   \n\t"                \
+  "addu   %[temp3],  %[temp8],    %[temp1]   \n\t"                \
+  "subu   %[temp8],  %[temp8],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp0],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"                \
+  "sra    %[temp4],  %[temp3],    31         \n\t"                \
+  "sra    %[temp5],  %[temp1],    31         \n\t"                \
+  "sra    %[temp6],  %[temp0],    31         \n\t"                \
+  "sra    %[temp7],  %[temp8],    31         \n\t"                \
+  "xor    %[temp3],  %[temp3],    %[temp4]   \n\t"                \
+  "xor    %[temp1],  %[temp1],    %[temp5]   \n\t"                \
+  "xor    %[temp0],  %[temp0],    %[temp6]   \n\t"                \
+  "xor    %[temp8],  %[temp8],    %[temp7]   \n\t"                \
+  "subu   %[temp3],  %[temp3],    %[temp4]   \n\t"                \
+  "subu   %[temp1],  %[temp1],    %[temp5]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp6]   \n\t"                \
+  "subu   %[temp8],  %[temp8],    %[temp7]   \n\t"                \
+  "lhu    %[temp4],  " #E "(%[w])            \n\t"                \
+  "lhu    %[temp5],  " #F "(%[w])            \n\t"                \
+  "lhu    %[temp6],  " #G "(%[w])            \n\t"                \
+  "lhu    %[temp7],  " #H "(%[w])            \n\t"                \
+  "madd   %[temp4],  %[temp3]                \n\t"                \
+  "madd   %[temp5],  %[temp1]                \n\t"                \
+  "madd   %[temp6],  %[temp0]                \n\t"                \
+  "madd   %[temp7],  %[temp8]                \n\t"                \
+  "lw     %[temp0],  " #A "(%[tmp])          \n\t"                \
+  "lw     %[temp1],  " #C "(%[tmp])          \n\t"                \
+  "lw     %[temp2],  " #B "(%[tmp])          \n\t"                \
+  "lw     %[temp3],  " #D "(%[tmp])          \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp1]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp1]   \n\t"                \
+  "addu   %[temp1],  %[temp2],    %[temp3]   \n\t"                \
+  "subu   %[temp2],  %[temp2],    %[temp3]   \n\t"                \
+  "addu   %[temp3],  %[temp8],    %[temp1]   \n\t"                \
+  "subu   %[temp1],  %[temp8],    %[temp1]   \n\t"                \
+  "addu   %[temp8],  %[temp0],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp2]   \n\t"                \
+  "sra    %[temp2],  %[temp3],    31         \n\t"                \
+  "xor    %[temp3],  %[temp3],    %[temp2]   \n\t"                \
+  "subu   %[temp3],  %[temp3],    %[temp2]   \n\t"                \
+  "msub   %[temp4],  %[temp3]                \n\t"                \
+  "sra    %[temp2],  %[temp8],    31         \n\t"                \
+  "sra    %[temp3],  %[temp0],    31         \n\t"                \
+  "sra    %[temp4],  %[temp1],    31         \n\t"                \
+  "xor    %[temp8],  %[temp8],    %[temp2]   \n\t"                \
+  "xor    %[temp0],  %[temp0],    %[temp3]   \n\t"                \
+  "xor    %[temp1],  %[temp1],    %[temp4]   \n\t"                \
+  "subu   %[temp8],  %[temp8],    %[temp2]   \n\t"                \
+  "subu   %[temp0],  %[temp0],    %[temp3]   \n\t"                \
+  "subu   %[temp1],  %[temp1],    %[temp4]   \n\t"                \
+  "msub   %[temp5],  %[temp8]                \n\t"                \
+  "msub   %[temp6],  %[temp0]                \n\t"                \
+  "msub   %[temp7],  %[temp1]                \n\t"
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  int tmp[32];
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+  // tmp[0..15] / tmp[16..31] receive the horizontal-pass results for a / b.
+  __asm__ volatile(
+    HORIZONTAL_PASS(0,   0,  4,  8, 12,    64,  68,  72,  76)
+    HORIZONTAL_PASS(1,  16, 20, 24, 28,    80,  84,  88,  92)
+    HORIZONTAL_PASS(2,  32, 36, 40, 44,    96, 100, 104, 108)
+    HORIZONTAL_PASS(3,  48, 52, 56, 60,   112, 116, 120, 124)
+    "mthi   $zero                             \n\t"
+    "mtlo   $zero                             \n\t"
+    VERTICAL_PASS( 0, 16, 32, 48,     64, 80,  96, 112,   0,  8, 16, 24)
+    VERTICAL_PASS( 4, 20, 36, 52,     68, 84, 100, 116,   2, 10, 18, 26)
+    VERTICAL_PASS( 8, 24, 40, 56,     72, 88, 104, 120,   4, 12, 20, 28)
+    VERTICAL_PASS(12, 28, 44, 60,     76, 92, 108, 124,   6, 14, 22, 30)
+    "mflo   %[temp0]                          \n\t"
+    "sra    %[temp1],  %[temp0],  31          \n\t"
+    "xor    %[temp0],  %[temp0],  %[temp1]    \n\t"
+    "subu   %[temp0],  %[temp0],  %[temp1]    \n\t"
+    "sra    %[temp0],  %[temp0],  5           \n\t"
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+    : [a]"r"(a), [b]"r"(b), [w]"r"(w), [tmp]"r"(tmp)
+    : "memory", "hi", "lo"
+  );
+  // temp0 = abs(lo) >> 5, where lo accumulated the madd/msub terms above.
+  return temp0;
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  // Sum of Disto4x4() over the sixteen 4x4 sub-blocks, row by row.
+  int total = 0;
+  int blk;
+  for (blk = 0; blk < 16; ++blk) {
+    const int offset = (blk >> 2) * 4 * BPS + (blk & 3) * 4;
+    total += Disto4x4(a + offset, b + offset, w);
+  }
+  return total;
+}
+
+// macro for one horizontal pass in FTransform
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A - row index (scaled by BPS) to load from src and ref buffers
+// TEMP0..TEMP3 - registers for corresponding tmp elements
+#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                  \
+  "lw     %[" #TEMP1 "],  0(%[args])                           \n\t"    \
+  "lw     %[" #TEMP2 "],  4(%[args])                           \n\t"    \
+  "lbu    %[temp16],    0+" XSTR(BPS) "*" #A "(%[" #TEMP1 "])  \n\t"    \
+  "lbu    %[temp17],    0+" XSTR(BPS) "*" #A "(%[" #TEMP2 "])  \n\t"    \
+  "lbu    %[temp18],    1+" XSTR(BPS) "*" #A "(%[" #TEMP1 "])  \n\t"    \
+  "lbu    %[temp19],    1+" XSTR(BPS) "*" #A "(%[" #TEMP2 "])  \n\t"    \
+  "subu   %[temp20],    %[temp16],    %[temp17]                \n\t"    \
+  "lbu    %[temp16],    2+" XSTR(BPS) "*" #A "(%[" #TEMP1 "])  \n\t"    \
+  "lbu    %[temp17],    2+" XSTR(BPS) "*" #A "(%[" #TEMP2 "])  \n\t"    \
+  "subu   %[" #TEMP0 "],  %[temp18],    %[temp19]              \n\t"    \
+  "lbu    %[temp18],    3+" XSTR(BPS) "*" #A "(%[" #TEMP1 "])  \n\t"    \
+  "lbu    %[temp19],    3+" XSTR(BPS) "*" #A "(%[" #TEMP2 "])  \n\t"    \
+  "subu   %[" #TEMP1 "],  %[temp16],    %[temp17]              \n\t"    \
+  "subu   %[" #TEMP2 "],  %[temp18],    %[temp19]              \n\t"    \
+  "addu   %[" #TEMP3 "],  %[temp20],    %[" #TEMP2 "]          \n\t"    \
+  "subu   %[" #TEMP2 "],  %[temp20],    %[" #TEMP2 "]          \n\t"    \
+  "addu   %[temp20],    %[" #TEMP0 "],  %[" #TEMP1 "]          \n\t"    \
+  "subu   %[" #TEMP0 "],  %[" #TEMP0 "],  %[" #TEMP1 "]        \n\t"    \
+  "mul    %[temp16],    %[" #TEMP2 "],  %[c5352]               \n\t"    \
+  "mul    %[temp17],    %[" #TEMP2 "],  %[c2217]               \n\t"    \
+  "mul    %[temp18],    %[" #TEMP0 "],  %[c5352]               \n\t"    \
+  "mul    %[temp19],    %[" #TEMP0 "],  %[c2217]               \n\t"    \
+  "addu   %[" #TEMP1 "],  %[" #TEMP3 "],  %[temp20]            \n\t"    \
+  "subu   %[temp20],    %[" #TEMP3 "],  %[temp20]              \n\t"    \
+  "sll    %[" #TEMP0 "],  %[" #TEMP1 "],  3                    \n\t"    \
+  "sll    %[" #TEMP2 "],  %[temp20],    3                      \n\t"    \
+  "addiu  %[temp16],    %[temp16],    1812                     \n\t"    \
+  "addiu  %[temp17],    %[temp17],    937                      \n\t"    \
+  "addu   %[temp16],    %[temp16],    %[temp19]                \n\t"    \
+  "subu   %[temp17],    %[temp17],    %[temp18]                \n\t"    \
+  "sra    %[" #TEMP1 "],  %[temp16],    9                      \n\t"    \
+  "sra    %[" #TEMP3 "],  %[temp17],    9                      \n\t"
+
+// macro for one vertical pass in FTransform
+// temp0..temp15 holds tmp[0]..tmp[15]
+// A..D - offsets in bytes to store to out buffer
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)    \
+  "addu   %[temp16],    %[" #TEMP0 "],  %[" #TEMP12 "]   \n\t"    \
+  "subu   %[temp19],    %[" #TEMP0 "],  %[" #TEMP12 "]   \n\t"    \
+  "addu   %[temp17],    %[" #TEMP4 "],  %[" #TEMP8 "]    \n\t"    \
+  "subu   %[temp18],    %[" #TEMP4 "],  %[" #TEMP8 "]    \n\t"    \
+  "mul    %[" #TEMP8 "],  %[temp19],    %[c2217]         \n\t"    \
+  "mul    %[" #TEMP12 "], %[temp18],    %[c2217]         \n\t"    \
+  "mul    %[" #TEMP4 "],  %[temp19],    %[c5352]         \n\t"    \
+  "mul    %[temp18],    %[temp18],    %[c5352]           \n\t"    \
+  "addiu  %[temp16],    %[temp16],    7                  \n\t"    \
+  "addu   %[" #TEMP0 "],  %[temp16],    %[temp17]        \n\t"    \
+  "sra    %[" #TEMP0 "],  %[" #TEMP0 "],  4              \n\t"    \
+  "addu   %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "]  \n\t"    \
+  "subu   %[" #TEMP4 "],  %[temp16],    %[temp17]        \n\t"    \
+  "sra    %[" #TEMP4 "],  %[" #TEMP4 "],  4              \n\t"    \
+  "addiu  %[" #TEMP8 "],  %[" #TEMP8 "],  30000          \n\t"    \
+  "addiu  %[" #TEMP12 "], %[" #TEMP12 "], 12000          \n\t"    \
+  "addiu  %[" #TEMP8 "],  %[" #TEMP8 "],  21000          \n\t"    \
+  "subu   %[" #TEMP8 "],  %[" #TEMP8 "],  %[temp18]      \n\t"    \
+  "sra    %[" #TEMP12 "], %[" #TEMP12 "], 16             \n\t"    \
+  "sra    %[" #TEMP8 "],  %[" #TEMP8 "],  16             \n\t"    \
+  "addiu  %[temp16],    %[" #TEMP12 "], 1                \n\t"    \
+  "movn   %[" #TEMP12 "], %[temp16],    %[temp19]        \n\t"    \
+  "sh     %[" #TEMP0 "],  " #A "(%[temp20])              \n\t"    \
+  "sh     %[" #TEMP4 "],  " #C "(%[temp20])              \n\t"    \
+  "sh     %[" #TEMP8 "],  " #D "(%[temp20])              \n\t"    \
+  "sh     %[" #TEMP12 "], " #B "(%[temp20])              \n\t"
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+  int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+  int temp17, temp18, temp19, temp20;
+  const int c2217 = 2217;
+  const int c5352 = 5352;
+  const int* const args[3] =
+      { (const int*)src, (const int*)ref, (const int*)out };
+  // The asm fetches the pointers from args[]: 0(args) = src, 4 = ref, 8 = out.
+  __asm__ volatile(
+    HORIZONTAL_PASS(0, temp0,  temp1,  temp2,  temp3)
+    HORIZONTAL_PASS(1, temp4,  temp5,  temp6,  temp7)
+    HORIZONTAL_PASS(2, temp8,  temp9,  temp10, temp11)
+    HORIZONTAL_PASS(3, temp12, temp13, temp14, temp15)
+    "lw   %[temp20],    8(%[args])                     \n\t"
+    VERTICAL_PASS(0,  8, 16, 24, temp0, temp4, temp8,  temp12)
+    VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9,  temp13)
+    VERTICAL_PASS(4, 12, 20, 28, temp2, temp6, temp10, temp14)
+    VERTICAL_PASS(6, 14, 22, 30, temp3, temp7, temp11, temp15)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
+      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
+      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
+      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
+    : [args]"r"(args), [c2217]"r"(c2217), [c5352]"r"(c5352)
+    : "memory", "hi", "lo"
+  );
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+#if !defined(WORK_AROUND_GCC)
+
+#define GET_SSE_INNER(A, B, C, D)                               \
+  "lbu     %[temp0],    " #A "(%[a])                 \n\t"      \
+  "lbu     %[temp1],    " #A "(%[b])                 \n\t"      \
+  "lbu     %[temp2],    " #B "(%[a])                 \n\t"      \
+  "lbu     %[temp3],    " #B "(%[b])                 \n\t"      \
+  "lbu     %[temp4],    " #C "(%[a])                 \n\t"      \
+  "lbu     %[temp5],    " #C "(%[b])                 \n\t"      \
+  "lbu     %[temp6],    " #D "(%[a])                 \n\t"      \
+  "lbu     %[temp7],    " #D "(%[b])                 \n\t"      \
+  "subu    %[temp0],    %[temp0],     %[temp1]       \n\t"      \
+  "subu    %[temp2],    %[temp2],     %[temp3]       \n\t"      \
+  "subu    %[temp4],    %[temp4],     %[temp5]       \n\t"      \
+  "subu    %[temp6],    %[temp6],     %[temp7]       \n\t"      \
+  "madd    %[temp0],    %[temp0]                     \n\t"      \
+  "madd    %[temp2],    %[temp2]                     \n\t"      \
+  "madd    %[temp4],    %[temp4]                     \n\t"      \
+  "madd    %[temp6],    %[temp6]                     \n\t"
+
+#define GET_SSE(A, B, C, D)               \
+  GET_SSE_INNER(A, A + 1, A + 2, A + 3)   \
+  GET_SSE_INNER(B, B + 1, B + 2, B + 3)   \
+  GET_SSE_INNER(C, C + 1, C + 2, C + 3)   \
+  GET_SSE_INNER(D, D + 1, D + 2, D + 3)
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  // 'mult' zeroes hi/lo; each GET_SSE line then accumulates one 16-byte row.
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+     GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+     GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+     GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+     GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+     GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+     GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+     GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+     GET_SSE( 8 * BPS, 4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS)
+     GET_SSE( 9 * BPS, 4 +  9 * BPS, 8 +  9 * BPS, 12 +  9 * BPS)
+     GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+     GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+     GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
+     GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+     GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+     GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  // 'mult' zeroes hi/lo; each GET_SSE line then accumulates one 16-byte row.
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+     GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+     GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+     GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+     GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+     GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+     GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+     GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  // 'mult' zeroes hi/lo; each GET_SSE line then accumulates two 8-byte rows.
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
+     GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+     GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
+     GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  // 'mult' zeroes hi/lo; the single GET_SSE accumulates four 4-byte rows.
+  __asm__ volatile(
+     "mult   $zero,    $zero                            \n\t"
+
+     GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+
+    "mflo    %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+#undef GET_SSE
+#undef GET_SSE_INNER
+
+#endif  // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPS32(void) {
+  VP8FTransform = FTransform;
+  VP8ITransform = ITransform;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+#if !defined(WORK_AROUND_GCC)  // the SSE asm kernels are compiled out otherwise
+  VP8SSE4x4 = SSE4x4;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE16x16 = SSE16x16;
+#endif  // !WORK_AROUND_GCC
+}
+
+#else  // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitMIPS32)
+
+#endif  // WEBP_USE_MIPS32
diff --git a/drivers/webp/dsp/enc_mips_dsp_r2.c b/drivers/webp/dsp/enc_mips_dsp_r2.c
new file mode 100644
index 0000000000..7c814fa04a
--- /dev/null
+++ b/drivers/webp/dsp/enc_mips_dsp_r2.c
@@ -0,0 +1,1512 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of speed-critical encoding functions.
+//
+// Author(s): Darko Laus (darko.laus@imgtec.com)
+//            Mirko Raus (mirko.raus@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./mips_macro.h"
+#include "../enc/cost.h"
+#include "../enc/vp8enci.h"
+
+static const int kC1 = 20091 + (1 << 16);  // = 85627; [kC1] asm input below
+static const int kC2 = 35468;              // [kC2] asm input of ITransformOne
+
+// Four paired-halfword butterflies: O0 = I0 + I1, O1 = I0 - I1, and likewise
+// for (I2,I3) (I4,I5) (I6,I7). O - output; I - input (macro doesn't change it)
+#define ADD_SUB_HALVES_X4(O0, O1, O2, O3, O4, O5, O6, O7,                      \
+                          I0, I1, I2, I3, I4, I5, I6, I7)                      \
+  "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 "]     \n\t"            \
+  "subq.ph          %[" #O1 "],   %[" #I0 "],  %[" #I1 "]     \n\t"            \
+  "addq.ph          %[" #O2 "],   %[" #I2 "],  %[" #I3 "]     \n\t"            \
+  "subq.ph          %[" #O3 "],   %[" #I2 "],  %[" #I3 "]     \n\t"            \
+  "addq.ph          %[" #O4 "],   %[" #I4 "],  %[" #I5 "]     \n\t"            \
+  "subq.ph          %[" #O5 "],   %[" #I4 "],  %[" #I5 "]     \n\t"            \
+  "addq.ph          %[" #O6 "],   %[" #I6 "],  %[" #I7 "]     \n\t"            \
+  "subq.ph          %[" #O7 "],   %[" #I6 "],  %[" #I7 "]     \n\t"
+
+// IO - input/output: absq_s.ph (saturating halfword abs) in place, 8 registers
+#define ABS_X8(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7)                         \
+  "absq_s.ph        %[" #IO0 "],   %[" #IO0 "]                \n\t"            \
+  "absq_s.ph        %[" #IO1 "],   %[" #IO1 "]                \n\t"            \
+  "absq_s.ph        %[" #IO2 "],   %[" #IO2 "]                \n\t"            \
+  "absq_s.ph        %[" #IO3 "],   %[" #IO3 "]                \n\t"            \
+  "absq_s.ph        %[" #IO4 "],   %[" #IO4 "]                \n\t"            \
+  "absq_s.ph        %[" #IO5 "],   %[" #IO5 "]                \n\t"            \
+  "absq_s.ph        %[" #IO6 "],   %[" #IO6 "]                \n\t"            \
+  "absq_s.ph        %[" #IO7 "],   %[" #IO7 "]                \n\t"
+
+// Clears $ac0, accumulates eight dpa/dpax.w.ph products, then O0 = lo($ac0).
+// dpa.w.ph  $ac0, x, y: $ac += x[31..16] * y[31..16] + x[15..0] * y[15..0]
+// dpax.w.ph $ac0, x, y: $ac += x[31..16] * y[15..0] + x[15..0] * y[31..16]
+// Operand pairing is fixed by the body: (I2,I0) (I5,I6) (I8,I9) (I11,I4)
+// (I12,I7) (I13,I1) (I14,I3) (I15,I10), alternating dpa / dpax.
+// O - output, I - input (macro doesn't change it)
+#define MUL_HALF(O0, I0, I1, I2, I3, I4, I5, I6, I7,                           \
+                 I8, I9, I10, I11, I12, I13, I14, I15)                         \
+    "mult            $ac0,      $zero,     $zero              \n\t"            \
+    "dpa.w.ph        $ac0,      %[" #I2 "],  %[" #I0 "]       \n\t"            \
+    "dpax.w.ph       $ac0,      %[" #I5 "],  %[" #I6 "]       \n\t"            \
+    "dpa.w.ph        $ac0,      %[" #I8 "],  %[" #I9 "]       \n\t"            \
+    "dpax.w.ph       $ac0,      %[" #I11 "], %[" #I4 "]       \n\t"            \
+    "dpa.w.ph        $ac0,      %[" #I12 "], %[" #I7 "]       \n\t"            \
+    "dpax.w.ph       $ac0,      %[" #I13 "], %[" #I1 "]       \n\t"            \
+    "dpa.w.ph        $ac0,      %[" #I14 "], %[" #I3 "]       \n\t"            \
+    "dpax.w.ph       $ac0,      %[" #I15 "], %[" #I10 "]      \n\t"            \
+    "mflo            %[" #O0 "],  $ac0                        \n\t"
+
+#define OUTPUT_EARLY_CLOBBER_REGS_17()  /* REGS_10 list + temp11..temp17 */    \
+  OUTPUT_EARLY_CLOBBER_REGS_10(),  /* defined elsewhere (not in this file) */  \
+  [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13),         \
+  [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16),         \
+  [temp17]"=&r"(temp17)
+
+// macro for one horizontal pass in FTransform: one row of args[0] - args[1]
+// temp0..temp15 holds tmp[0]..tmp[15]; temp16..temp19 are used as scratch
+// A - row index: one word is loaded from each buffer at byte offset BPS * A
+// TEMP0..TEMP3 - registers for corresponding tmp elements
+#define HORIZONTAL_PASS(A, TEMP0, TEMP1, TEMP2, TEMP3)                         \
+  "lw              %[" #TEMP0 "],   0(%[args])                          \n\t"  \
+  "lw              %[" #TEMP1 "],   4(%[args])                          \n\t"  \
+  "lw              %[" #TEMP2 "],   " XSTR(BPS) "*" #A "(%[" #TEMP0 "]) \n\t"  \
+  "lw              %[" #TEMP3 "],   " XSTR(BPS) "*" #A "(%[" #TEMP1 "]) \n\t"  \
+  "preceu.ph.qbl   %[" #TEMP0 "],   %[" #TEMP2 "]                       \n\t"  \
+  "preceu.ph.qbl   %[" #TEMP1 "],   %[" #TEMP3 "]                       \n\t"  \
+  "preceu.ph.qbr   %[" #TEMP2 "],   %[" #TEMP2 "]                       \n\t"  \
+  "preceu.ph.qbr   %[" #TEMP3 "],   %[" #TEMP3 "]                       \n\t"  \
+  "subq.ph         %[" #TEMP0 "],   %[" #TEMP0 "],   %[" #TEMP1 "]      \n\t"  \
+  "subq.ph         %[" #TEMP2 "],   %[" #TEMP2 "],   %[" #TEMP3 "]      \n\t"  \
+  "rotr            %[" #TEMP0 "],   %[" #TEMP0 "],   16                 \n\t"  \
+  "addq.ph         %[" #TEMP1 "],   %[" #TEMP2 "],   %[" #TEMP0 "]      \n\t"  \
+  "subq.ph         %[" #TEMP3 "],   %[" #TEMP2 "],   %[" #TEMP0 "]      \n\t"  \
+  "seh             %[" #TEMP0 "],   %[" #TEMP1 "]                       \n\t"  \
+  "sra             %[temp16],     %[" #TEMP1 "],   16                   \n\t"  \
+  "seh             %[temp19],     %[" #TEMP3 "]                         \n\t"  \
+  "sra             %[" #TEMP3 "],   %[" #TEMP3 "],   16                 \n\t"  \
+  "subu            %[" #TEMP2 "],   %[" #TEMP0 "],   %[temp16]          \n\t"  \
+  "addu            %[" #TEMP0 "],   %[" #TEMP0 "],   %[temp16]          \n\t"  \
+  "mul             %[temp17],     %[temp19],     %[c2217]               \n\t"  \
+  "mul             %[temp18],     %[" #TEMP3 "],   %[c5352]             \n\t"  \
+  "mul             %[" #TEMP1 "],   %[temp19],     %[c5352]             \n\t"  \
+  "mul             %[temp16],     %[" #TEMP3 "],   %[c2217]             \n\t"  \
+  "sll             %[" #TEMP2 "],   %[" #TEMP2 "],   3                  \n\t"  \
+  "sll             %[" #TEMP0 "],   %[" #TEMP0 "],   3                  \n\t"  \
+  "subu            %[" #TEMP3 "],   %[temp17],     %[temp18]            \n\t"  \
+  "addu            %[" #TEMP1 "],   %[temp16],     %[" #TEMP1 "]        \n\t"  \
+  "addiu           %[" #TEMP3 "],   %[" #TEMP3 "],   937                \n\t"  \
+  "addiu           %[" #TEMP1 "],   %[" #TEMP1 "],   1812               \n\t"  \
+  "sra             %[" #TEMP3 "],   %[" #TEMP3 "],   9                  \n\t"  \
+  "sra             %[" #TEMP1 "],   %[" #TEMP1 "],   9                  \n\t"
+
+// macro for one vertical pass in FTransform; temp0..temp15 hold tmp[0..15]
+// and temp16..temp19 are scratch. A..D - byte offsets from temp20 for the
+// 'sh' stores, paired as TEMP0 -> A, TEMP12 -> B, TEMP4 -> C, TEMP8 -> D
+// TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements
+#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12)                 \
+  "addu            %[temp16],     %[" #TEMP0 "],   %[" #TEMP12 "]   \n\t"      \
+  "subu            %[temp19],     %[" #TEMP0 "],   %[" #TEMP12 "]   \n\t"      \
+  "addu            %[temp17],     %[" #TEMP4 "],   %[" #TEMP8 "]    \n\t"      \
+  "subu            %[temp18],     %[" #TEMP4 "],   %[" #TEMP8 "]    \n\t"      \
+  "mul             %[" #TEMP8 "],   %[temp19],     %[c2217]         \n\t"      \
+  "mul             %[" #TEMP12 "],  %[temp18],     %[c2217]         \n\t"      \
+  "mul             %[" #TEMP4 "],   %[temp19],     %[c5352]         \n\t"      \
+  "mul             %[temp18],     %[temp18],     %[c5352]           \n\t"      \
+  "addiu           %[temp16],     %[temp16],     7                  \n\t"      \
+  "addu            %[" #TEMP0 "],   %[temp16],     %[temp17]        \n\t"      \
+  "sra             %[" #TEMP0 "],   %[" #TEMP0 "],   4              \n\t"      \
+  "addu            %[" #TEMP12 "],  %[" #TEMP12 "],  %[" #TEMP4 "]  \n\t"      \
+  "subu            %[" #TEMP4 "],   %[temp16],     %[temp17]        \n\t"      \
+  "sra             %[" #TEMP4 "],   %[" #TEMP4 "],   4              \n\t"      \
+  "addiu           %[" #TEMP8 "],   %[" #TEMP8 "],   30000          \n\t"      \
+  "addiu           %[" #TEMP12 "],  %[" #TEMP12 "],  12000          \n\t"      \
+  "addiu           %[" #TEMP8 "],   %[" #TEMP8 "],   21000          \n\t"      \
+  "subu            %[" #TEMP8 "],   %[" #TEMP8 "],   %[temp18]      \n\t"      \
+  "sra             %[" #TEMP12 "],  %[" #TEMP12 "],  16             \n\t"      \
+  "sra             %[" #TEMP8 "],   %[" #TEMP8 "],   16             \n\t"      \
+  "addiu           %[temp16],     %[" #TEMP12 "],  1                \n\t"      \
+  "movn            %[" #TEMP12 "],  %[temp16],     %[temp19]        \n\t"      \
+  "sh              %[" #TEMP0 "],   " #A "(%[temp20])               \n\t"      \
+  "sh              %[" #TEMP4 "],   " #C "(%[temp20])               \n\t"      \
+  "sh              %[" #TEMP8 "],   " #D "(%[temp20])               \n\t"      \
+  "sh              %[" #TEMP12 "],  " #B "(%[temp20])               \n\t"
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  const int c2217 = 2217;  // multiplier constants handed to the asm as the
+  const int c5352 = 5352;  // register operands [c2217] and [c5352]
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+  int temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
+  int temp17, temp18, temp19, temp20;
+  const int* const args[3] =  // read inside the asm via lw at 0/4/8(args)
+      { (const int*)src, (const int*)ref, (const int*)out };
+  // Transforms the 4x4 difference src - ref; writes 16 halfwords to out.
+  __asm__ volatile (
+    HORIZONTAL_PASS(0, temp0,  temp1,  temp2,  temp3)    // row 0
+    HORIZONTAL_PASS(1, temp4,  temp5,  temp6,  temp7)    // row 1
+    HORIZONTAL_PASS(2, temp8,  temp9,  temp10, temp11)   // row 2
+    HORIZONTAL_PASS(3, temp12, temp13, temp14, temp15)   // row 3
+    "lw            %[temp20],     8(%[args])                  \n\t"  // out
+    VERTICAL_PASS(0,  8, 16, 24, temp0, temp4, temp8,  temp12)
+    VERTICAL_PASS(2, 10, 18, 26, temp1, temp5, temp9,  temp13)
+    VERTICAL_PASS(4, 12, 20, 28, temp2, temp6, temp10, temp14)
+    VERTICAL_PASS(6, 14, 22, 30, temp3, temp7, temp11, temp15)
+    OUTPUT_EARLY_CLOBBER_REGS_18(),  // presumably temp1..temp18 - TODO confirm
+      [temp0]"=&r"(temp0), [temp19]"=&r"(temp19), [temp20]"=&r"(temp20)
+    : [args]"r"(args), [c2217]"r"(c2217), [c5352]"r"(c5352)
+    : "memory", "hi", "lo"
+  );
+}
+
+#undef VERTICAL_PASS
+#undef HORIZONTAL_PASS
+
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+                                      uint8_t* dst) {
+  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
+  // Reads in[] and ref, writes dst; helper macros are defined outside this file.
+  __asm__ volatile (
+    "ulw              %[temp1],   0(%[in])                 \n\t"  // in[0..1]
+    "ulw              %[temp2],   16(%[in])                \n\t"  // in[8..9]
+    LOAD_IN_X2(temp5, temp6, 24, 26)
+    ADD_SUB_HALVES(temp3, temp4, temp1, temp2)
+    LOAD_IN_X2(temp1, temp2, 8, 10)
+    MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
+                  temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
+                  temp13, temp11, temp14, temp12)
+    INSERT_HALF_X2(temp8, temp7, temp10, temp9)
+    "ulw              %[temp17],  4(%[in])                 \n\t"  // in[2..3]
+    "ulw              %[temp18],  20(%[in])                \n\t"  // in[10..11]
+    ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
+    ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
+    ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
+    LOAD_IN_X2(temp17, temp18, 12, 14)
+    LOAD_IN_X2(temp9, temp10, 28, 30)
+    MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
+                  temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
+                  temp15, temp4, temp16, temp17)
+    INSERT_HALF_X2(temp11, temp12, temp13, temp14)
+    ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
+    ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
+    // NOTE(review): the code above is presumably the vertical pass - confirm.
+    // horizontal
+    SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
+    INSERT_HALF_X2(temp1, temp6, temp5, temp2)
+    SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
+    "repl.ph          %[temp2],   0x4                      \n\t"  // {4, 4}
+    INSERT_HALF_X2(temp3, temp8, temp17, temp4)
+    "addq.ph          %[temp1],   %[temp1],  %[temp2]      \n\t"  // += 4
+    "addq.ph          %[temp6],   %[temp6],  %[temp2]      \n\t"  // += 4
+    ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
+    ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
+    MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
+                  temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
+                  temp6, temp17, temp8, temp18)
+    MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
+                  temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
+                  temp18, temp12, temp17, temp16)
+    INSERT_HALF_X2(temp1, temp3, temp9, temp13)
+    INSERT_HALF_X2(temp6, temp8, temp11, temp15)
+    SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
+                   temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
+                   temp6)
+    PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
+                          temp16, temp11, temp10, temp15, temp14)
+    LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, ref,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)  // presumably rows 0..3 of ref, BPS apart
+    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
+                            temp11, temp10, temp11, temp14, temp15)
+    STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,
+                     temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
+                     dst, 0, 1, 2, 3, BPS)  // presumably rows 0..3 of dst
+    // Operand lists: REGS_18 presumably declares temp1..temp18 as outputs.
+    OUTPUT_EARLY_CLOBBER_REGS_18()
+    : [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2), [ref]"r"(ref)
+    : "memory", "hi", "lo"
+  );
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                       int do_two) {
+  // First block always; with do_two also the block at ref/dst + 4, in + 16.
+  ITransformOne(ref, in, dst);
+  if (!do_two) return;
+  ITransformOne(ref + 4, in + 16, dst + 4);
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
+  int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17;
+  // The same sequence runs twice: for a (sum -> temp17), then b (sum -> temp3).
+  __asm__ volatile (
+    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, a,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)  // presumably rows 0..3 of a, BPS apart
+    CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp9,temp10, temp11,
+                            temp12, temp1, temp2, temp3, temp4)
+    ADD_SUB_HALVES_X4(temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
+                      temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12)
+    PACK_2_HALVES_TO_WORD(temp9, temp10, temp11, temp12, temp1, temp3, temp5,
+                          temp7, temp2, temp4, temp6, temp8)
+    ADD_SUB_HALVES_X4(temp2, temp4, temp6, temp8, temp9, temp1, temp3, temp10,
+                      temp1, temp9, temp3, temp10, temp5, temp11, temp7, temp12)
+    ADD_SUB_HALVES_X4(temp5, temp11, temp7, temp2, temp9, temp3, temp6, temp12,
+                      temp2, temp9, temp6, temp3, temp4, temp1, temp8, temp10)
+    ADD_SUB_HALVES_X4(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2,
+                      temp5, temp7, temp11, temp2, temp9, temp6, temp3, temp12)
+    ABS_X8(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2)
+    LOAD_WITH_OFFSET_X4(temp3, temp6, temp9, temp12, w,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)  // presumably the first four words of w
+    LOAD_WITH_OFFSET_X4(temp13, temp14, temp15, temp16, w,
+                        0, 4, 8, 12,
+                        1, 1, 1, 1,
+                        16)  // presumably the next four words of w
+    MUL_HALF(temp17, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
+             temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16)
+    LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, b,
+                        0, 0, 0, 0,
+                        0, 1, 2, 3,
+                        BPS)  // second run: same steps with b as the source
+    CONVERT_2_BYTES_TO_HALF(temp5,temp6, temp7, temp8, temp9,temp10, temp11,
+                            temp12, temp1, temp2, temp3, temp4)
+    ADD_SUB_HALVES_X4(temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
+                      temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12)
+    PACK_2_HALVES_TO_WORD(temp9, temp10, temp11, temp12, temp1, temp3, temp5,
+                          temp7, temp2, temp4, temp6, temp8)
+    ADD_SUB_HALVES_X4(temp2, temp4, temp6, temp8, temp9, temp1, temp3, temp10,
+                      temp1, temp9, temp3, temp10, temp5, temp11, temp7, temp12)
+    ADD_SUB_HALVES_X4(temp5, temp11, temp7, temp2, temp9, temp3, temp6, temp12,
+                      temp2, temp9, temp6, temp3, temp4, temp1, temp8, temp10)
+    ADD_SUB_HALVES_X4(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2,
+                      temp5, temp7, temp11, temp2, temp9, temp6, temp3, temp12)
+    ABS_X8(temp1, temp4, temp10, temp8, temp7, temp11, temp5, temp2)
+    LOAD_WITH_OFFSET_X4(temp3, temp6, temp9, temp12, w,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)
+    LOAD_WITH_OFFSET_X4(temp13, temp14, temp15, temp16, w,
+                        0, 4, 8, 12,
+                        1, 1, 1, 1,
+                        16)  // temp3 is both a MUL_HALF input and its output
+    MUL_HALF(temp3, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8,
+             temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16)
+    OUTPUT_EARLY_CLOBBER_REGS_17()
+    : [a]"r"(a), [b]"r"(b), [w]"r"(w)
+    : "memory", "hi", "lo"
+  );
+  return abs(temp3 - temp17) >> 5;  // |sum(b) - sum(a)| / 32
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int total = 0;  // sum of Disto4x4 over the 4x4 grid of 4x4 sub-blocks
+  int i;
+  for (i = 0; i < 16; ++i) {
+    // i >> 2 picks the band of four rows, i & 3 the 4-byte column in it.
+    const int off = (i >> 2) * 4 * BPS + (i & 3) * 4;
+    total += Disto4x4(a + off, b + off, w);
+  }
+  return total;
+}
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+#define FILL_PART(J, SIZE)  /* row J of dst: 8 bytes, 16 if SIZE == 16 */     \
+    "usw        %[value],  0+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+    "usw        %[value],  4+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+  ".if " #SIZE " == 16                                     \n\t"      \
+    "usw        %[value],  8+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+    "usw        %[value], 12+" #J "*" XSTR(BPS) "(%[dst])  \n\t"      \
+  ".endif                                                  \n\t"
+
+#define FILL_8_OR_16(DST, VALUE, SIZE) do {  /* 8x8 or 16x16 byte fill */  \
+  int value = (VALUE);  /* low byte gets replicated by replv.qb */  \
+  __asm__ volatile (                                                \
+    "replv.qb   %[value],  %[value]                      \n\t"      \
+    FILL_PART( 0, SIZE)                                             \
+    FILL_PART( 1, SIZE)                                             \
+    FILL_PART( 2, SIZE)                                             \
+    FILL_PART( 3, SIZE)                                             \
+    FILL_PART( 4, SIZE)                                             \
+    FILL_PART( 5, SIZE)                                             \
+    FILL_PART( 6, SIZE)                                             \
+    FILL_PART( 7, SIZE)                                             \
+  ".if " #SIZE " == 16  /* rows 8..15 only when SIZE is 16 */ \n\t" \
+    FILL_PART( 8, 16)                                               \
+    FILL_PART( 9, 16)                                               \
+    FILL_PART(10, 16)                                               \
+    FILL_PART(11, 16)                                               \
+    FILL_PART(12, 16)                                               \
+    FILL_PART(13, 16)                                               \
+    FILL_PART(14, 16)                                               \
+    FILL_PART(15, 16)                                               \
+  ".endif                                                \n\t"      \
+    : [value]"+&r"(value)                                           \
+    : [dst]"r"((DST))                                               \
+    : "memory"                                                      \
+  );                                                                \
+} while (0)
+
+#define VERTICAL_PRED(DST, TOP, SIZE)                                          \
+static WEBP_INLINE void VerticalPred##SIZE(uint8_t* (DST),                     \
+                                           const uint8_t* (TOP)) {             \
+  if ((TOP) == NULL) {  /* no top row: constant 127 fill */                    \
+    FILL_8_OR_16((DST), 127, (SIZE));                                          \
+  } else {              /* copy the top row into each of the SIZE rows */      \
+    int y;                                                                     \
+    for (y = 0; y < (SIZE); ++y) memcpy((DST) + y * BPS, (TOP), (SIZE));       \
+  }                                                                            \
+}
+
+VERTICAL_PRED(dst, top, 8)
+VERTICAL_PRED(dst, top, 16)
+
+#undef VERTICAL_PRED
+
+#define HORIZONTAL_PRED(DST, LEFT, SIZE)                                       \
+static WEBP_INLINE void HorizontalPred##SIZE(uint8_t* (DST),                   \
+                                             const uint8_t* (LEFT)) {          \
+  if ((LEFT) == NULL) {  /* no left column: constant 129 fill */               \
+    FILL_8_OR_16((DST), 129, (SIZE));                                          \
+  } else {               /* row y is filled with the byte LEFT[y] */           \
+    int y;                                                                     \
+    for (y = 0; y < (SIZE); ++y) {                                             \
+      memset((DST) + y * BPS, (LEFT)[y], (SIZE));                              \
+    }                                                                          \
+  }                                                                            \
+}
+
+HORIZONTAL_PRED(dst, left, 8)
+HORIZONTAL_PRED(dst, left, 16)
+
+#undef HORIZONTAL_PRED
+
+#define CLIPPING()  /* temp0/temp1 bytes += leftY_1, saturated to bytes */     \
+  "preceu.ph.qbl   %[temp2],   %[temp0]                  \n\t"                 \
+  "preceu.ph.qbr   %[temp0],   %[temp0]                  \n\t"                 \
+  "preceu.ph.qbl   %[temp3],   %[temp1]                  \n\t"                 \
+  "preceu.ph.qbr   %[temp1],   %[temp1]                  \n\t"                 \
+  "addu.ph         %[temp2],   %[temp2],   %[leftY_1]    \n\t"                 \
+  "addu.ph         %[temp0],   %[temp0],   %[leftY_1]    \n\t"                 \
+  "addu.ph         %[temp3],   %[temp3],   %[leftY_1]    \n\t"                 \
+  "addu.ph         %[temp1],   %[temp1],   %[leftY_1]    \n\t"                 \
+  "shll_s.ph       %[temp2],   %[temp2],   7             \n\t"                 \
+  "shll_s.ph       %[temp0],   %[temp0],   7             \n\t"                 \
+  "shll_s.ph       %[temp3],   %[temp3],   7             \n\t"                 \
+  "shll_s.ph       %[temp1],   %[temp1],   7             \n\t"                 \
+  "precrqu_s.qb.ph %[temp0],   %[temp2],   %[temp0]      \n\t"                 \
+  "precrqu_s.qb.ph %[temp1],   %[temp3],   %[temp1]      \n\t"
+
+#define CLIP_8B_TO_DST(DST, LEFT, TOP, SIZE) do {  /* one row of SIZE bytes */ \
+  int leftY_1 = ((int)(LEFT)[y] << 16) + (LEFT)[y];  /* y: from expansion site */ \
+  int temp0, temp1, temp2, temp3;                                              \
+  __asm__ volatile (                                                           \
+    "replv.ph        %[leftY_1], %[leftY_1]              \n\t"                 \
+    "ulw             %[temp0],   0(%[top])               \n\t"                 \
+    "ulw             %[temp1],   4(%[top])               \n\t"                 \
+    "subu.ph         %[leftY_1], %[leftY_1], %[left_1]   \n\t"                 \
+    CLIPPING()  /* bytes 0..7 of TOP plus leftY_1, saturated */                \
+    "usw             %[temp0],   0(%[dst])               \n\t"                 \
+    "usw             %[temp1],   4(%[dst])               \n\t"                 \
+  ".if " #SIZE " == 16                                   \n\t"                 \
+    "ulw             %[temp0],   8(%[top])               \n\t"                 \
+    "ulw             %[temp1],   12(%[top])              \n\t"                 \
+    CLIPPING()  /* bytes 8..15, assembled only when SIZE is 16 */              \
+    "usw             %[temp0],   8(%[dst])               \n\t"                 \
+    "usw             %[temp1],   12(%[dst])              \n\t"                 \
+  ".endif                                                \n\t"                 \
+    : [leftY_1]"+&r"(leftY_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),       \
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)                                 \
+    : [left_1]"r"(left_1), [top]"r"((TOP)), [dst]"r"((DST))                    \
+    : "memory"                                                                 \
+  );                                                                           \
+} while (0)
+
+#define CLIP_TO_DST(DST, LEFT, TOP, SIZE) do {  /* SIZE rows, one per y */     \
+  int y;  /* row index; CLIP_8B_TO_DST reads it by name */                     \
+  const int left_1 = ((int)(LEFT)[-1] << 16) + (LEFT)[-1];  /* both halves */  \
+  for (y = 0; y < (SIZE); ++y) {                                               \
+    CLIP_8B_TO_DST((DST), (LEFT), (TOP), (SIZE));                              \
+    (DST) += BPS;  /* advances the caller's DST to the next row */             \
+  }                                                                            \
+} while (0)
+
+#define TRUE_MOTION(DST, LEFT, TOP, SIZE)                                      \
+static WEBP_INLINE void TrueMotion##SIZE(uint8_t* (DST), const uint8_t* (LEFT),\
+                                         const uint8_t* (TOP)) {               \
+  /* Dispatch on which neighbours are available:                           */  \
+  /*   left and top -> per-row clipped prediction (CLIP_TO_DST)            */  \
+  /*   left only    -> horizontal prediction from the left samples         */  \
+  /*   top only     -> true motion without left samples (hence: with the   */  \
+  /*                   default 129 value) is equivalent to VE prediction   */  \
+  /*                   where you just copy the top samples                 */  \
+  /*   neither      -> constant fill; the default value is then 129, and   */  \
+  /*                   not 127 as in the VerticalPred case                 */  \
+  if ((LEFT) != NULL && (TOP) != NULL) {                                       \
+    CLIP_TO_DST((DST), (LEFT), (TOP), (SIZE));                                 \
+  } else if ((LEFT) != NULL) {                                                 \
+    HorizontalPred##SIZE((DST), (LEFT));                                       \
+  } else if ((TOP) != NULL) {                                                  \
+    VerticalPred##SIZE((DST), (TOP));                                          \
+  } else {                                                                     \
+    FILL_8_OR_16((DST), 129, (SIZE));                                          \
+  }                                                                            \
+}
+
+TRUE_MOTION(dst, left, top, 8)
+TRUE_MOTION(dst, left, top, 16)
+
+#undef TRUE_MOTION
+#undef CLIP_TO_DST
+#undef CLIP_8B_TO_DST
+#undef CLIPPING
+
+static WEBP_INLINE void DCMode16(uint8_t* dst, const uint8_t* left,
+                                 const uint8_t* top) {
+  int DC, DC1;  // DC: value used for the fill; DC1: second half of the sum
+  int temp0, temp1, temp2, temp3;
+  // DC from the top and/or left neighbours (0x80 if both NULL), then 16x16 fill.
+  __asm__ volatile(
+    "beqz        %[top],   2f                  \n\t"  // no top: label 2
+    LOAD_WITH_OFFSET_X4(temp0, temp1, temp2, temp3, top,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)  // presumably the four words top[0..15]
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t"  // sum of the 4 bytes
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[temp0], %[temp0], %[temp1]  \n\t"
+    "addu        %[temp2], %[temp2], %[temp3]  \n\t"
+    "addu        %[DC],    %[temp0], %[temp2]  \n\t"  // DC = top sum
+    "move        %[DC1],   %[DC]               \n\t"  // default if no left
+    "beqz        %[left],  1f                  \n\t"  // top only: DC doubles
+    LOAD_WITH_OFFSET_X4(temp0, temp1, temp2, temp3, left,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t"
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[temp0], %[temp0], %[temp1]  \n\t"
+    "addu        %[temp2], %[temp2], %[temp3]  \n\t"
+    "addu        %[DC1],   %[temp0], %[temp2]  \n\t"  // DC1 = left sum
+  "1:                                          \n\t"
+    "addu        %[DC],   %[DC],     %[DC1]    \n\t"
+    "j           3f                            \n\t"
+  "2:                                          \n\t"
+    "beqz        %[left],  4f                  \n\t"  // neither: label 4
+    LOAD_WITH_OFFSET_X4(temp0, temp1, temp2, temp3, left,
+                        0, 4, 8, 12,
+                        0, 0, 0, 0,
+                        0)
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t"
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[temp0], %[temp0], %[temp1]  \n\t"
+    "addu        %[temp2], %[temp2], %[temp3]  \n\t"
+    "addu        %[DC],    %[temp0], %[temp2]  \n\t"
+    "addu        %[DC],    %[DC],    %[DC]     \n\t"  // left only: doubled
+  "3:                                          \n\t"
+    "shra_r.w    %[DC],    %[DC],    5         \n\t"  // rounded >> 5
+    "j           5f                            \n\t"
+  "4:                                          \n\t"
+    "li          %[DC],    0x80                \n\t"  // no neighbours
+  "5:                                          \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [DC]"=&r"(DC),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [DC1]"=&r"(DC1)
+    : [left]"r"(left), [top]"r"(top)
+    : "memory"
+  );
+
+  FILL_8_OR_16(dst, DC, 16);
+}
+
+static WEBP_INLINE void DCMode8(uint8_t* dst, const uint8_t* left,
+                                const uint8_t* top) {
+  int DC, DC1;  // DC: value used for the fill; DC1: second half of the sum
+  int temp0, temp1, temp2, temp3;
+  // DC from 8 top and/or 8 left bytes (0x80 if both NULL), then 8x8 fill.
+  __asm__ volatile(
+    "beqz        %[top],   2f                  \n\t"  // no top: label 2
+    "ulw         %[temp0], 0(%[top])           \n\t"  // top[0..3]
+    "ulw         %[temp1], 4(%[top])           \n\t"  // top[4..7]
+    "raddu.w.qb  %[temp0], %[temp0]            \n\t"  // sum of the 4 bytes
+    "raddu.w.qb  %[temp1], %[temp1]            \n\t"
+    "addu        %[DC],    %[temp0], %[temp1]  \n\t"  // DC = top sum
+    "move        %[DC1],   %[DC]               \n\t"  // default if no left
+    "beqz        %[left],  1f                  \n\t"  // top only: DC doubles
+    "ulw         %[temp2], 0(%[left])          \n\t"
+    "ulw         %[temp3], 4(%[left])          \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[DC1],   %[temp2], %[temp3]  \n\t"  // DC1 = left sum
+  "1:                                          \n\t"
+    "addu        %[DC],    %[DC],    %[DC1]    \n\t"
+    "j           3f                            \n\t"
+  "2:                                          \n\t"
+    "beqz        %[left],  4f                  \n\t"  // neither: label 4
+    "ulw         %[temp2], 0(%[left])          \n\t"
+    "ulw         %[temp3], 4(%[left])          \n\t"
+    "raddu.w.qb  %[temp2], %[temp2]            \n\t"
+    "raddu.w.qb  %[temp3], %[temp3]            \n\t"
+    "addu        %[DC],    %[temp2], %[temp3]  \n\t"
+    "addu        %[DC],    %[DC],    %[DC]     \n\t"  // left only: doubled
+  "3:                                          \n\t"
+    "shra_r.w    %[DC], %[DC], 4               \n\t"  // rounded >> 4
+    "j           5f                            \n\t"
+  "4:                                          \n\t"
+    "li          %[DC], 0x80                   \n\t"  // no neighbours
+  "5:                                          \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [DC]"=&r"(DC),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [DC1]"=&r"(DC1)
+    : [left]"r"(left), [top]"r"(top)
+    : "memory"
+  );
+
+  FILL_8_OR_16(dst, DC, 8);
+}
+
+// 4x4 DC prediction (MIPS DSP R2 inline assembly).
+// Sums the four bytes at top[0..3] and the four bytes at top[-5..-2],
+// computes (sum + 4) >> 3, replicates that byte into a 32-bit word and stores
+// it (unaligned) at dst + {0,1,2,3} * BPS.
+static void DC4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1;
+  __asm__ volatile(
+    "ulw          %[temp0],   0(%[top])               \n\t"
+    "ulw          %[temp1],   -5(%[top])              \n\t"
+    // raddu.w.qb adds up the four unsigned bytes of a word.
+    "raddu.w.qb   %[temp0],   %[temp0]                \n\t"
+    "raddu.w.qb   %[temp1],   %[temp1]                \n\t"
+    "addu         %[temp0],   %[temp0],    %[temp1]   \n\t"
+    "addiu        %[temp0],   %[temp0],    4          \n\t"
+    "srl          %[temp0],   %[temp0],    3          \n\t"
+    // Broadcast the DC byte to all four byte lanes before storing the rows.
+    "replv.qb     %[temp0],   %[temp0]                \n\t"
+    "usw          %[temp0],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw          %[temp0],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw          %[temp0],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw          %[temp0],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 TrueMotion prediction (MIPS DSP R2 inline assembly).
+// Reads top[0..3], the sample at top[-1] and the four bytes at top[-5..-2].
+// top[-1] is subtracted from each of the top[-5..-2] samples, the differences
+// are added to the top[0..3] samples, and the sums are saturated to unsigned
+// bytes by the shll_s.ph / precrqu_s.qb.ph pair. One 4-byte row is stored
+// (unaligned) at each of dst + {0,1,2,3} * BPS.
+static void TM4(uint8_t* dst, const uint8_t* top) {
+  int a10, a32, temp0, temp1, temp2, temp3, temp4, temp5;
+  // Mask keeping the low byte of each halfword.
+  const int c35 = 0xff00ff;
+  __asm__ volatile (
+    "lbu              %[temp1],  0(%[top])                     \n\t"
+    "lbu              %[a10],    1(%[top])                     \n\t"
+    "lbu              %[temp2],  2(%[top])                     \n\t"
+    "lbu              %[a32],    3(%[top])                     \n\t"
+    "ulw              %[temp0],  -5(%[top])                    \n\t"
+    "lbu              %[temp4],  -1(%[top])                    \n\t"
+    // a10 = top[1]:top[0] and a32 = top[3]:top[2], as pairs of halfwords.
+    "append           %[a10],    %[temp1],   16                \n\t"
+    "append           %[a32],    %[temp2],   16                \n\t"
+    "replv.ph         %[temp4],  %[temp4]                      \n\t"
+    // Split the top[-5..-2] word into halfwords (odd bytes in temp1, even
+    // bytes in temp0), then subtract top[-1] from each of them.
+    "shrl.ph          %[temp1],  %[temp0],   8                 \n\t"
+    "and              %[temp0],  %[temp0],   %[c35]            \n\t"
+    "subu.ph          %[temp1],  %[temp1],   %[temp4]          \n\t"
+    "subu.ph          %[temp0],  %[temp0],   %[temp4]          \n\t"
+    "srl              %[temp2],  %[temp1],   16                \n\t"
+    "srl              %[temp3],  %[temp0],   16                \n\t"
+    "replv.ph         %[temp2],  %[temp2]                      \n\t"
+    "replv.ph         %[temp3],  %[temp3]                      \n\t"
+    "replv.ph         %[temp4],  %[temp1]                      \n\t"
+    "replv.ph         %[temp5],  %[temp0]                      \n\t"
+    "addu.ph          %[temp0],  %[temp3],   %[a10]            \n\t"
+    "addu.ph          %[temp1],  %[temp3],   %[a32]            \n\t"
+    "addu.ph          %[temp3],  %[temp2],   %[a10]            \n\t"
+    "addu.ph          %[temp2],  %[temp2],   %[a32]            \n\t"
+    "shll_s.ph        %[temp0],  %[temp0],   7                 \n\t"
+    "shll_s.ph        %[temp1],  %[temp1],   7                 \n\t"
+    "shll_s.ph        %[temp3],  %[temp3],   7                 \n\t"
+    "shll_s.ph        %[temp2],  %[temp2],   7                 \n\t"
+    "precrqu_s.qb.ph  %[temp0],  %[temp1],   %[temp0]          \n\t"
+    "precrqu_s.qb.ph  %[temp1],  %[temp2],   %[temp3]          \n\t"
+    "addu.ph          %[temp2],  %[temp5],   %[a10]            \n\t"
+    "addu.ph          %[temp3],  %[temp5],   %[a32]            \n\t"
+    "addu.ph          %[temp5],  %[temp4],   %[a10]            \n\t"
+    "addu.ph          %[temp4],  %[temp4],   %[a32]            \n\t"
+    "shll_s.ph        %[temp2],  %[temp2],   7                 \n\t"
+    "shll_s.ph        %[temp3],  %[temp3],   7                 \n\t"
+    "shll_s.ph        %[temp4],  %[temp4],   7                 \n\t"
+    "shll_s.ph        %[temp5],  %[temp5],   7                 \n\t"
+    "precrqu_s.qb.ph  %[temp2],  %[temp3],   %[temp2]          \n\t"
+    "precrqu_s.qb.ph  %[temp3],  %[temp4],   %[temp5]          \n\t"
+    "usw              %[temp1],  0*" XSTR(BPS) "(%[dst])       \n\t"
+    "usw              %[temp0],  1*" XSTR(BPS) "(%[dst])       \n\t"
+    "usw              %[temp3],  2*" XSTR(BPS) "(%[dst])       \n\t"
+    "usw              %[temp2],  3*" XSTR(BPS) "(%[dst])       \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [a10]"=&r"(a10), [a32]"=&r"(a32)
+    : [c35]"r"(c35), [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 vertical prediction with smoothing (MIPS DSP R2 inline assembly).
+// Reads top[-1..4] and computes, for x = 0..3,
+//   (top[x - 1] + 2 * top[x] + top[x + 1] + 2) >> 2.
+// The same resulting 4-byte row is stored (unaligned) at
+// dst + {0,1,2,3} * BPS.
+// NOTE(review): the byte-lane bookkeeping appears to assume a little-endian
+// target -- confirm.
+static void VE4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+  __asm__ volatile(
+    "ulw             %[temp0],   -1(%[top])              \n\t"
+    "ulh             %[temp1],   3(%[top])               \n\t"
+    // Widen the loaded bytes to 16-bit lanes.
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp4],   %[temp1]                \n\t"
+    // temp5/temp6 hold the centre taps, doubled below.
+    "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "packrl.ph       %[temp6],   %[temp4],    %[temp3]   \n\t"
+    "shll.ph         %[temp5],   %[temp5],    1          \n\t"
+    "shll.ph         %[temp6],   %[temp6],    1          \n\t"
+    "addq.ph         %[temp2],   %[temp5],    %[temp2]   \n\t"
+    "addq.ph         %[temp6],   %[temp6],    %[temp4]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp3]   \n\t"
+    "addq.ph         %[temp6],   %[temp6],    %[temp3]   \n\t"
+    // Rounding shift by 2, then pack the four results back into bytes.
+    "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"
+    "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
+    "precr.qb.ph     %[temp4],   %[temp6],    %[temp2]   \n\t"
+    "usw             %[temp4],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp4],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp4],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp4],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 horizontal prediction with smoothing (MIPS DSP R2 inline assembly).
+// Reads top[-5..-1], applies a rounded (1,2,1)/4 filter along those samples
+// (top[-5] is used twice at the far end) and fills each of the four output
+// rows at dst + {0,1,2,3} * BPS with one replicated filtered byte.
+static void HE4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+  __asm__ volatile(
+    "ulw             %[temp0],   -4(%[top])              \n\t"
+    "lbu             %[temp1],   -5(%[top])              \n\t"
+    // Widen the loaded bytes to 16-bit lanes; temp4 = top[-5] in both lanes.
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbl   %[temp3],   %[temp0]                \n\t"
+    "replv.ph        %[temp4],   %[temp1]                \n\t"
+    // temp5/temp6 hold the centre taps, doubled below.
+    "packrl.ph       %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "packrl.ph       %[temp6],   %[temp2],    %[temp4]   \n\t"
+    "shll.ph         %[temp5],   %[temp5],    1          \n\t"
+    "shll.ph         %[temp6],   %[temp6],    1          \n\t"
+    "addq.ph         %[temp3],   %[temp3],    %[temp5]   \n\t"
+    "addq.ph         %[temp3],   %[temp3],    %[temp2]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp6]   \n\t"
+    "addq.ph         %[temp2],   %[temp2],    %[temp4]   \n\t"
+    "shra_r.ph       %[temp3],   %[temp3],    2          \n\t"
+    "shra_r.ph       %[temp2],   %[temp2],    2          \n\t"
+    // Broadcast each of the four filtered values across a whole row.
+    "replv.qb        %[temp0],   %[temp3]                \n\t"
+    "replv.qb        %[temp1],   %[temp2]                \n\t"
+    "srl             %[temp3],   %[temp3],    16         \n\t"
+    "srl             %[temp2],   %[temp2],    16         \n\t"
+    "replv.qb        %[temp3],   %[temp3]                \n\t"
+    "replv.qb        %[temp2],   %[temp2]                \n\t"
+    "usw             %[temp3],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp0],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp2],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp1],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 'RD' prediction (MIPS DSP R2 inline assembly); presumably the VP8
+// down-right diagonal mode -- confirm against the C reference.
+// Reads top[-5..3] and builds rounded (1,2,1)/4 filtered values
+// (shll.ph by 1, two adds, shra_r by 2). Rows 3 and 1 are stored first;
+// rows 2 and 0 are then obtained by shifting one further filtered byte into
+// the same registers with 'prepend'. Rows live at dst + {0,1,2,3} * BPS.
+static void RD4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;
+  int temp6, temp7, temp8, temp9, temp10, temp11;
+  __asm__ volatile(
+    "ulw             %[temp0],    -5(%[top])               \n\t"
+    "ulw             %[temp1],    -1(%[top])               \n\t"
+    // Widen the eight loaded bytes to 16-bit lanes.
+    "preceu.ph.qbl   %[temp2],    %[temp0]                 \n\t"
+    "preceu.ph.qbr   %[temp3],    %[temp0]                 \n\t"
+    "preceu.ph.qbr   %[temp4],    %[temp1]                 \n\t"
+    "preceu.ph.qbl   %[temp5],    %[temp1]                 \n\t"
+    "packrl.ph       %[temp6],    %[temp2],    %[temp3]    \n\t"
+    "packrl.ph       %[temp7],    %[temp4],    %[temp2]    \n\t"
+    "packrl.ph       %[temp8],    %[temp5],    %[temp4]    \n\t"
+    "shll.ph         %[temp6],    %[temp6],    1           \n\t"
+    "addq.ph         %[temp9],    %[temp2],    %[temp6]    \n\t"
+    "shll.ph         %[temp7],    %[temp7],    1           \n\t"
+    "addq.ph         %[temp9],    %[temp9],    %[temp3]    \n\t"
+    "shll.ph         %[temp8],    %[temp8],    1           \n\t"
+    "shra_r.ph       %[temp9],    %[temp9],    2           \n\t"
+    "addq.ph         %[temp10],   %[temp4],    %[temp7]    \n\t"
+    "addq.ph         %[temp11],   %[temp5],    %[temp8]    \n\t"
+    "addq.ph         %[temp10],   %[temp10],   %[temp2]    \n\t"
+    "addq.ph         %[temp11],   %[temp11],   %[temp4]    \n\t"
+    "shra_r.ph       %[temp10],   %[temp10],   2           \n\t"
+    "shra_r.ph       %[temp11],   %[temp11],   2           \n\t"
+    // Scalar tail: temp0 = (top[3] + 2 * top[2] + top[1] + 2) >> 2.
+    "lbu             %[temp0],    3(%[top])                \n\t"
+    "lbu             %[temp1],    2(%[top])                \n\t"
+    "lbu             %[temp2],    1(%[top])                \n\t"
+    "sll             %[temp1],    %[temp1],    1           \n\t"
+    "addu            %[temp0],    %[temp0],    %[temp1]    \n\t"
+    "addu            %[temp0],    %[temp0],    %[temp2]    \n\t"
+    "precr.qb.ph     %[temp9],    %[temp10],   %[temp9]    \n\t"
+    "shra_r.w        %[temp0],    %[temp0],    2           \n\t"
+    "precr.qb.ph     %[temp10],   %[temp11],   %[temp10]   \n\t"
+    "usw             %[temp9],    3*" XSTR(BPS) "(%[dst])  \n\t"
+    "usw             %[temp10],   1*" XSTR(BPS) "(%[dst])  \n\t"
+    "prepend         %[temp9],    %[temp11],   8           \n\t"
+    "prepend         %[temp10],   %[temp0],    8           \n\t"
+    "usw             %[temp9],    2*" XSTR(BPS) "(%[dst])  \n\t"
+    "usw             %[temp10],   0*" XSTR(BPS) "(%[dst])  \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 'VR' prediction (MIPS DSP R2 inline assembly); presumably the VP8
+// vertical-right mode -- confirm against the C reference.
+// Reads top[-4..3]. Combines rounded two-sample averages (addqh_r.ph) with
+// rounded three-tap sums (shll.ph by 1, adds, shra_r.ph by 2). Rows 0 and 1
+// are stored first; rows 2 and 3 reuse the row 0 and row 1 registers after
+// one extra byte is shifted in with 'append'. Rows live at
+// dst + {0,1,2,3} * BPS.
+static void VR4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  __asm__ volatile (
+    "ulw              %[temp0],   -4(%[top])              \n\t"
+    "ulw              %[temp1],   0(%[top])               \n\t"
+    // Widen the loaded bytes to 16-bit lanes.
+    "preceu.ph.qbl    %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbr    %[temp0],   %[temp0]                \n\t"
+    "preceu.ph.qbla   %[temp3],   %[temp1]                \n\t"
+    "preceu.ph.qbra   %[temp1],   %[temp1]                \n\t"
+    "packrl.ph        %[temp7],   %[temp3],    %[temp2]   \n\t"
+    "addqh_r.ph       %[temp4],   %[temp1],    %[temp3]   \n\t"
+    "move             %[temp6],   %[temp1]                \n\t"
+    "append           %[temp1],   %[temp2],    16         \n\t"
+    "shll.ph          %[temp9],   %[temp6],    1          \n\t"
+    "addqh_r.ph       %[temp5],   %[temp7],    %[temp6]   \n\t"
+    "shll.ph          %[temp8],   %[temp7],    1          \n\t"
+    "addu.ph          %[temp3],   %[temp7],    %[temp3]   \n\t"
+    "addu.ph          %[temp1],   %[temp1],    %[temp6]   \n\t"
+    "packrl.ph        %[temp7],   %[temp2],    %[temp0]   \n\t"
+    "addu.ph          %[temp6],   %[temp0],    %[temp2]   \n\t"
+    "addu.ph          %[temp3],   %[temp3],    %[temp9]   \n\t"
+    "addu.ph          %[temp1],   %[temp1],    %[temp8]   \n\t"
+    "shll.ph          %[temp7],   %[temp7],    1          \n\t"
+    "shra_r.ph        %[temp3],   %[temp3],    2          \n\t"
+    "shra_r.ph        %[temp1],   %[temp1],    2          \n\t"
+    "addu.ph          %[temp6],   %[temp6],    %[temp7]   \n\t"
+    "shra_r.ph        %[temp6],   %[temp6],    2          \n\t"
+    // Interleave and pack the 16-bit results into the byte rows.
+    "precrq.ph.w      %[temp8],   %[temp4],    %[temp5]   \n\t"
+    "append           %[temp4],   %[temp5],    16         \n\t"
+    "precrq.ph.w      %[temp2],   %[temp3],    %[temp1]   \n\t"
+    "append           %[temp3],   %[temp1],    16         \n\t"
+    "precr.qb.ph      %[temp8],   %[temp8],    %[temp4]   \n\t"
+    "precr.qb.ph      %[temp3],   %[temp2],    %[temp3]   \n\t"
+    "usw              %[temp8],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp3],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "append           %[temp3],   %[temp6],    8          \n\t"
+    "srl              %[temp6],   %[temp6],    16         \n\t"
+    "append           %[temp8],   %[temp6],    8          \n\t"
+    "usw              %[temp3],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp8],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 'LD' prediction (MIPS DSP R2 inline assembly); presumably the VP8
+// down-left diagonal mode -- confirm against the C reference.
+// Reads top[0..7] and builds rounded (1,2,1)/4 filtered values
+// (shll.ph by 1, two adds, shra_r by 2). Rows 0 and 2 are stored first;
+// rows 1 and 3 are then obtained by shifting one further filtered byte into
+// the same registers with 'prepend'. Rows live at dst + {0,1,2,3} * BPS.
+static void LD4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;
+  int temp6, temp7, temp8, temp9, temp10, temp11;
+  __asm__ volatile(
+    "ulw             %[temp0],    0(%[top])               \n\t"
+    "ulw             %[temp1],    4(%[top])               \n\t"
+    // Widen the eight loaded bytes to 16-bit lanes.
+    "preceu.ph.qbl   %[temp2],    %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp3],    %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp4],    %[temp1]                \n\t"
+    "preceu.ph.qbl   %[temp5],    %[temp1]                \n\t"
+    "packrl.ph       %[temp6],    %[temp2],    %[temp3]   \n\t"
+    "packrl.ph       %[temp7],    %[temp4],    %[temp2]   \n\t"
+    "packrl.ph       %[temp8],    %[temp5],    %[temp4]   \n\t"
+    "shll.ph         %[temp6],    %[temp6],    1          \n\t"
+    "addq.ph         %[temp9],    %[temp2],    %[temp6]   \n\t"
+    "shll.ph         %[temp7],    %[temp7],    1          \n\t"
+    "addq.ph         %[temp9],    %[temp9],    %[temp3]   \n\t"
+    "shll.ph         %[temp8],    %[temp8],    1          \n\t"
+    "shra_r.ph       %[temp9],    %[temp9],    2          \n\t"
+    "addq.ph         %[temp10],   %[temp4],    %[temp7]   \n\t"
+    "addq.ph         %[temp11],   %[temp5],    %[temp8]   \n\t"
+    "addq.ph         %[temp10],   %[temp10],   %[temp2]   \n\t"
+    "addq.ph         %[temp11],   %[temp11],   %[temp4]   \n\t"
+    "shra_r.ph       %[temp10],   %[temp10],   2          \n\t"
+    "shra_r.ph       %[temp11],   %[temp11],   2          \n\t"
+    // Scalar tail: twice the top byte of the second word plus the sum of the
+    // two widened lanes held in temp5, rounded and shifted right by 2.
+    "srl             %[temp1],    %[temp1],    24         \n\t"
+    "sll             %[temp1],    %[temp1],    1          \n\t"
+    "raddu.w.qb      %[temp5],    %[temp5]                \n\t"
+    "precr.qb.ph     %[temp9],    %[temp10],   %[temp9]   \n\t"
+    "precr.qb.ph     %[temp10],   %[temp11],   %[temp10]  \n\t"
+    "addu            %[temp1],    %[temp1],    %[temp5]   \n\t"
+    "shra_r.w        %[temp1],    %[temp1],    2          \n\t"
+    "usw             %[temp9],    0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp10],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "prepend         %[temp9],    %[temp11],   8          \n\t"
+    "prepend         %[temp10],   %[temp1],    8          \n\t"
+    "usw             %[temp9],    1*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp10],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 'VL' prediction (MIPS DSP R2 inline assembly); presumably the VP8
+// vertical-left mode -- confirm against the C reference.
+// Reads top[0..7]. Combines rounded two-sample averages (addqh_r.ph) with
+// rounded three-tap sums (shll.ph by 1, adds, shra_r.ph by 2). Rows 0 and 1
+// are stored first; rows 2 and 3 reuse the row 0 and row 1 registers after
+// one extra byte is shifted in with 'prepend'. Rows live at
+// dst + {0,1,2,3} * BPS.
+static void VL4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  __asm__ volatile (
+    "ulw              %[temp0],   0(%[top])               \n\t"
+    "ulw              %[temp1],   4(%[top])               \n\t"
+    // Widen the loaded bytes to 16-bit lanes.
+    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbra   %[temp0],   %[temp0]                \n\t"
+    "preceu.ph.qbl    %[temp3],   %[temp1]                \n\t"
+    "preceu.ph.qbr    %[temp1],   %[temp1]                \n\t"
+    "addqh_r.ph       %[temp4],   %[temp0],    %[temp2]   \n\t"
+    "packrl.ph        %[temp7],   %[temp1],    %[temp0]   \n\t"
+    "precrq.ph.w      %[temp6],   %[temp1],    %[temp2]   \n\t"
+    "shll.ph          %[temp9],   %[temp2],    1          \n\t"
+    "addqh_r.ph       %[temp5],   %[temp7],    %[temp2]   \n\t"
+    "shll.ph          %[temp8],   %[temp7],    1          \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp6]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp7]   \n\t"
+    "packrl.ph        %[temp7],   %[temp3],    %[temp1]   \n\t"
+    "addu.ph          %[temp6],   %[temp1],    %[temp3]   \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp8]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp9]   \n\t"
+    "shll.ph          %[temp7],   %[temp7],    1          \n\t"
+    "shra_r.ph        %[temp2],   %[temp2],    2          \n\t"
+    "shra_r.ph        %[temp0],   %[temp0],    2          \n\t"
+    "addu.ph          %[temp6],   %[temp6],    %[temp7]   \n\t"
+    "shra_r.ph        %[temp6],   %[temp6],    2          \n\t"
+    // Interleave and pack the 16-bit results into the byte rows.
+    "precrq.ph.w      %[temp8],   %[temp5],    %[temp4]   \n\t"
+    "append           %[temp5],   %[temp4],    16         \n\t"
+    "precrq.ph.w      %[temp3],   %[temp2],    %[temp0]   \n\t"
+    "append           %[temp2],   %[temp0],    16         \n\t"
+    "precr.qb.ph      %[temp8],   %[temp8],    %[temp5]   \n\t"
+    "precr.qb.ph      %[temp3],   %[temp3],    %[temp2]   \n\t"
+    "usw              %[temp8],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "prepend          %[temp8],   %[temp6],    8          \n\t"
+    "usw              %[temp3],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    "srl              %[temp6],   %[temp6],    16         \n\t"
+    "prepend          %[temp3],   %[temp6],    8          \n\t"
+    "usw              %[temp8],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp3],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 'HD' prediction (MIPS DSP R2 inline assembly); presumably the VP8
+// horizontal-down mode -- confirm against the C reference.
+// Reads top[-5..2]. Combines rounded two-sample averages (addqh_r.ph) with
+// rounded three-tap sums (shll.ph by 1, adds, shra_r.ph by 2), then packs
+// the 16-bit results into four 4-byte rows stored (unaligned) at
+// dst + {0,1,2,3} * BPS.
+static void HD4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+  __asm__ volatile (
+    "ulw              %[temp0],   -5(%[top])              \n\t"
+    "ulw              %[temp1],   -1(%[top])              \n\t"
+    // Widen the loaded bytes to 16-bit lanes.
+    "preceu.ph.qbla   %[temp2],   %[temp0]                \n\t"
+    "preceu.ph.qbra   %[temp0],   %[temp0]                \n\t"
+    "preceu.ph.qbl    %[temp3],   %[temp1]                \n\t"
+    "preceu.ph.qbr    %[temp1],   %[temp1]                \n\t"
+    "addqh_r.ph       %[temp4],   %[temp0],    %[temp2]   \n\t"
+    "packrl.ph        %[temp7],   %[temp1],    %[temp0]   \n\t"
+    "precrq.ph.w      %[temp6],   %[temp1],    %[temp2]   \n\t"
+    "shll.ph          %[temp9],   %[temp2],    1          \n\t"
+    "addqh_r.ph       %[temp5],   %[temp7],    %[temp2]   \n\t"
+    "shll.ph          %[temp8],   %[temp7],    1          \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp6]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp7]   \n\t"
+    "packrl.ph        %[temp7],   %[temp3],    %[temp1]   \n\t"
+    "addu.ph          %[temp6],   %[temp1],    %[temp3]   \n\t"
+    "addu.ph          %[temp2],   %[temp2],    %[temp8]   \n\t"
+    "addu.ph          %[temp0],   %[temp0],    %[temp9]   \n\t"
+    "shll.ph          %[temp7],   %[temp7],    1          \n\t"
+    "shra_r.ph        %[temp2],   %[temp2],    2          \n\t"
+    "shra_r.ph        %[temp0],   %[temp0],    2          \n\t"
+    "addu.ph          %[temp6],   %[temp6],    %[temp7]   \n\t"
+    "shra_r.ph        %[temp6],   %[temp6],    2          \n\t"
+    // Rows 0 and 1 are built from the upper halfwords of the results ...
+    "precrq.ph.w      %[temp1],   %[temp2],    %[temp5]   \n\t"
+    "precrq.ph.w      %[temp3],   %[temp0],    %[temp4]   \n\t"
+    "precr.qb.ph      %[temp7],   %[temp6],    %[temp1]   \n\t"
+    "precr.qb.ph      %[temp6],   %[temp1],    %[temp3]   \n\t"
+    "usw              %[temp7],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp6],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    // ... rows 2 and 3 additionally use the lower halfwords.
+    "append           %[temp2],   %[temp5],    16         \n\t"
+    "append           %[temp0],   %[temp4],    16         \n\t"
+    "precr.qb.ph      %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "precr.qb.ph      %[temp4],   %[temp2],    %[temp0]   \n\t"
+    "usw              %[temp5],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw              %[temp4],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+// 4x4 'HU' prediction (MIPS DSP R2 inline assembly); presumably the VP8
+// horizontal-up mode -- confirm against the C reference.
+// Reads only the four bytes at top[-5..-2]. Combines rounded two-sample
+// averages (addqh_r.ph) with rounded three-tap sums (shll.ph by 1, adds,
+// shra_r.ph by 2). Row 3 is filled with a single replicated sample
+// (replv.qb); rows 0..2 are packed from the filtered values. Rows live at
+// dst + {0,1,2,3} * BPS.
+static void HU4(uint8_t* dst, const uint8_t* top) {
+  int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  __asm__ volatile (
+    "ulw             %[temp0],   -5(%[top])              \n\t"
+    // Widen the loaded bytes to 16-bit lanes.
+    "preceu.ph.qbl   %[temp1],   %[temp0]                \n\t"
+    "preceu.ph.qbr   %[temp2],   %[temp0]                \n\t"
+    "packrl.ph       %[temp3],   %[temp1],    %[temp2]   \n\t"
+    // temp7 = low byte of temp2 replicated into all four byte lanes.
+    "replv.qb        %[temp7],   %[temp2]                \n\t"
+    "addqh_r.ph      %[temp4],   %[temp1],    %[temp3]   \n\t"
+    "addqh_r.ph      %[temp5],   %[temp3],    %[temp2]   \n\t"
+    "shll.ph         %[temp6],   %[temp3],    1          \n\t"
+    "addu.ph         %[temp3],   %[temp2],    %[temp3]   \n\t"
+    "addu.ph         %[temp6],   %[temp1],    %[temp6]   \n\t"
+    "shll.ph         %[temp0],   %[temp2],    1          \n\t"
+    "addu.ph         %[temp6],   %[temp6],    %[temp2]   \n\t"
+    "addu.ph         %[temp0],   %[temp3],    %[temp0]   \n\t"
+    "shra_r.ph       %[temp6],   %[temp6],    2          \n\t"
+    "shra_r.ph       %[temp0],   %[temp0],    2          \n\t"
+    "packrl.ph       %[temp3],   %[temp6],    %[temp5]   \n\t"
+    "precrq.ph.w     %[temp2],   %[temp6],    %[temp4]   \n\t"
+    "append          %[temp0],   %[temp5],    16         \n\t"
+    "precr.qb.ph     %[temp3],   %[temp3],    %[temp2]   \n\t"
+    "usw             %[temp3],   0*" XSTR(BPS) "(%[dst]) \n\t"
+    "precr.qb.ph     %[temp1],   %[temp7],    %[temp0]   \n\t"
+    "usw             %[temp7],   3*" XSTR(BPS) "(%[dst]) \n\t"
+    "packrl.ph       %[temp2],   %[temp1],    %[temp3]   \n\t"
+    "usw             %[temp1],   2*" XSTR(BPS) "(%[dst]) \n\t"
+    "usw             %[temp2],   1*" XSTR(BPS) "(%[dst]) \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
+    : [top]"r"(top), [dst]"r"(dst)
+    : "memory"
+  );
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+// Generates every chroma 8x8 prediction into 'dst': first for the U block,
+// then for the V block. The V block sits 8 bytes further along in 'dst' and
+// 'top', and 16 bytes further along in 'left'. A NULL 'top' or 'left' is
+// forwarded unchanged to the predictors.
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
+                             const uint8_t* top) {
+  uint8_t* const dst_v = dst + 8;
+  const uint8_t* const top_v = top ? top + 8 : top;
+  const uint8_t* const left_v = left ? left + 16 : left;
+
+  // U block
+  DCMode8(dst + C8DC8, left, top);
+  VerticalPred8(dst + C8VE8, top);
+  HorizontalPred8(dst + C8HE8, left);
+  TrueMotion8(dst + C8TM8, left, top);
+
+  // V block
+  DCMode8(dst_v + C8DC8, left_v, top_v);
+  VerticalPred8(dst_v + C8VE8, top_v);
+  HorizontalPred8(dst_v + C8HE8, left_v);
+  TrueMotion8(dst_v + C8TM8, left_v, top_v);
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+// Writes the four luma 16x16 predictions (DC, vertical, horizontal and
+// TrueMotion) at their respective offsets inside 'dst'.
+static void Intra16Preds(uint8_t* dst,
+                         const uint8_t* left, const uint8_t* top) {
+  uint8_t* const dc_out = dst + I16DC16;
+  uint8_t* const ve_out = dst + I16VE16;
+  uint8_t* const he_out = dst + I16HE16;
+  uint8_t* const tm_out = dst + I16TM16;
+
+  DCMode16(dc_out, left, top);
+  VerticalPred16(ve_out, top);
+  HorizontalPred16(he_out, left);
+  TrueMotion16(tm_out, left, top);
+}
+
+// Left samples are top[-5 .. -2], top_left is top[-1], top are
+// located at top[0..3], and top right is top[4..7]
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+  // Run all ten 4x4 predictors on the same 'top' context; each one writes
+  // its block at its own mode-specific offset inside 'dst'.
+  DC4(dst + I4DC4, top);
+  TM4(dst + I4TM4, top);
+  VE4(dst + I4VE4, top);
+  HE4(dst + I4HE4, top);
+  RD4(dst + I4RD4, top);
+  VR4(dst + I4VR4, top);
+  LD4(dst + I4LD4, top);
+  VL4(dst + I4VL4, top);
+  HD4(dst + I4HD4, top);
+  HU4(dst + I4HU4, top);
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+#if !defined(WORK_AROUND_GCC)
+
+// Assembly fragment: loads one 32-bit word (four bytes) from each of
+// %[a] and %[b] at byte offset A, widens the bytes to 16-bit lanes,
+// subtracts them lane by lane and accumulates the four squared differences
+// into accumulator $ac0 with dpa.w.ph. Uses %[temp0]..%[temp3] as scratch.
+// NOTE(review): the loads are plain 'lw', so both addresses presumably need
+// to be 4-byte aligned -- confirm against callers.
+#define GET_SSE_INNER(A)                                                  \
+  "lw               %[temp0],    " #A "(%[a])                  \n\t"      \
+  "lw               %[temp1],    " #A "(%[b])                  \n\t"      \
+  "preceu.ph.qbr    %[temp2],    %[temp0]                      \n\t"      \
+  "preceu.ph.qbl    %[temp0],    %[temp0]                      \n\t"      \
+  "preceu.ph.qbr    %[temp3],    %[temp1]                      \n\t"      \
+  "preceu.ph.qbl    %[temp1],    %[temp1]                      \n\t"      \
+  "subq.ph          %[temp2],    %[temp2],    %[temp3]         \n\t"      \
+  "subq.ph          %[temp0],    %[temp0],    %[temp1]         \n\t"      \
+  "dpa.w.ph         $ac0,        %[temp2],    %[temp2]         \n\t"      \
+  "dpa.w.ph         $ac0,        %[temp0],    %[temp0]         \n\t"
+
+// Expands GET_SSE_INNER for four byte offsets A, B, C and D, i.e. accumulates
+// the squared differences of 4 x 4 = 16 bytes into $ac0.
+#define GET_SSE(A, B, C, D)               \
+  GET_SSE_INNER(A)                        \
+  GET_SSE_INNER(B)                        \
+  GET_SSE_INNER(C)                        \
+  GET_SSE_INNER(D)
+
+// Returns the sum of squared differences between the 16x16 byte blocks at
+// 'a' and 'b', whose rows are BPS bytes apart.
+// $ac0 (hi/lo) is cleared with 'mult $zero, $zero', each GET_SSE line
+// accumulates one 16-byte row, and the low word of the accumulator is read
+// back with 'mflo'.
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3;
+  __asm__ volatile (
+    "mult   $zero,    $zero                            \n\t"
+    GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+    GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+    GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+    GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+    GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+    GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+    GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+    GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+    GET_SSE( 8 * BPS, 4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS)
+    GET_SSE( 9 * BPS, 4 +  9 * BPS, 8 +  9 * BPS, 12 +  9 * BPS)
+    GET_SSE(10 * BPS, 4 + 10 * BPS, 8 + 10 * BPS, 12 + 10 * BPS)
+    GET_SSE(11 * BPS, 4 + 11 * BPS, 8 + 11 * BPS, 12 + 11 * BPS)
+    GET_SSE(12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS)
+    GET_SSE(13 * BPS, 4 + 13 * BPS, 8 + 13 * BPS, 12 + 13 * BPS)
+    GET_SSE(14 * BPS, 4 + 14 * BPS, 8 + 14 * BPS, 12 + 14 * BPS)
+    GET_SSE(15 * BPS, 4 + 15 * BPS, 8 + 15 * BPS, 12 + 15 * BPS)
+    "mflo   %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    // hi/lo are clobbered because the asm accumulates in $ac0.
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+// Returns the sum of squared differences between the 16-wide, 8-row byte
+// blocks at 'a' and 'b', whose rows are BPS bytes apart.
+// $ac0 (hi/lo) is cleared with 'mult $zero, $zero', each GET_SSE line
+// accumulates one 16-byte row, and the low word of the accumulator is read
+// back with 'mflo'.
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3;
+  __asm__ volatile (
+    "mult   $zero,    $zero                            \n\t"
+    GET_SSE( 0 * BPS, 4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS)
+    GET_SSE( 1 * BPS, 4 +  1 * BPS, 8 +  1 * BPS, 12 +  1 * BPS)
+    GET_SSE( 2 * BPS, 4 +  2 * BPS, 8 +  2 * BPS, 12 +  2 * BPS)
+    GET_SSE( 3 * BPS, 4 +  3 * BPS, 8 +  3 * BPS, 12 +  3 * BPS)
+    GET_SSE( 4 * BPS, 4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS)
+    GET_SSE( 5 * BPS, 4 +  5 * BPS, 8 +  5 * BPS, 12 +  5 * BPS)
+    GET_SSE( 6 * BPS, 4 +  6 * BPS, 8 +  6 * BPS, 12 +  6 * BPS)
+    GET_SSE( 7 * BPS, 4 +  7 * BPS, 8 +  7 * BPS, 12 +  7 * BPS)
+    "mflo   %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    // hi/lo are clobbered because the asm accumulates in $ac0.
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+// Returns the sum of squared differences between the 8x8 byte blocks at 'a'
+// and 'b', whose rows are BPS bytes apart.
+// $ac0 (hi/lo) is cleared with 'mult $zero, $zero', each GET_SSE line
+// accumulates two 8-byte rows, and the low word of the accumulator is read
+// back with 'mflo'.
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3;
+  __asm__ volatile (
+    "mult   $zero,    $zero                            \n\t"
+    GET_SSE(0 * BPS, 4 + 0 * BPS, 1 * BPS, 4 + 1 * BPS)
+    GET_SSE(2 * BPS, 4 + 2 * BPS, 3 * BPS, 4 + 3 * BPS)
+    GET_SSE(4 * BPS, 4 + 4 * BPS, 5 * BPS, 4 + 5 * BPS)
+    GET_SSE(6 * BPS, 4 + 6 * BPS, 7 * BPS, 4 + 7 * BPS)
+    "mflo   %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    // hi/lo are clobbered because the asm accumulates in $ac0.
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+// Returns the sum of squared differences between the 4x4 byte blocks at 'a'
+// and 'b', whose rows are BPS bytes apart.
+// $ac0 (hi/lo) is cleared with 'mult $zero, $zero', the single GET_SSE line
+// accumulates all four 4-byte rows, and the low word of the accumulator is
+// read back with 'mflo'.
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  int count;
+  int temp0, temp1, temp2, temp3;
+  __asm__ volatile (
+    "mult   $zero,    $zero                            \n\t"
+    GET_SSE(0 * BPS, 1 * BPS, 2 * BPS, 3 * BPS)
+    "mflo   %[count]                                   \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [count]"=&r"(count)
+    : [a]"r"(a), [b]"r"(b)
+    // hi/lo are clobbered because the asm accumulates in $ac0.
+    : "memory", "hi", "lo"
+  );
+  return count;
+}
+
+// The SSE assembly helper macros are not needed past this point.
+#undef GET_SSE
+#undef GET_SSE_INNER
+
+#endif  // !WORK_AROUND_GCC
+
+// Drop the helper macros used by the code above so that they do not leak
+// into the rest of the file.
+#undef FILL_8_OR_16
+#undef FILL_PART
+#undef OUTPUT_EARLY_CLOBBER_REGS_17
+#undef MUL_HALF
+#undef ABS_X8
+#undef ADD_SUB_HALVES_X4
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+// macro for one pass through the for loop in QuantizeBlock, reading 2 values
+// at a time
+// QUANTDIV macro inlined
+// J - offset in bytes (kZigzag[n] * 2)
+// K - offset in bytes (kZigzag[n] * 4)
+// N - offset in bytes (n * 2)
+// N1 - offset in bytes ((n + 1) * 2)
+#define QUANTIZE_ONE(J, K, N, N1)                                         \
+  "ulw         %[temp1],     " #J "(%[ppin])                 \n\t"        \
+  "ulw         %[temp2],     " #J "(%[ppsharpen])            \n\t"        \
+  "lhu         %[temp3],     " #K "(%[ppzthresh])            \n\t"        \
+  "lhu         %[temp6],     " #K "+4(%[ppzthresh])          \n\t"        \
+  "absq_s.ph   %[temp4],     %[temp1]                        \n\t"        \
+  "ins         %[temp3],     %[temp6],         16,       16  \n\t"        \
+  "addu.ph     %[coeff],     %[temp4],         %[temp2]      \n\t"        \
+  "shra.ph     %[sign],      %[temp1],         15            \n\t"        \
+  "li          %[level],     0x10001                         \n\t"        \
+  "cmp.lt.ph   %[temp3],     %[coeff]                        \n\t"        \
+  "lhu         %[temp1],     " #J "(%[ppiq])                 \n\t"        \
+  "pick.ph     %[temp5],     %[level],         $0            \n\t"        \
+  "lw          %[temp2],     " #K "(%[ppbias])               \n\t"        \
+  "beqz        %[temp5],     0f                              \n\t"        \
+  "lhu         %[temp3],     " #J "(%[ppq])                  \n\t"        \
+  "beq         %[temp5],     %[level],         1f            \n\t"        \
+  "andi        %[temp5],     %[temp5],         0x1           \n\t"        \
+  "andi        %[temp4],     %[coeff],         0xffff        \n\t"        \
+  "beqz        %[temp5],     2f                              \n\t"        \
+  "mul         %[level],     %[temp4],         %[temp1]      \n\t"        \
+  "sh          $0,           " #J "+2(%[ppin])               \n\t"        \
+  "sh          $0,           " #N1 "(%[pout])                \n\t"        \
+  "addu        %[level],     %[level],         %[temp2]      \n\t"        \
+  "sra         %[level],     %[level],         17            \n\t"        \
+  "slt         %[temp4],     %[max_level],     %[level]      \n\t"        \
+  "movn        %[level],     %[max_level],     %[temp4]      \n\t"        \
+  "andi        %[temp6],     %[sign],          0xffff        \n\t"        \
+  "xor         %[level],     %[level],         %[temp6]      \n\t"        \
+  "subu        %[level],     %[level],         %[temp6]      \n\t"        \
+  "mul         %[temp5],     %[level],         %[temp3]      \n\t"        \
+  "or          %[ret],       %[ret],           %[level]      \n\t"        \
+  "sh          %[level],     " #N "(%[pout])                 \n\t"        \
+  "sh          %[temp5],     " #J "(%[ppin])                 \n\t"        \
+  "j           3f                                            \n\t"        \
+"2:                                                          \n\t"        \
+  "lhu         %[temp1],     " #J "+2(%[ppiq])               \n\t"        \
+  "srl         %[temp5],     %[coeff],         16            \n\t"        \
+  "mul         %[level],     %[temp5],         %[temp1]      \n\t"        \
+  "lw          %[temp2],     " #K "+4(%[ppbias])             \n\t"        \
+  "lhu         %[temp3],     " #J "+2(%[ppq])                \n\t"        \
+  "addu        %[level],     %[level],         %[temp2]      \n\t"        \
+  "sra         %[level],     %[level],         17            \n\t"        \
+  "srl         %[temp6],     %[sign],          16            \n\t"        \
+  "slt         %[temp4],     %[max_level],     %[level]      \n\t"        \
+  "movn        %[level],     %[max_level],     %[temp4]      \n\t"        \
+  "xor         %[level],     %[level],         %[temp6]      \n\t"        \
+  "subu        %[level],     %[level],         %[temp6]      \n\t"        \
+  "mul         %[temp5],     %[level],         %[temp3]      \n\t"        \
+  "sh          $0,           " #J "(%[ppin])                 \n\t"        \
+  "sh          $0,           " #N "(%[pout])                 \n\t"        \
+  "or          %[ret],       %[ret],           %[level]      \n\t"        \
+  "sh          %[temp5],     " #J "+2(%[ppin])               \n\t"        \
+  "sh          %[level],     " #N1 "(%[pout])                \n\t"        \
+  "j           3f                                            \n\t"        \
+"1:                                                          \n\t"        \
+  "lhu         %[temp1],     " #J "(%[ppiq])                 \n\t"        \
+  "lw          %[temp2],     " #K "(%[ppbias])               \n\t"        \
+  "ulw         %[temp3],     " #J "(%[ppq])                  \n\t"        \
+  "andi        %[temp5],     %[coeff],         0xffff        \n\t"        \
+  "srl         %[temp0],     %[coeff],         16            \n\t"        \
+  "lhu         %[temp6],     " #J "+2(%[ppiq])               \n\t"        \
+  "lw          %[coeff],     " #K "+4(%[ppbias])             \n\t"        \
+  "mul         %[level],     %[temp5],         %[temp1]      \n\t"        \
+  "mul         %[temp4],     %[temp0],         %[temp6]      \n\t"        \
+  "addu        %[level],     %[level],         %[temp2]      \n\t"        \
+  "addu        %[temp4],     %[temp4],         %[coeff]      \n\t"        \
+  "precrq.ph.w %[level],     %[temp4],         %[level]      \n\t"        \
+  "shra.ph     %[level],     %[level],         1             \n\t"        \
+  "cmp.lt.ph   %[max_level1],%[level]                        \n\t"        \
+  "pick.ph     %[level],     %[max_level],     %[level]      \n\t"        \
+  "xor         %[level],     %[level],         %[sign]       \n\t"        \
+  "subu.ph     %[level],     %[level],         %[sign]       \n\t"        \
+  "mul.ph      %[temp3],     %[level],         %[temp3]      \n\t"        \
+  "or          %[ret],       %[ret],           %[level]      \n\t"        \
+  "sh          %[level],     " #N "(%[pout])                 \n\t"        \
+  "srl         %[level],     %[level],         16            \n\t"        \
+  "sh          %[level],     " #N1 "(%[pout])                \n\t"        \
+  "usw         %[temp3],     " #J "(%[ppin])                 \n\t"        \
+  "j           3f                                            \n\t"        \
+"0:                                                          \n\t"        \
+  "sh          $0,           " #N "(%[pout])                 \n\t"        \
+  "sh          $0,           " #N1 "(%[pout])                \n\t"        \
+  "usw         $0,           " #J "(%[ppin])                 \n\t"        \
+"3:                                                          \n\t"
+// Quantizes in[] into out[], rewriting in[] as level * q. Returns (levels != 0).
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  int temp0, temp1, temp2, temp3, temp4, temp5,temp6;
+  int sign, coeff, level;
+  int max_level = MAX_LEVEL;
+  int max_level1 = max_level << 16 | max_level;  // MAX_LEVEL in both halfwords
+  int ret = 0;  // OR of every level written to out[]
+
+  int16_t* ppin             = &in[0];
+  int16_t* pout             = &out[0];
+  const uint16_t* ppsharpen = &mtx->sharpen_[0];
+  const uint32_t* ppzthresh = &mtx->zthresh_[0];
+  const uint16_t* ppq       = &mtx->q_[0];
+  const uint16_t* ppiq      = &mtx->iq_[0];
+  const uint32_t* ppbias    = &mtx->bias_[0];
+  // Eight passes; each one handles in[j] and in[j + 1] for j = 0, 2, ..., 14.
+  __asm__ volatile (
+    QUANTIZE_ONE( 0,  0,  0,  2)
+    QUANTIZE_ONE( 4,  8, 10, 12)
+    QUANTIZE_ONE( 8, 16,  4,  8)
+    QUANTIZE_ONE(12, 24, 14, 24)
+    QUANTIZE_ONE(16, 32,  6, 16)
+    QUANTIZE_ONE(20, 40, 22, 26)
+    QUANTIZE_ONE(24, 48, 18, 20)
+    QUANTIZE_ONE(28, 56, 28, 30)
+
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [sign]"=&r"(sign), [coeff]"=&r"(coeff),
+      [level]"=&r"(level), [temp6]"=&r"(temp6), [ret]"+&r"(ret)
+    : [ppin]"r"(ppin), [pout]"r"(pout), [max_level1]"r"(max_level1),
+      [ppiq]"r"(ppiq), [max_level]"r"(max_level),
+      [ppbias]"r"(ppbias), [ppzthresh]"r"(ppzthresh),
+      [ppsharpen]"r"(ppsharpen), [ppq]"r"(ppq)
+    : "memory", "hi", "lo"  // 'mul' may overwrite HI/LO
+  );
+
+  return (ret != 0);
+}
+// Runs QuantizeBlock on two consecutive 16-coefficient blocks and packs the
+// two results into bits 0 and 1 of the return value.
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  const int nz0 = QuantizeBlock(&in[0], &out[0], mtx);
+  const int nz1 = QuantizeBlock(&in[16], &out[16], mtx);
+  return nz0 | (nz1 << 1);
+}
+
+#undef QUANTIZE_ONE
+
+// macro for one horizontal pass in FTransformWHT
+// temp0..temp7 holds tmp[0]..tmp[15], two 16-bit values per register
+// A, B, C, D - offsets in bytes of the four int16 values to load from in buffer
+// TEMP0, TEMP1 - registers receiving the four results of this pass
+#define HORIZONTAL_PASS_WHT(A, B, C, D, TEMP0, TEMP1)                          \
+  "lh              %[" #TEMP0 "],  " #A "(%[in])            \n\t"              \
+  "lh              %[" #TEMP1 "],  " #B "(%[in])            \n\t"              \
+  "lh              %[temp8],     " #C "(%[in])              \n\t"              \
+  "lh              %[temp9],     " #D "(%[in])              \n\t"              \
+  "ins             %[" #TEMP1 "],  %[" #TEMP0 "],  16,  16  \n\t"              \
+  "ins             %[temp9],     %[temp8],     16,  16      \n\t"              \
+  "subq.ph         %[temp8],     %[" #TEMP1 "],  %[temp9]   \n\t"              \
+  "addq.ph         %[temp9],     %[" #TEMP1 "],  %[temp9]   \n\t"              \
+  "precrq.ph.w     %[" #TEMP0 "],  %[temp8],     %[temp9]   \n\t"              \
+  "append          %[temp8],     %[temp9],     16           \n\t"              \
+  "subq.ph         %[" #TEMP1 "],  %[" #TEMP0 "],  %[temp8] \n\t"              \
+  "addq.ph         %[" #TEMP0 "],  %[" #TEMP0 "],  %[temp8] \n\t"              \
+  "rotr            %[" #TEMP1 "],  %[" #TEMP1 "],  16       \n\t"
+
+// macro for one vertical pass in FTransformWHT
+// temp0..temp7 holds tmp[0]..tmp[15]; temp8 and temp9 are used as scratch
+// A, B, C, D - offsets in bytes to store to out buffer (two int16 per store)
+// TEMP0, TEMP2, TEMP4 and TEMP6 - registers for corresponding tmp elements
+#define VERTICAL_PASS_WHT(A, B, C, D, TEMP0, TEMP2, TEMP4, TEMP6)              \
+  "addq.ph         %[temp8],     %[" #TEMP0 "],  %[" #TEMP4 "]    \n\t"        \
+  "addq.ph         %[temp9],     %[" #TEMP2 "],  %[" #TEMP6 "]    \n\t"        \
+  "subq.ph         %[" #TEMP2 "],  %[" #TEMP2 "],  %[" #TEMP6 "]  \n\t"        \
+  "subq.ph         %[" #TEMP6 "],  %[" #TEMP0 "],  %[" #TEMP4 "]  \n\t"        \
+  "addqh.ph        %[" #TEMP0 "],  %[temp8],     %[temp9]         \n\t"        \
+  "subqh.ph        %[" #TEMP4 "],  %[" #TEMP6 "],  %[" #TEMP2 "]  \n\t"        \
+  "addqh.ph        %[" #TEMP2 "],  %[" #TEMP2 "],  %[" #TEMP6 "]  \n\t"        \
+  "subqh.ph        %[" #TEMP6 "],  %[temp8],     %[temp9]         \n\t"        \
+  "usw             %[" #TEMP0 "],  " #A "(%[out])                 \n\t"        \
+  "usw             %[" #TEMP2 "],  " #B "(%[out])                 \n\t"        \
+  "usw             %[" #TEMP4 "],  " #C "(%[out])                 \n\t"        \
+  "usw             %[" #TEMP6 "],  " #D "(%[out])                 \n\t"
+// Forward WHT: transforms in[0], in[16], ..., in[240] into out[0..15].
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+  int temp0, temp1, temp2, temp3, temp4;
+  int temp5, temp6, temp7, temp8, temp9;
+
+  __asm__ volatile (
+    HORIZONTAL_PASS_WHT(  0,  32,  64,  96, temp0, temp1)
+    HORIZONTAL_PASS_WHT(128, 160, 192, 224, temp2, temp3)
+    HORIZONTAL_PASS_WHT(256, 288, 320, 352, temp4, temp5)
+    HORIZONTAL_PASS_WHT(384, 416, 448, 480, temp6, temp7)
+    VERTICAL_PASS_WHT(0,  8, 16, 24, temp0, temp2, temp4, temp6)
+    VERTICAL_PASS_WHT(4, 12, 20, 28, temp1, temp3, temp5, temp7)
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
+      [temp9]"=&r"(temp9)
+    : [in]"r"(in), [out]"r"(out)
+    : "memory"  // 'out' is written through the pointer operand
+  );
+}
+
+#undef VERTICAL_PASS_WHT
+#undef HORIZONTAL_PASS_WHT
+
+// macro for converting coefficients to bin: bin = min(abs(coeff) >> 3, 31),
+// then the 32-bit counter at dist + 4 * bin is incremented; 8 coeffs at time
+// A, B, C, D - offsets in bytes to load from out buffer (two coeffs per load)
+#define CONVERT_COEFFS_TO_BIN(A, B, C, D)                                      \
+  "ulw        %[temp0],  " #A "(%[out])                \n\t"                   \
+  "ulw        %[temp1],  " #B "(%[out])                \n\t"                   \
+  "ulw        %[temp2],  " #C "(%[out])                \n\t"                   \
+  "ulw        %[temp3],  " #D "(%[out])                \n\t"                   \
+  "absq_s.ph  %[temp0],  %[temp0]                      \n\t"                   \
+  "absq_s.ph  %[temp1],  %[temp1]                      \n\t"                   \
+  "absq_s.ph  %[temp2],  %[temp2]                      \n\t"                   \
+  "absq_s.ph  %[temp3],  %[temp3]                      \n\t"                   \
+  /* TODO(skal): add rounding ? shra_r.ph : shra.ph */                         \
+  /*             for following 4 instructions       */                         \
+  "shra.ph    %[temp0],  %[temp0],    3                \n\t"                   \
+  "shra.ph    %[temp1],  %[temp1],    3                \n\t"                   \
+  "shra.ph    %[temp2],  %[temp2],    3                \n\t"                   \
+  "shra.ph    %[temp3],  %[temp3],    3                \n\t"                   \
+  "shll_s.ph  %[temp0],  %[temp0],    10               \n\t"                   \
+  "shll_s.ph  %[temp1],  %[temp1],    10               \n\t"                   \
+  "shll_s.ph  %[temp2],  %[temp2],    10               \n\t"                   \
+  "shll_s.ph  %[temp3],  %[temp3],    10               \n\t"                   \
+  "shrl.ph    %[temp0],  %[temp0],    10               \n\t"                   \
+  "shrl.ph    %[temp1],  %[temp1],    10               \n\t"                   \
+  "shrl.ph    %[temp2],  %[temp2],    10               \n\t"                   \
+  "shrl.ph    %[temp3],  %[temp3],    10               \n\t"                   \
+  "shll.ph    %[temp0],  %[temp0],    2                \n\t"                   \
+  "shll.ph    %[temp1],  %[temp1],    2                \n\t"                   \
+  "shll.ph    %[temp2],  %[temp2],    2                \n\t"                   \
+  "shll.ph    %[temp3],  %[temp3],    2                \n\t"                   \
+  "ext        %[temp4],  %[temp0],    0,       16      \n\t"                   \
+  "ext        %[temp0],  %[temp0],    16,      16      \n\t"                   \
+  "addu       %[temp4],  %[temp4],    %[dist]          \n\t"                   \
+  "addu       %[temp0],  %[temp0],    %[dist]          \n\t"                   \
+  "ext        %[temp5],  %[temp1],    0,       16      \n\t"                   \
+  "lw         %[temp8],  0(%[temp4])                   \n\t"                   \
+  "ext        %[temp1],  %[temp1],    16,      16      \n\t"                   \
+  "addu       %[temp5],  %[temp5],    %[dist]          \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp4])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp0])                   \n\t"                   \
+  "addu       %[temp1],  %[temp1],    %[dist]          \n\t"                   \
+  "ext        %[temp6],  %[temp2],    0,       16      \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp0])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp5])                   \n\t"                   \
+  "ext        %[temp2],  %[temp2],    16,      16      \n\t"                   \
+  "addu       %[temp6],  %[temp6],    %[dist]          \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp5])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp1])                   \n\t"                   \
+  "addu       %[temp2],  %[temp2],    %[dist]          \n\t"                   \
+  "ext        %[temp7],  %[temp3],    0,       16      \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp1])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp6])                   \n\t"                   \
+  "ext        %[temp3],  %[temp3],    16,      16      \n\t"                   \
+  "addu       %[temp7],  %[temp7],    %[dist]          \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp6])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp2])                   \n\t"                   \
+  "addu       %[temp3],  %[temp3],    %[dist]          \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp2])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp7])                   \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp7])                   \n\t"                   \
+  "lw         %[temp8],  0(%[temp3])                   \n\t"                   \
+  "addiu      %[temp8],  %[temp8],    1                \n\t"                   \
+  "sw         %[temp8],  0(%[temp3])                   \n\t"
+// Builds the distribution of binned forward-transform coefficients over blocks
+// start_block..end_block-1 and passes it to VP8SetHistogramData().
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
+
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin; the macro clamps via saturating shifts.
+    __asm__ volatile (
+      CONVERT_COEFFS_TO_BIN( 0,  4,  8, 12)
+      CONVERT_COEFFS_TO_BIN(16, 20, 24, 28)
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+        [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+        [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
+      : [dist]"r"(distribution), [out]"r"(out)
+      : "memory"
+    );
+  }
+  VP8SetHistogramData(distribution, histo);
+}
+
+#undef CONVERT_COEFFS_TO_BIN
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitMIPSdspR2(void);
+// Points the encoder's global VP8* function hooks at the routines of this file.
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitMIPSdspR2(void) {
+  VP8FTransform = FTransform;                  // transforms
+  VP8ITransform = ITransform;
+  VP8FTransformWHT = FTransformWHT;
+  VP8TDisto4x4 = Disto4x4;                     // distortion
+  VP8TDisto16x16 = Disto16x16;
+  VP8EncPredLuma4 = Intra4Preds;               // intra predictions
+  VP8EncPredLuma16 = Intra16Preds;
+  VP8EncPredChroma8 = IntraChromaPreds;
+  VP8EncQuantizeBlock = QuantizeBlock;         // quantization
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8CollectHistogram = CollectHistogram;      // statistics
+#if !defined(WORK_AROUND_GCC)
+  VP8SSE4x4 = SSE4x4;                          // SSE, unless worked around
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE16x16 = SSE16x16;
+#endif
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/enc_neon.c b/drivers/webp/dsp/enc_neon.c
new file mode 100644
index 0000000000..c2aef58e70
--- /dev/null
+++ b/drivers/webp/dsp/enc_neon.c
@@ -0,0 +1,934 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of speed-critical encoding functions.
+//
+// adapted from libvpx (http://www.webmproject.org/code/)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+
+#include "./neon.h"
+#include "../enc/vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// Inverse transform.
+// This code is pretty much the same as TransformOne in the dec_neon.c, except
+// that the result is added to *ref. See the comments there for explanations.
+
+static const int16_t kC1 = 20091;  // the input is added back after multiplying
+static const int16_t kC2 = 17734;  // half of kC2, actually (vqdmulh doubles it).
+
+// This code works but is *slower* than the inlined-asm version below
+// (with gcc-4.6), so it is only compiled when WEBP_USE_INTRINSICS is
+// defined.
+// With gcc-4.8, it is a little faster than the inlined-assembly version.
+#if defined(WEBP_USE_INTRINSICS)
+// Zero-extends the eight bytes held in 'v' to eight 16-bit lanes.
+static WEBP_INLINE int16x8_t ConvertU8ToS16(uint32x2_t v) {
+  const uint16x8_t widened = vmovl_u8(vreinterpret_u8_u32(v));
+  return vreinterpretq_s16_u16(widened);
+}
+
+// Narrows 'dst01' and 'dst23' to unsigned 8 bits with saturation and writes
+// the four resulting 4-pixel rows to rows 0..3 of 'dst'.
+static WEBP_INLINE void SaturateAndStore4x4(uint8_t* const dst,
+                                            const int16x8_t dst01,
+                                            const int16x8_t dst23) {
+  // Saturate to 8b, then view each result as two 32-bit rows.
+  const uint32x2_t rows01 = vreinterpret_u32_u8(vqmovun_s16(dst01));
+  const uint32x2_t rows23 = vreinterpret_u32_u8(vqmovun_s16(dst23));
+
+  // Each 32-bit lane holds the four pixels of one output row.
+  vst1_lane_u32((uint32_t*)(dst + 0 * BPS), rows01, 0);
+  vst1_lane_u32((uint32_t*)(dst + 1 * BPS), rows01, 1);
+  vst1_lane_u32((uint32_t*)(dst + 2 * BPS), rows23, 0);
+  vst1_lane_u32((uint32_t*)(dst + 3 * BPS), rows23, 1);
+}
+// Adds the residuals 'row01'/'row23' (descaled by 3 bits, with rounding) to the
+// 4x4 pixels at 'ref' and stores the sum, saturated to 8 bits, at 'dst'.
+static WEBP_INLINE void Add4x4(const int16x8_t row01, const int16x8_t row23,
+                               const uint8_t* const ref, uint8_t* const dst) {
+  uint32x2_t ref01 = vdup_n_u32(0);
+  uint32x2_t ref23 = vdup_n_u32(0);
+
+  // Load the reference pixels, one 4-byte row per lane ('ref' stays const).
+  ref01 = vld1_lane_u32((const uint32_t*)(ref + 0 * BPS), ref01, 0);
+  ref23 = vld1_lane_u32((const uint32_t*)(ref + 2 * BPS), ref23, 0);
+  ref01 = vld1_lane_u32((const uint32_t*)(ref + 1 * BPS), ref01, 1);
+  ref23 = vld1_lane_u32((const uint32_t*)(ref + 3 * BPS), ref23, 1);
+
+  {
+    // Convert to 16b.
+    const int16x8_t ref01_s16 = ConvertU8ToS16(ref01);
+    const int16x8_t ref23_s16 = ConvertU8ToS16(ref23);
+    // Descale the residual with rounding and add it to the reference.
+    const int16x8_t out01 = vrsraq_n_s16(ref01_s16, row01, 3);
+    const int16x8_t out23 = vrsraq_n_s16(ref23_s16, row23, 3);
+    // Saturate to 8b and store.
+    SaturateAndStore4x4(dst, out01, out23);
+  }
+}
+// Transposes the four 4-element rows packed in 'in0' (a | b) and 'in1' (c | d):
+// a0 a1 a2 a3 | b0 b1 b2 b3   => a0 b0 c0 d0 | a1 b1 c1 d1
+// c0 c1 c2 c3 | d0 d1 d2 d3      a2 b2 c2 d2 | a3 b3 c3 d3
+static WEBP_INLINE void Transpose8x2(const int16x8_t in0, const int16x8_t in1,
+                                     int16x8x2_t* const out) {
+  // First zip: a0 c0 a1 c1 a2 c2 a3 c3 | b0 d0 b1 d1 b2 d2 b3 d3.
+  const int16x8x2_t zipped = vzipq_s16(in0, in1);
+  *out = vzipq_s16(zipped.val[0], zipped.val[1]);  // second zip finishes it
+}
+// One 1-D pass of the inverse transform on 'rows'; the result is transposed.
+static WEBP_INLINE void TransformPass(int16x8x2_t* const rows) {
+  // {rows} = in0 | in4
+  //          in8 | in12
+  // B1 = in4 | in12
+  const int16x8_t B1 =
+      vcombine_s16(vget_high_s16(rows->val[0]), vget_high_s16(rows->val[1]));
+  // C0 = kC1 * in4 | kC1 * in12
+  // C1 = kC2 * in4 | kC2 * in12
+  const int16x8_t C0 = vsraq_n_s16(B1, vqdmulhq_n_s16(B1, kC1), 1);
+  const int16x8_t C1 = vqdmulhq_n_s16(B1, kC2);
+  const int16x4_t a = vqadd_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 + in8
+  const int16x4_t b = vqsub_s16(vget_low_s16(rows->val[0]),
+                                vget_low_s16(rows->val[1]));   // in0 - in8
+  // c = kC2 * in4 - kC1 * in12
+  // d = kC1 * in4 + kC2 * in12
+  const int16x4_t c = vqsub_s16(vget_low_s16(C1), vget_high_s16(C0));
+  const int16x4_t d = vqadd_s16(vget_low_s16(C0), vget_high_s16(C1));
+  const int16x8_t D0 = vcombine_s16(a, b);      // D0 = a | b
+  const int16x8_t D1 = vcombine_s16(d, c);      // D1 = d | c
+  const int16x8_t E0 = vqaddq_s16(D0, D1);      // a+d | b+c
+  const int16x8_t E_tmp = vqsubq_s16(D0, D1);   // a-d | b-c
+  const int16x8_t E1 = vcombine_s16(vget_high_s16(E_tmp), vget_low_s16(E_tmp));
+  Transpose8x2(E0, E1, rows);                   // E1 = b-c | a-d
+}
+// Inverse-transforms 'in' (16 coeffs), adds it to 'ref' and stores to 'dst'.
+static void ITransformOne(const uint8_t* ref,
+                          const int16_t* in, uint8_t* dst) {
+  int16x8x2_t rows;
+  INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
+  TransformPass(&rows);   // 1st pass
+  TransformPass(&rows);   // 2nd pass, on the transposed output of the 1st
+  Add4x4(rows.val[0], rows.val[1], ref, dst);
+}
+
+#else
+// Inverse-transforms 'in' (16 coeffs), adds it to 'ref' and stores to 'dst'.
+static void ITransformOne(const uint8_t* ref,
+                          const int16_t* in, uint8_t* dst) {
+  const int kBPS = BPS;
+  const int16_t kC1C2[] = { kC1, kC2, 0, 0 };
+
+  __asm__ volatile (
+    "vld1.16         {q1, q2}, [%[in]]           \n"
+    "vld1.16         {d0}, [%[kC1C2]]            \n"
+
+    // d2: in[0]
+    // d3: in[8]
+    // d4: in[4]
+    // d5: in[12]
+    "vswp            d3, d4                      \n"
+
+    // q8 = {in[4], in[12]} * kC1 * 2 >> 16
+    // q9 = {in[4], in[12]} * kC2 >> 16
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    // d22 = a = in[0] + in[8]
+    // d23 = b = in[0] - in[8]
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    //  q8 = in[4]/[12] * kC1 >> 16
+    "vshr.s16        q8, q8, #1                  \n"
+
+    // Add {in[4], in[12]} back after the multiplication.
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    // d20 = c = in[4]*kC2 - in[12]*kC1
+    // d21 = d = in[4]*kC1 + in[12]*kC2
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    // d2 = tmp[0] = a + d
+    // d3 = tmp[1] = b + c
+    // d4 = tmp[2] = b - c
+    // d5 = tmp[3] = a - d
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    "vswp            d3, d4                      \n"
+
+    // q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+    // q9 = {tmp[4], tmp[12]} * kC2 >> 16
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    // d22 = a = tmp[0] + tmp[8]
+    // d23 = b = tmp[0] - tmp[8]
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    "vshr.s16        q8, q8, #1                  \n"
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    // d20 = c = tmp[4]*kC2 - tmp[12]*kC1
+    // d21 = d = tmp[4]*kC1 + tmp[12]*kC2
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    // d2 = tmp[0] = a + d
+    // d3 = tmp[1] = b + c
+    // d4 = tmp[2] = b - c
+    // d5 = tmp[3] = a - d
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+    // d6/d7 (= q3) receive the four 'ref' rows; each load advances %[ref].
+    "vld1.32         d6[0], [%[ref]], %[kBPS]    \n"
+    "vld1.32         d6[1], [%[ref]], %[kBPS]    \n"
+    "vld1.32         d7[0], [%[ref]], %[kBPS]    \n"
+    "vld1.32         d7[1], [%[ref]], %[kBPS]    \n"
+
+    "sub         %[ref], %[ref], %[kBPS], lsl #2 \n"
+
+    // (val) + 4 >> 3
+    "vrshr.s16       d2, d2, #3                  \n"
+    "vrshr.s16       d3, d3, #3                  \n"
+    "vrshr.s16       d4, d4, #3                  \n"
+    "vrshr.s16       d5, d5, #3                  \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    // Must accumulate before saturating
+    "vmovl.u8        q8, d6                      \n"
+    "vmovl.u8        q9, d7                      \n"
+
+    "vqadd.s16       q1, q1, q8                  \n"
+    "vqadd.s16       q2, q2, q9                  \n"
+
+    "vqmovun.s16     d0, q1                      \n"
+    "vqmovun.s16     d1, q2                      \n"
+
+    "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[1], [%[dst]]             \n"
+
+    : [in] "+r"(in), [dst] "+r"(dst), [ref] "+r"(ref)  // modified registers
+    : [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2)                  // constants
+    : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"  // clobbered
+  );
+}
+
+#endif    // WEBP_USE_INTRINSICS
+// Applies ITransformOne to one block, or to two side-by-side ones if 'do_two'.
+static void ITransform(const uint8_t* ref,
+                       const int16_t* in, uint8_t* dst, int do_two) {
+  int n;
+  for (n = 0; n < (do_two ? 2 : 1); ++n) {
+    ITransformOne(ref + 4 * n, in + 16 * n, dst + 4 * n);
+  }
+}
+
+// Gathers the four 4-byte rows of a 4x4 block (BPS apart) into one vector.
+static uint8x16_t Load4x4(const uint8_t* src) {
+  uint32x4_t rows = vdupq_n_u32(0);
+  rows = vld1q_lane_u32((const uint32_t*)&src[0 * BPS], rows, 0);
+  rows = vld1q_lane_u32((const uint32_t*)&src[1 * BPS], rows, 1);
+  rows = vld1q_lane_u32((const uint32_t*)&src[2 * BPS], rows, 2);
+  rows = vld1q_lane_u32((const uint32_t*)&src[3 * BPS], rows, 3);
+  return vreinterpretq_u8_u32(rows);
+}
+
+// Forward transform.
+
+#if defined(WEBP_USE_INTRINSICS)
+
+// Transposes the 4x4 matrix of rows A, B, C, D. Transposed rows 0 and 1 go to
+// 'out01'; transposed rows 3 and 2 (in that order) go to 'out32'.
+static WEBP_INLINE void Transpose4x4_S16(const int16x4_t A, const int16x4_t B,
+                                         const int16x4_t C, const int16x4_t D,
+                                         int16x8_t* const out01,
+                                         int16x8_t* const out32) {
+  // 16-bit transposes within each pair of rows ...
+  const int16x4x2_t ab = vtrn_s16(A, B);
+  const int16x4x2_t cd = vtrn_s16(C, D);
+  // ... followed by 32-bit transposes across the two pairs.
+  const int32x2x2_t even = vtrn_s32(vreinterpret_s32_s16(ab.val[0]),
+                                    vreinterpret_s32_s16(cd.val[0]));
+  const int32x2x2_t odd = vtrn_s32(vreinterpret_s32_s16(ab.val[1]),
+                                   vreinterpret_s32_s16(cd.val[1]));
+  *out01 = vreinterpretq_s16_s32(vcombine_s32(even.val[0], odd.val[0]));
+  *out32 = vreinterpretq_s16_s32(vcombine_s32(odd.val[1], even.val[1]));
+}
+
+static WEBP_INLINE int16x8_t DiffU8ToS16(const uint8x8_t a,
+                                         const uint8x8_t b) {
+  return vreinterpretq_s16_u16(vsubl_u8(a, b));  // a - b per lane, as s16
+}
+// Forward transform of the 4x4 difference src - ref; writes 16 coeffs to 'out'.
+static void FTransform(const uint8_t* src, const uint8_t* ref,
+                       int16_t* out) {
+  int16x8_t d0d1, d3d2;   // working 4x4 int16 variables
+  {
+    const uint8x16_t S0 = Load4x4(src);
+    const uint8x16_t R0 = Load4x4(ref);
+    const int16x8_t D0D1 = DiffU8ToS16(vget_low_u8(S0), vget_low_u8(R0));
+    const int16x8_t D2D3 = DiffU8ToS16(vget_high_u8(S0), vget_high_u8(R0));
+    const int16x4_t D0 = vget_low_s16(D0D1);
+    const int16x4_t D1 = vget_high_s16(D0D1);
+    const int16x4_t D2 = vget_low_s16(D2D3);
+    const int16x4_t D3 = vget_high_s16(D2D3);
+    Transpose4x4_S16(D0, D1, D2, D3, &d0d1, &d3d2);
+  }
+  {    // 1st pass
+    const int32x4_t kCst937 = vdupq_n_s32(937);
+    const int32x4_t kCst1812 = vdupq_n_s32(1812);
+    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);   // d0+d3 | d1+d2   (=a0|a1)
+    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);   // d0-d3 | d1-d2   (=a3|a2)
+    const int16x8_t a0a1_2 = vshlq_n_s16(a0a1, 3);
+    const int16x4_t tmp0 = vadd_s16(vget_low_s16(a0a1_2),
+                                    vget_high_s16(a0a1_2));
+    const int16x4_t tmp2 = vsub_s16(vget_low_s16(a0a1_2),
+                                    vget_high_s16(a0a1_2));
+    const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
+    const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
+    const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
+    const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
+    const int16x4_t tmp1 = vshrn_n_s32(vaddq_s32(a2_p_a3, kCst1812), 9);
+    const int16x4_t tmp3 = vshrn_n_s32(vaddq_s32(a3_m_a2, kCst937), 9);
+    Transpose4x4_S16(tmp0, tmp1, tmp2, tmp3, &d0d1, &d3d2);
+  }
+  {    // 2nd pass
+    // the (1<<16) addition is for the replacement: a3!=0  <-> 1-(a3==0)
+    const int32x4_t kCst12000 = vdupq_n_s32(12000 + (1 << 16));
+    const int32x4_t kCst51000 = vdupq_n_s32(51000);
+    const int16x8_t a0a1 = vaddq_s16(d0d1, d3d2);   // d0+d3 | d1+d2   (=a0|a1)
+    const int16x8_t a3a2 = vsubq_s16(d0d1, d3d2);   // d0-d3 | d1-d2   (=a3|a2)
+    const int16x4_t a0_k7 = vadd_s16(vget_low_s16(a0a1), vdup_n_s16(7));
+    const int16x4_t out0 = vshr_n_s16(vadd_s16(a0_k7, vget_high_s16(a0a1)), 4);
+    const int16x4_t out2 = vshr_n_s16(vsub_s16(a0_k7, vget_high_s16(a0a1)), 4);
+    const int32x4_t a3_2217 = vmull_n_s16(vget_low_s16(a3a2), 2217);
+    const int32x4_t a2_2217 = vmull_n_s16(vget_high_s16(a3a2), 2217);
+    const int32x4_t a2_p_a3 = vmlal_n_s16(a2_2217, vget_low_s16(a3a2), 5352);
+    const int32x4_t a3_m_a2 = vmlsl_n_s16(a3_2217, vget_high_s16(a3a2), 5352);
+    const int16x4_t tmp1 = vaddhn_s32(a2_p_a3, kCst12000);
+    const int16x4_t out3 = vaddhn_s32(a3_m_a2, kCst51000);
+    const int16x4_t a3_eq_0 =
+        vreinterpret_s16_u16(vceq_s16(vget_low_s16(a3a2), vdup_n_s16(0)));
+    const int16x4_t out1 = vadd_s16(tmp1, a3_eq_0);   // a3_eq_0 is -1 or 0
+    vst1_s16(out +  0, out0);
+    vst1_s16(out +  4, out1);
+    vst1_s16(out +  8, out2);
+    vst1_s16(out + 12, out3);
+  }
+}
+
+#else
+
+// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
+static const int16_t kCoeff16[] = {   // multipliers, loaded into q8
+  5352,  5352,  5352, 5352, 2217,  2217,  2217, 2217
+};
+static const int32_t kCoeff32[] = {   // rounding constants, one row per q reg
+   1812,  1812,  1812,  1812,          // q9:  1st pass, temp[1 + i * 4]
+    937,   937,   937,   937,          // q10: 1st pass, temp[3 + i * 4]
+  12000, 12000, 12000, 12000,          // q11: 2nd pass, op[4]
+  51000, 51000, 51000, 51000           // q12: 2nd pass, op[12]
+};
+
+static void FTransform(const uint8_t* src, const uint8_t* ref,
+                       int16_t* out) {
+  const int kBPS = BPS;
+  const uint8_t* src_ptr = src;
+  const uint8_t* ref_ptr = ref;
+  const int16_t* coeff16 = kCoeff16;
+  const int32_t* coeff32 = kCoeff32;
+
+  __asm__ volatile (
+    // load src into q4, q5 in high half
+    "vld1.8 {d8},  [%[src_ptr]], %[kBPS]      \n"
+    "vld1.8 {d10}, [%[src_ptr]], %[kBPS]      \n"
+    "vld1.8 {d9},  [%[src_ptr]], %[kBPS]      \n"
+    "vld1.8 {d11}, [%[src_ptr]]               \n"
+
+    // load ref into q6, q7 in high half
+    "vld1.8 {d12}, [%[ref_ptr]], %[kBPS]      \n"
+    "vld1.8 {d14}, [%[ref_ptr]], %[kBPS]      \n"
+    "vld1.8 {d13}, [%[ref_ptr]], %[kBPS]      \n"
+    "vld1.8 {d15}, [%[ref_ptr]]               \n"
+
+    // Pack the high values in to q4 and q6
+    "vtrn.32     q4, q5                       \n"
+    "vtrn.32     q6, q7                       \n"
+
+    // d[0-3] = src - ref
+    "vsubl.u8    q0, d8, d12                  \n"
+    "vsubl.u8    q1, d9, d13                  \n"
+
+    // load coeff16 into q8(d16=5352, d17=2217)
+    "vld1.16     {q8}, [%[coeff16]]           \n"
+
+    // load first half of coeff32 into q9 = 1812, q10 = 937 (pointer advances)
+    "vld1.32     {q9, q10}, [%[coeff32]]!     \n"
+
+    // load second half of coeff32 into q11=12000, q12=51000
+    "vld1.32     {q11,q12}, [%[coeff32]]      \n"
+
+    // part 1
+    // Transpose. Register dN is the same as dN in C
+    "vtrn.32         d0, d2                   \n"
+    "vtrn.32         d1, d3                   \n"
+    "vtrn.16         d0, d1                   \n"
+    "vtrn.16         d2, d3                   \n"
+
+    "vadd.s16        d4, d0, d3               \n" // a0 = d0 + d3
+    "vadd.s16        d5, d1, d2               \n" // a1 = d1 + d2
+    "vsub.s16        d6, d1, d2               \n" // a2 = d1 - d2
+    "vsub.s16        d7, d0, d3               \n" // a3 = d0 - d3
+
+    "vadd.s16        d0, d4, d5               \n" // a0 + a1
+    "vshl.s16        d0, d0, #3               \n" // temp[0+i*4] = (a0+a1) << 3
+    "vsub.s16        d2, d4, d5               \n" // a0 - a1
+    "vshl.s16        d2, d2, #3               \n" // temp[2+i*4] = (a0-a1) << 3
+
+    "vmlal.s16       q9, d7, d16              \n" // a3*5352 + 1812
+    "vmlal.s16       q10, d7, d17             \n" // a3*2217 + 937
+    "vmlal.s16       q9, d6, d17              \n" // a2*2217 + a3*5352 + 1812
+    "vmlsl.s16       q10, d6, d16             \n" // a3*2217 + 937 - a2*5352
+
+    // temp[1+i*4] = (a2*2217 + a3*5352 + 1812) >> 9
+    // temp[3+i*4] = (a3*2217 + 937 - a2*5352) >> 9
+    "vshrn.s32       d1, q9, #9               \n"
+    "vshrn.s32       d3, q10, #9              \n"
+
+    // part 2
+    // transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
+    "vtrn.32         d0, d2                   \n"
+    "vtrn.32         d1, d3                   \n"
+    "vtrn.16         d0, d1                   \n"
+    "vtrn.16         d2, d3                   \n"
+
+    "vmov.s16        d26, #7                  \n"
+
+    "vadd.s16        d4, d0, d3               \n" // a1 = ip[0] + ip[12]
+    "vadd.s16        d5, d1, d2               \n" // b1 = ip[4] + ip[8]
+    "vsub.s16        d6, d1, d2               \n" // c1 = ip[4] - ip[8]
+    "vadd.s16        d4, d4, d26              \n" // a1 + 7
+    "vsub.s16        d7, d0, d3               \n" // d1 = ip[0] - ip[12]
+
+    "vadd.s16        d0, d4, d5               \n" // op[0] = a1 + b1 + 7
+    "vsub.s16        d2, d4, d5               \n" // op[8] = a1 - b1 + 7
+
+    "vmlal.s16       q11, d7, d16             \n" // d1*5352 + 12000
+    "vmlal.s16       q12, d7, d17             \n" // d1*2217 + 51000
+
+    "vceq.s16        d4, d7, #0               \n"
+
+    "vshr.s16        d0, d0, #4               \n"
+    "vshr.s16        d2, d2, #4               \n"
+
+    "vmlal.s16       q11, d6, d17             \n" // c1*2217 + d1*5352 + 12000
+    "vmlsl.s16       q12, d6, d16             \n" // d1*2217 - c1*5352 + 51000
+
+    "vmvn            d4, d4                   \n" // !(d1 == 0)
+    // op[4] = (c1*2217 + d1*5352 + 12000)>>16
+    "vshrn.s32       d1, q11, #16             \n"
+    // op[4] += (d1!=0)
+    "vsub.s16        d1, d1, d4               \n" // d4 is -1 where d1 != 0
+    // op[12]= (d1*2217 - c1*5352 + 51000)>>16
+    "vshrn.s32       d3, q12, #16             \n"
+
+    // set result to out array
+    "vst1.16         {q0, q1}, [%[out]]   \n"
+    : [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
+      [coeff32] "+r"(coeff32)          // modified registers
+    : [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
+      [out] "r"(out)                   // constants
+    : "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
+      "q10", "q11", "q12", "q13"       // clobbered
+  );
+}
+
+#endif
+
+#define LOAD_LANE_16b(VALUE, LANE) do {             \
+  (VALUE) = vld1_lane_s16(src, (VALUE), (LANE));    \
+  src += stride;                                    \
+} while (0)
+
+static void FTransformWHT(const int16_t* src, int16_t* out) {
+  const int stride = 16;   // distance, in int16_t, between two inputs
+  const int16x4_t zero = vdup_n_s16(0);
+  int32x4x4_t tmp0;
+  int16x4x4_t in;
+  INIT_VECTOR4(in, zero, zero, zero, zero);
+  LOAD_LANE_16b(in.val[0], 0);  // in.val[k] lane j <- src[(4 * j + k) * 16]
+  LOAD_LANE_16b(in.val[1], 0);
+  LOAD_LANE_16b(in.val[2], 0);
+  LOAD_LANE_16b(in.val[3], 0);
+  LOAD_LANE_16b(in.val[0], 1);
+  LOAD_LANE_16b(in.val[1], 1);
+  LOAD_LANE_16b(in.val[2], 1);
+  LOAD_LANE_16b(in.val[3], 1);
+  LOAD_LANE_16b(in.val[0], 2);
+  LOAD_LANE_16b(in.val[1], 2);
+  LOAD_LANE_16b(in.val[2], 2);
+  LOAD_LANE_16b(in.val[3], 2);
+  LOAD_LANE_16b(in.val[0], 3);
+  LOAD_LANE_16b(in.val[1], 3);
+  LOAD_LANE_16b(in.val[2], 3);
+  LOAD_LANE_16b(in.val[3], 3);
+
+  {    // first pass, widening the 16b inputs to 32b
+    // a0 = in[0 * 16] + in[2 * 16]
+    // a1 = in[1 * 16] + in[3 * 16]
+    // a2 = in[1 * 16] - in[3 * 16]
+    // a3 = in[0 * 16] - in[2 * 16]
+    const int32x4_t a0 = vaddl_s16(in.val[0], in.val[2]);
+    const int32x4_t a1 = vaddl_s16(in.val[1], in.val[3]);
+    const int32x4_t a2 = vsubl_s16(in.val[1], in.val[3]);
+    const int32x4_t a3 = vsubl_s16(in.val[0], in.val[2]);
+    tmp0.val[0] = vaddq_s32(a0, a1);
+    tmp0.val[1] = vaddq_s32(a3, a2);
+    tmp0.val[2] = vsubq_s32(a3, a2);
+    tmp0.val[3] = vsubq_s32(a0, a1);
+  }
+  {    // second pass, on the output of Transpose4x4(), with a final >> 1
+    const int32x4x4_t tmp1 = Transpose4x4(tmp0);
+    // a0 = tmp[0 + i] + tmp[ 8 + i]
+    // a1 = tmp[4 + i] + tmp[12 + i]
+    // a2 = tmp[4 + i] - tmp[12 + i]
+    // a3 = tmp[0 + i] - tmp[ 8 + i]
+    const int32x4_t a0 = vaddq_s32(tmp1.val[0], tmp1.val[2]);
+    const int32x4_t a1 = vaddq_s32(tmp1.val[1], tmp1.val[3]);
+    const int32x4_t a2 = vsubq_s32(tmp1.val[1], tmp1.val[3]);
+    const int32x4_t a3 = vsubq_s32(tmp1.val[0], tmp1.val[2]);
+    const int32x4_t b0 = vhaddq_s32(a0, a1);  // (a0 + a1) >> 1
+    const int32x4_t b1 = vhaddq_s32(a3, a2);  // (a3 + a2) >> 1
+    const int32x4_t b2 = vhsubq_s32(a3, a2);  // (a3 - a2) >> 1
+    const int32x4_t b3 = vhsubq_s32(a0, a1);  // (a0 - a1) >> 1
+    const int16x4_t out0 = vmovn_s32(b0);
+    const int16x4_t out1 = vmovn_s32(b1);
+    const int16x4_t out2 = vmovn_s32(b2);
+    const int16x4_t out3 = vmovn_s32(b3);
+
+    vst1_s16(out +  0, out0);   // 16 outputs, stored contiguously
+    vst1_s16(out +  4, out1);
+    vst1_s16(out +  8, out2);
+    vst1_s16(out + 12, out3);
+  }
+}
+#undef LOAD_LANE_16b
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// a 0123, b 0123
+// a 4567, b 4567
+// a 89ab, b 89ab
+// a cdef, b cdef
+//
+// transpose
+//
+// a 048c, b 048c
+// a 159d, b 159d
+// a 26ae, b 26ae
+// a 37bf, b 37bf
+//
+static WEBP_INLINE uint8x8x4_t DistoTranspose4x4U8(uint8x8x4_t d4_in) {
+  const uint8x8x2_t d2_tmp0 = vtrn_u8(d4_in.val[0], d4_in.val[1]);
+  const uint8x8x2_t d2_tmp1 = vtrn_u8(d4_in.val[2], d4_in.val[3]);
+  const uint16x4x2_t d2_tmp2 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[0]),
+                                        vreinterpret_u16_u8(d2_tmp1.val[0]));
+  const uint16x4x2_t d2_tmp3 = vtrn_u16(vreinterpret_u16_u8(d2_tmp0.val[1]),
+                                        vreinterpret_u16_u8(d2_tmp1.val[1]));
+
+  d4_in.val[0] = vreinterpret_u8_u16(d2_tmp2.val[0]);
+  d4_in.val[2] = vreinterpret_u8_u16(d2_tmp2.val[1]);
+  d4_in.val[1] = vreinterpret_u8_u16(d2_tmp3.val[0]);
+  d4_in.val[3] = vreinterpret_u8_u16(d2_tmp3.val[1]);
+  return d4_in;   // bytes 0-3 and bytes 4-7 are each transposed as a 4x4
+}
+
+static WEBP_INLINE int16x8x4_t DistoTranspose4x4S16(int16x8x4_t q4_in) {
+  const int16x8x2_t q2_tmp0 = vtrnq_s16(q4_in.val[0], q4_in.val[1]);
+  const int16x8x2_t q2_tmp1 = vtrnq_s16(q4_in.val[2], q4_in.val[3]);
+  const int32x4x2_t q2_tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[0]),
+                                        vreinterpretq_s32_s16(q2_tmp1.val[0]));
+  const int32x4x2_t q2_tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q2_tmp0.val[1]),
+                                        vreinterpretq_s32_s16(q2_tmp1.val[1]));
+  q4_in.val[0] = vreinterpretq_s16_s32(q2_tmp2.val[0]);
+  q4_in.val[2] = vreinterpretq_s16_s32(q2_tmp2.val[1]);
+  q4_in.val[1] = vreinterpretq_s16_s32(q2_tmp3.val[0]);
+  q4_in.val[3] = vreinterpretq_s16_s32(q2_tmp3.val[1]);
+  return q4_in;   // lanes 0-3 and lanes 4-7 are each transposed as a 4x4
+}
+
+static WEBP_INLINE int16x8x4_t DistoHorizontalPass(const uint8x8x4_t d4_in) {
+  // {a0, a1} = {in[0] + in[2], in[1] + in[3]}   (u8 inputs widened to 16b)
+  // {a3, a2} = {in[0] - in[2], in[1] - in[3]}   (reinterpreted as signed)
+  const int16x8_t q_a0 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[0],
+                                                        d4_in.val[2]));
+  const int16x8_t q_a1 = vreinterpretq_s16_u16(vaddl_u8(d4_in.val[1],
+                                                        d4_in.val[3]));
+  const int16x8_t q_a3 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[0],
+                                                        d4_in.val[2]));
+  const int16x8_t q_a2 = vreinterpretq_s16_u16(vsubl_u8(d4_in.val[1],
+                                                        d4_in.val[3]));
+  int16x8x4_t q4_out;
+  // tmp[0] = a0 + a1
+  // tmp[1] = a3 + a2
+  // tmp[2] = a3 - a2
+  // tmp[3] = a0 - a1
+  INIT_VECTOR4(q4_out,
+               vaddq_s16(q_a0, q_a1), vaddq_s16(q_a3, q_a2),
+               vsubq_s16(q_a3, q_a2), vsubq_s16(q_a0, q_a1));
+  return q4_out;
+}
+
+static WEBP_INLINE int16x8x4_t DistoVerticalPass(int16x8x4_t q4_in) {
+  const int16x8_t q_a0 = vaddq_s16(q4_in.val[0], q4_in.val[2]);
+  const int16x8_t q_a1 = vaddq_s16(q4_in.val[1], q4_in.val[3]);
+  const int16x8_t q_a2 = vsubq_s16(q4_in.val[1], q4_in.val[3]);
+  const int16x8_t q_a3 = vsubq_s16(q4_in.val[0], q4_in.val[2]);
+
+  q4_in.val[0] = vaddq_s16(q_a0, q_a1);
+  q4_in.val[1] = vaddq_s16(q_a3, q_a2);
+  q4_in.val[2] = vabdq_s16(q_a3, q_a2);     // |a3 - a2|
+  q4_in.val[3] = vabdq_s16(q_a0, q_a1);     // |a0 - a1|
+  q4_in.val[0] = vabsq_s16(q4_in.val[0]);   // |a0 + a1|
+  q4_in.val[1] = vabsq_s16(q4_in.val[1]);   // |a3 + a2|
+  return q4_in;
+}
+
+static WEBP_INLINE int16x4x4_t DistoLoadW(const uint16_t* w) {
+  const uint16x8_t q_w07 = vld1q_u16(&w[0]);
+  const uint16x8_t q_w8f = vld1q_u16(&w[8]);
+  int16x4x4_t d4_w;
+  INIT_VECTOR4(d4_w,
+               vget_low_s16(vreinterpretq_s16_u16(q_w07)),
+               vget_high_s16(vreinterpretq_s16_u16(q_w07)),
+               vget_low_s16(vreinterpretq_s16_u16(q_w8f)),
+               vget_high_s16(vreinterpretq_s16_u16(q_w8f)));
+  return d4_w;   // w[0..3], w[4..7], w[8..11], w[12..15] viewed as int16
+}
+
+static WEBP_INLINE int32x2_t DistoSum(const int16x8x4_t q4_in,
+                                      const int16x4x4_t d4_w) {
+  int32x2_t d_sum;
+  // Per block:  sum += w[ 0] * abs(b0);  sum += w[ 4] * abs(b1);
+  //             sum += w[ 8] * abs(b2);  sum += w[12] * abs(b3);
+  // The low halves of q4_in are multiplied by w (vmull), then the products
+  // of the high halves are subtracted (vmlsl): the result is a difference.
+  int32x4_t q_sum0 = vmull_s16(d4_w.val[0], vget_low_s16(q4_in.val[0]));
+  int32x4_t q_sum1 = vmull_s16(d4_w.val[1], vget_low_s16(q4_in.val[1]));
+  int32x4_t q_sum2 = vmull_s16(d4_w.val[2], vget_low_s16(q4_in.val[2]));
+  int32x4_t q_sum3 = vmull_s16(d4_w.val[3], vget_low_s16(q4_in.val[3]));
+  q_sum0 = vmlsl_s16(q_sum0, d4_w.val[0], vget_high_s16(q4_in.val[0]));
+  q_sum1 = vmlsl_s16(q_sum1, d4_w.val[1], vget_high_s16(q4_in.val[1]));
+  q_sum2 = vmlsl_s16(q_sum2, d4_w.val[2], vget_high_s16(q4_in.val[2]));
+  q_sum3 = vmlsl_s16(q_sum3, d4_w.val[3], vget_high_s16(q4_in.val[3]));
+
+  q_sum0 = vaddq_s32(q_sum0, q_sum1);
+  q_sum2 = vaddq_s32(q_sum2, q_sum3);
+  q_sum2 = vaddq_s32(q_sum0, q_sum2);
+  d_sum = vpadd_s32(vget_low_s32(q_sum2), vget_high_s32(q_sum2));
+  d_sum = vpadd_s32(d_sum, d_sum);   // both lanes now hold the total
+  return d_sum;
+}
+
+#define LOAD_LANE_32b(src, VALUE, LANE) \
+    (VALUE) = vld1_lane_u32((const uint32_t*)(src), (VALUE), (LANE))
+
+// Hadamard transform
+// Returns the weighted sum of the absolute value of transformed coefficients.
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  uint32x2_t d_in_ab_0123 = vdup_n_u32(0);
+  uint32x2_t d_in_ab_4567 = vdup_n_u32(0);
+  uint32x2_t d_in_ab_89ab = vdup_n_u32(0);
+  uint32x2_t d_in_ab_cdef = vdup_n_u32(0);
+  uint8x8x4_t d4_in;
+
+  // load data a, b: 4 bytes per row, 'a' in lane 0 and 'b' in lane 1
+  LOAD_LANE_32b(a + 0 * BPS, d_in_ab_0123, 0);
+  LOAD_LANE_32b(a + 1 * BPS, d_in_ab_4567, 0);
+  LOAD_LANE_32b(a + 2 * BPS, d_in_ab_89ab, 0);
+  LOAD_LANE_32b(a + 3 * BPS, d_in_ab_cdef, 0);
+  LOAD_LANE_32b(b + 0 * BPS, d_in_ab_0123, 1);
+  LOAD_LANE_32b(b + 1 * BPS, d_in_ab_4567, 1);
+  LOAD_LANE_32b(b + 2 * BPS, d_in_ab_89ab, 1);
+  LOAD_LANE_32b(b + 3 * BPS, d_in_ab_cdef, 1);
+  INIT_VECTOR4(d4_in,
+               vreinterpret_u8_u32(d_in_ab_0123),
+               vreinterpret_u8_u32(d_in_ab_4567),
+               vreinterpret_u8_u32(d_in_ab_89ab),
+               vreinterpret_u8_u32(d_in_ab_cdef));
+
+  {
+    // horizontal pass
+    const uint8x8x4_t d4_t = DistoTranspose4x4U8(d4_in);
+    const int16x8x4_t q4_h = DistoHorizontalPass(d4_t);
+    const int16x4x4_t d4_w = DistoLoadW(w);
+    // vertical pass
+    const int16x8x4_t q4_t = DistoTranspose4x4S16(q4_h);
+    const int16x8x4_t q4_v = DistoVerticalPass(q4_t);
+    int32x2_t d_sum = DistoSum(q4_v, d4_w);
+
+    // abs(sum2 - sum1) >> 5
+    d_sum = vabs_s32(d_sum);
+    d_sum  = vshr_n_s32(d_sum, 5);
+    return vget_lane_s32(d_sum, 0);
+  }
+}
+#undef LOAD_LANE_32b
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {   // y is an offset: 4 rows a step
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);   // same 'w' for every 4x4
+    }
+  }
+  return D;
+}
+
+//------------------------------------------------------------------------------
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  const uint16x8_t max_coeff_thresh = vdupq_n_u16(MAX_COEFF_THRESH);
+  int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+    {
+      int k;
+      const int16x8_t a0 = vld1q_s16(out + 0);
+      const int16x8_t b0 = vld1q_s16(out + 8);
+      const uint16x8_t a1 = vreinterpretq_u16_s16(vabsq_s16(a0));
+      const uint16x8_t b1 = vreinterpretq_u16_s16(vabsq_s16(b0));
+      const uint16x8_t a2 = vshrq_n_u16(a1, 3);   // abs(coeff) >> 3
+      const uint16x8_t b2 = vshrq_n_u16(b1, 3);
+      const uint16x8_t a3 = vminq_u16(a2, max_coeff_thresh);  // clip to bin
+      const uint16x8_t b3 = vminq_u16(b2, max_coeff_thresh);
+      vst1q_s16(out + 0, vreinterpretq_s16_u16(a3));
+      vst1q_s16(out + 8, vreinterpretq_s16_u16(b3));
+      // Count the bins: out[] now holds one bin index per coefficient.
+      for (k = 0; k < 16; ++k) {
+        ++distribution[out[k]];
+      }
+    }
+  }
+  VP8SetHistogramData(distribution, histo);
+}
+
+//------------------------------------------------------------------------------
+
+// Adds the squared differences of 16 bytes of 'a' and 'b' into '*sum'.
+static WEBP_INLINE void AccumulateSSE16(const uint8_t* const a,
+                                        const uint8_t* const b,
+                                        uint32x4_t* const sum) {
+  const uint8x16_t abs_diff = vabdq_u8(vld1q_u8(a), vld1q_u8(b));
+  const uint8x8_t lo = vget_low_u8(abs_diff);
+  const uint8x8_t hi = vget_high_u8(abs_diff);
+  // Widen each half to 32b separately: lo^2 + hi^2 can overflow 16b lanes.
+  *sum = vpadalq_u16(vpadalq_u16(*sum, vmull_u8(lo, lo)), vmull_u8(hi, hi));
+}
+
+// Horizontal sum of all four uint32_t values in 'sum'.
+static int SumToInt(uint32x4_t sum) {
+  const uint64x2_t sum2 = vpaddlq_u32(sum);   // two 64b pairwise sums
+  const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1);
+  return (int)sum3;   // the 64b total is truncated to int
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  uint32x4_t sum = vdupq_n_u32(0);
+  int y;
+  for (y = 0; y < 16; ++y) {   // 16 rows of 16 bytes, BPS bytes apart
+    AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
+  }
+  return SumToInt(sum);
+}
+
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  uint32x4_t sum = vdupq_n_u32(0);
+  int y;
+  for (y = 0; y < 8; ++y) {   // 8 rows of 16 bytes, BPS bytes apart
+    AccumulateSSE16(a + y * BPS, b + y * BPS, &sum);
+  }
+  return SumToInt(sum);
+}
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  uint32x4_t sum = vdupq_n_u32(0);
+  int y;
+  for (y = 0; y < 8; ++y) {   // 8 rows of 8 bytes, BPS bytes apart
+    const uint8x8_t a0 = vld1_u8(a + y * BPS);
+    const uint8x8_t b0 = vld1_u8(b + y * BPS);
+    const uint8x8_t abs_diff = vabd_u8(a0, b0);
+    const uint16x8_t prod = vmull_u8(abs_diff, abs_diff);   // squares, 16b
+    sum = vpadalq_u16(sum, prod);   // pairwise add into the 32b lanes
+  }
+  return SumToInt(sum);
+}
+
+// Sum of squared differences between the blocks Load4x4() reads at 'a', 'b'.
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  const uint8x16_t abs_diff = vabdq_u8(Load4x4(a), Load4x4(b));
+  const uint8x8_t lo = vget_low_u8(abs_diff);
+  const uint8x8_t hi = vget_high_u8(abs_diff);
+  // Widen each half to 32b separately: lo^2 + hi^2 can overflow 16b lanes.
+  return SumToInt(vpadalq_u16(vpaddlq_u16(vmull_u8(lo, lo)), vmull_u8(hi, hi)));
+}
+
+//------------------------------------------------------------------------------
+
+// Compilation with gcc-4.6.x is problematic for now.
+#if !defined(WORK_AROUND_GCC)
+
+static int16x8_t Quantize(int16_t* const in,   // 8 coeffs at in[offset]
+                          const VP8Matrix* const mtx, int offset) {
+  const uint16x8_t sharp = vld1q_u16(&mtx->sharpen_[offset]);
+  const uint16x8_t q = vld1q_u16(&mtx->q_[offset]);
+  const uint16x8_t iq = vld1q_u16(&mtx->iq_[offset]);
+  const uint32x4_t bias0 = vld1q_u32(&mtx->bias_[offset + 0]);
+  const uint32x4_t bias1 = vld1q_u32(&mtx->bias_[offset + 4]);
+
+  const int16x8_t a = vld1q_s16(in + offset);                // in
+  const uint16x8_t b = vreinterpretq_u16_s16(vabsq_s16(a));  // coeff = abs(in)
+  const int16x8_t sign = vshrq_n_s16(a, 15);                 // sign
+  const uint16x8_t c = vaddq_u16(b, sharp);                  // + sharpen
+  const uint32x4_t m0 = vmull_u16(vget_low_u16(c), vget_low_u16(iq));
+  const uint32x4_t m1 = vmull_u16(vget_high_u16(c), vget_high_u16(iq));
+  const uint32x4_t m2 = vhaddq_u32(m0, bias0);
+  const uint32x4_t m3 = vhaddq_u32(m1, bias1);     // (coeff * iQ + bias) >> 1
+  const uint16x8_t c0 = vcombine_u16(vshrn_n_u32(m2, 16),
+                                     vshrn_n_u32(m3, 16));   // QFIX=17 = 16+1
+  const uint16x8_t c1 = vminq_u16(c0, vdupq_n_u16(MAX_LEVEL));
+  const int16x8_t c2 = veorq_s16(vreinterpretq_s16_u16(c1), sign);
+  const int16x8_t c3 = vsubq_s16(c2, sign);                  // restore sign
+  const int16x8_t c4 = vmulq_s16(c3, vreinterpretq_s16_u16(q));
+  vst1q_s16(in + offset, c4);   // in[] is overwritten with level * q
+  assert(QFIX == 17);  // this function can't work as is if QFIX != 16+1
+  return c3;   // the signed quantized levels
+}
+
+static const uint8_t kShuffles[4][8] = {   // byte indices for the vtbl lookups
+  { 0,   1,  2,  3,  8,  9, 16, 17 },
+  { 10, 11,  4,  5,  6,  7, 12, 13 },
+  { 18, 19, 24, 25, 26, 27, 20, 21 },
+  { 14, 15, 22, 23, 28, 29, 30, 31 }
+};
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  const int16x8_t out0 = Quantize(in, mtx, 0);
+  const int16x8_t out1 = Quantize(in, mtx, 8);
+  uint8x8x4_t shuffles;
+  // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
+  // non-standard versions there.
+#if defined(__APPLE__) && defined(__aarch64__) && \
+    defined(__apple_build_version__) && (__apple_build_version__< 6020037)
+  uint8x16x2_t all_out;
+  INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1));
+  INIT_VECTOR4(shuffles,
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[0])),
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[1])),
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[2])),
+               vtbl2q_u8(all_out, vld1_u8(kShuffles[3])));
+#else
+  uint8x8x4_t all_out;
+  INIT_VECTOR4(all_out,
+               vreinterpret_u8_s16(vget_low_s16(out0)),
+               vreinterpret_u8_s16(vget_high_s16(out0)),
+               vreinterpret_u8_s16(vget_low_s16(out1)),
+               vreinterpret_u8_s16(vget_high_s16(out1)));
+  INIT_VECTOR4(shuffles,
+               vtbl4_u8(all_out, vld1_u8(kShuffles[0])),
+               vtbl4_u8(all_out, vld1_u8(kShuffles[1])),
+               vtbl4_u8(all_out, vld1_u8(kShuffles[2])),
+               vtbl4_u8(all_out, vld1_u8(kShuffles[3])));
+#endif
+  // Zigzag reordering
+  vst1_u8((uint8_t*)(out +  0), shuffles.val[0]);
+  vst1_u8((uint8_t*)(out +  4), shuffles.val[1]);
+  vst1_u8((uint8_t*)(out +  8), shuffles.val[2]);
+  vst1_u8((uint8_t*)(out + 12), shuffles.val[3]);
+  // test zeros, 64 bits at a time (NOTE(review): type-punned loads of out[])
+  if (*(uint64_t*)(out +  0) != 0) return 1;
+  if (*(uint64_t*)(out +  4) != 0) return 1;
+  if (*(uint64_t*)(out +  8) != 0) return 1;
+  if (*(uint64_t*)(out + 12) != 0) return 1;
+  return 0;   // all 16 reordered levels are zero
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  nz  = QuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
+  nz |= QuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
+  return nz;   // bit 0: first block is non-zero, bit 1: second block
+}
+
+#endif   // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitNEON(void) {
+  VP8ITransform = ITransform;   // assign this file's versions to the VP8* names
+  VP8FTransform = FTransform;
+
+  VP8FTransformWHT = FTransformWHT;
+
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8CollectHistogram = CollectHistogram;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE4x4 = SSE4x4;
+#if !defined(WORK_AROUND_GCC)   // the quantizers are only compiled in this case
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+#endif
+}
+
+#else  // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitNEON)
+
+#endif  // WEBP_USE_NEON
diff --git a/drivers/webp/dsp/enc_sse2.c b/drivers/webp/dsp/enc_sse2.c
index b046761dc1..63d9cecd85 100644
--- a/drivers/webp/dsp/enc_sse2.c
+++ b/drivers/webp/dsp/enc_sse2.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // SSE2 version of speed-critical encoding functions.
@@ -15,64 +17,55 @@
 #include <stdlib.h>  // for abs()
 #include <emmintrin.h>
 
+#include "../enc/cost.h"
 #include "../enc/vp8enci.h"
+#include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
+//------------------------------------------------------------------------------
+// Quite useful function for debugging. Left here for convenience.
+
+#if 0
+#include <stdio.h>
+static void PrintReg(const __m128i r, const char* const name, int size) {
+  int n;
+  union {
+    __m128i r;
+    uint8_t i8[16];
+    uint16_t i16[8];
+    uint32_t i32[4];
+    unsigned long long i64[2];   // type matching the %llx conversion below
+  } tmp;
+  tmp.r = r;   // view the register as lanes of 'size' bits
+  fprintf(stderr, "%s\t: ", name);
+  if (size == 8) {
+    for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]);
+  } else if (size == 16) {
+    for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]);
+  } else if (size == 32) {
+    for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]);
+  } else {   // any other 'size': two 64-bit lanes
+    for (n = 0; n < 2; ++n) fprintf(stderr, "%.16llx ", tmp.i64[n]);
+  }
+  fprintf(stderr, "\n");
+}
 #endif
 
 //------------------------------------------------------------------------------
-// Compute susceptibility based on DCT-coeff histograms:
-// the higher, the "easier" the macroblock is to compress.
-
-static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
-                                int start_block, int end_block) {
-  int histo[MAX_COEFF_THRESH + 1] = { 0 };
-  int16_t out[16];
-  int j, k;
-  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
-  for (j = start_block; j < end_block; ++j) {
-    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
-
-    // Convert coefficients to bin (within out[]).
-    {
-      // Load.
-      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
-      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
-      // sign(out) = out >> 15  (0x0000 if positive, 0xffff if negative)
-      const __m128i sign0 = _mm_srai_epi16(out0, 15);
-      const __m128i sign1 = _mm_srai_epi16(out1, 15);
-      // abs(out) = (out ^ sign) - sign
-      const __m128i xor0 = _mm_xor_si128(out0, sign0);
-      const __m128i xor1 = _mm_xor_si128(out1, sign1);
-      const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
-      const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
-      // v = abs(out) >> 2
-      const __m128i v0 = _mm_srai_epi16(abs0, 2);
-      const __m128i v1 = _mm_srai_epi16(abs1, 2);
-      // bin = min(v, MAX_COEFF_THRESH)
-      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
-      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
-      // Store.
-      _mm_storeu_si128((__m128i*)&out[0], bin0);
-      _mm_storeu_si128((__m128i*)&out[8], bin1);
-    }
+// util for unaligned loads.
 
-    // Use bin to update histogram.
-    for (k = 0; k < 16; ++k) {
-      histo[out[k]]++;
-    }
-  }
-
-  return VP8GetAlpha(histo);
+// memcpy() is the safe way of moving potentially unaligned 32b memory.
+static WEBP_INLINE uint32_t MemToUint32(const uint8_t* const ptr) {
+  uint32_t A;
+  memcpy(&A, ptr, sizeof(A));   // no pointer cast: 'ptr' may be unaligned
+  return A;
+}
 
 //------------------------------------------------------------------------------
 // Transforms (Paragraph 14.4)
 
 // Does one or two inverse transforms.
-static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
-                           int do_two) {
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                       int do_two) {
   // This implementation makes use of 16-bit fixed point versions of two
   // multiply constants:
   //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
@@ -99,19 +92,19 @@ static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
   // use nor store.
   __m128i in0, in1, in2, in3;
   {
-    in0 = _mm_loadl_epi64((__m128i*)&in[0]);
-    in1 = _mm_loadl_epi64((__m128i*)&in[4]);
-    in2 = _mm_loadl_epi64((__m128i*)&in[8]);
-    in3 = _mm_loadl_epi64((__m128i*)&in[12]);
+    in0 = _mm_loadl_epi64((const __m128i*)&in[0]);
+    in1 = _mm_loadl_epi64((const __m128i*)&in[4]);
+    in2 = _mm_loadl_epi64((const __m128i*)&in[8]);
+    in3 = _mm_loadl_epi64((const __m128i*)&in[12]);
     // a00 a10 a20 a30   x x x x
     // a01 a11 a21 a31   x x x x
     // a02 a12 a22 a32   x x x x
     // a03 a13 a23 a33   x x x x
     if (do_two) {
-      const __m128i inB0 = _mm_loadl_epi64((__m128i*)&in[16]);
-      const __m128i inB1 = _mm_loadl_epi64((__m128i*)&in[20]);
-      const __m128i inB2 = _mm_loadl_epi64((__m128i*)&in[24]);
-      const __m128i inB3 = _mm_loadl_epi64((__m128i*)&in[28]);
+      const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]);
+      const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]);
+      const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]);
+      const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]);
       in0 = _mm_unpacklo_epi64(in0, inB0);
       in1 = _mm_unpacklo_epi64(in1, inB1);
       in2 = _mm_unpacklo_epi64(in2, inB2);
@@ -243,21 +236,21 @@ static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
 
   // Add inverse transform to 'ref' and store.
   {
-    const __m128i zero = _mm_set1_epi16(0);
+    const __m128i zero = _mm_setzero_si128();
     // Load the reference(s).
     __m128i ref0, ref1, ref2, ref3;
     if (do_two) {
       // Load eight bytes/pixels per line.
-      ref0 = _mm_loadl_epi64((__m128i*)&ref[0 * BPS]);
-      ref1 = _mm_loadl_epi64((__m128i*)&ref[1 * BPS]);
-      ref2 = _mm_loadl_epi64((__m128i*)&ref[2 * BPS]);
-      ref3 = _mm_loadl_epi64((__m128i*)&ref[3 * BPS]);
+      ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
+      ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
+      ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
+      ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
     } else {
       // Load four bytes/pixels per line.
-      ref0 = _mm_cvtsi32_si128(*(int*)&ref[0 * BPS]);
-      ref1 = _mm_cvtsi32_si128(*(int*)&ref[1 * BPS]);
-      ref2 = _mm_cvtsi32_si128(*(int*)&ref[2 * BPS]);
-      ref3 = _mm_cvtsi32_si128(*(int*)&ref[3 * BPS]);
+      ref0 = _mm_cvtsi32_si128(MemToUint32(&ref[0 * BPS]));
+      ref1 = _mm_cvtsi32_si128(MemToUint32(&ref[1 * BPS]));
+      ref2 = _mm_cvtsi32_si128(MemToUint32(&ref[2 * BPS]));
+      ref3 = _mm_cvtsi32_si128(MemToUint32(&ref[3 * BPS]));
     }
     // Convert to 16b.
     ref0 = _mm_unpacklo_epi8(ref0, zero);
@@ -291,200 +284,865 @@ static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
   }
 }
 
-static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
-                           int16_t* out) {
+static void FTransformPass1(const __m128i* const in01,
+                            const __m128i* const in23,
+                            __m128i* const out01,
+                            __m128i* const out32) {
+  const __m128i k937 = _mm_set1_epi32(937);
+  const __m128i k1812 = _mm_set1_epi32(1812);
+
+  const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
+  const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
+  const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
+                                            2217, 5352, 2217, 5352);
+  const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
+                                            -5352, 2217, -5352, 2217);
+
+  // *in01 = 00 01 10 11 02 03 12 13
+  // *in23 = 20 21 30 31 22 23 32 33
+  const __m128i shuf01_p = _mm_shufflehi_epi16(*in01, _MM_SHUFFLE(2, 3, 0, 1));
+  const __m128i shuf23_p = _mm_shufflehi_epi16(*in23, _MM_SHUFFLE(2, 3, 0, 1));
+  // 00 01 10 11 03 02 13 12
+  // 20 21 30 31 23 22 33 32
+  const __m128i s01 = _mm_unpacklo_epi64(shuf01_p, shuf23_p);
+  const __m128i s32 = _mm_unpackhi_epi64(shuf01_p, shuf23_p);
+  // 00 01 10 11 20 21 30 31
+  // 03 02 13 12 23 22 33 32
+  const __m128i a01 = _mm_add_epi16(s01, s32);
+  const __m128i a32 = _mm_sub_epi16(s01, s32);
+  // [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
+  // [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
+
+  const __m128i tmp0   = _mm_madd_epi16(a01, k88p);  // [ (a0 + a1) << 3, ... ]
+  const __m128i tmp2   = _mm_madd_epi16(a01, k88m);  // [ (a0 - a1) << 3, ... ]
+  const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
+  const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
+  const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
+  const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
+  const __m128i tmp1   = _mm_srai_epi32(tmp1_2, 9);
+  const __m128i tmp3   = _mm_srai_epi32(tmp3_2, 9);
+  const __m128i s03    = _mm_packs_epi32(tmp0, tmp2);
+  const __m128i s12    = _mm_packs_epi32(tmp1, tmp3);
+  const __m128i s_lo   = _mm_unpacklo_epi16(s03, s12);   // 0 1 0 1 0 1...
+  const __m128i s_hi   = _mm_unpackhi_epi16(s03, s12);   // 2 3 2 3 2 3
+  const __m128i v23    = _mm_unpackhi_epi32(s_lo, s_hi);
+  *out01 = _mm_unpacklo_epi32(s_lo, s_hi);
+  *out32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));  // 3 2 3 2 3 2..
+}
+
+static void FTransformPass2(const __m128i* const v01, const __m128i* const v32,
+                            int16_t* out) {
   const __m128i zero = _mm_setzero_si128();
   const __m128i seven = _mm_set1_epi16(7);
-  const __m128i k7500 = _mm_set1_epi32(7500);
-  const __m128i k14500 = _mm_set1_epi32(14500);
-  const __m128i k51000 = _mm_set1_epi32(51000);
-  const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
   const __m128i k5352_2217 = _mm_set_epi16(5352,  2217, 5352,  2217,
                                            5352,  2217, 5352,  2217);
   const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
                                            2217, -5352, 2217, -5352);
+  const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
+  const __m128i k51000 = _mm_set1_epi32(51000);
+
+  // Same operations are done on the (0,3) and (1,2) pairs.
+  // a0 = v0 + v3
+  // a1 = v1 + v2
+  // a3 = v0 - v3
+  // a2 = v1 - v2
+  const __m128i a01 = _mm_add_epi16(*v01, *v32);
+  const __m128i a32 = _mm_sub_epi16(*v01, *v32);
+  const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
+  const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
+  const __m128i a01_plus_7 = _mm_add_epi16(a01, seven);
+
+  // d0 = (a0 + a1 + 7) >> 4;
+  // d2 = (a0 - a1 + 7) >> 4;
+  const __m128i c0 = _mm_add_epi16(a01_plus_7, a11);
+  const __m128i c2 = _mm_sub_epi16(a01_plus_7, a11);
+  const __m128i d0 = _mm_srai_epi16(c0, 4);
+  const __m128i d2 = _mm_srai_epi16(c2, 4);
+
+  // f1 = ((b3 * 5352 + b2 * 2217 + 12000) >> 16)
+  // f3 = ((b3 * 2217 - b2 * 5352 + 51000) >> 16)
+  const __m128i b23 = _mm_unpacklo_epi16(a22, a32);
+  const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
+  const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
+  const __m128i d1 = _mm_add_epi32(c1, k12000_plus_one);
+  const __m128i d3 = _mm_add_epi32(c3, k51000);
+  const __m128i e1 = _mm_srai_epi32(d1, 16);
+  const __m128i e3 = _mm_srai_epi32(d3, 16);
+  const __m128i f1 = _mm_packs_epi32(e1, e1);
+  const __m128i f3 = _mm_packs_epi32(e3, e3);
+  // f1 = f1 + (a3 != 0);
+  // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
+  // desired (0, 1), we add one earlier through k12000_plus_one.
+  // -> f1 = f1 + 1 - (a3 == 0)
+  const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
+
+  const __m128i d0_g1 = _mm_unpacklo_epi64(d0, g1);
+  const __m128i d2_f3 = _mm_unpacklo_epi64(d2, f3);
+  _mm_storeu_si128((__m128i*)&out[0], d0_g1);
+  _mm_storeu_si128((__m128i*)&out[8], d2_f3);
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  const __m128i zero = _mm_setzero_si128();
 
+  // Load src and convert to 16b.
+  const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
+  const __m128i src1 = _mm_loadl_epi64((const __m128i*)&src[1 * BPS]);
+  const __m128i src2 = _mm_loadl_epi64((const __m128i*)&src[2 * BPS]);
+  const __m128i src3 = _mm_loadl_epi64((const __m128i*)&src[3 * BPS]);
+  const __m128i src_0 = _mm_unpacklo_epi8(src0, zero);
+  const __m128i src_1 = _mm_unpacklo_epi8(src1, zero);
+  const __m128i src_2 = _mm_unpacklo_epi8(src2, zero);
+  const __m128i src_3 = _mm_unpacklo_epi8(src3, zero);
+  // Load ref and convert to 16b.
+  const __m128i ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
+  const __m128i ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
+  const __m128i ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
+  const __m128i ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
+  const __m128i ref_0 = _mm_unpacklo_epi8(ref0, zero);
+  const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
+  const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
+  const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
+  // Compute difference. -> 00 01 02 03 00 00 00 00
+  const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
+  const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
+  const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
+  const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
+
+  // Unpack and shuffle
+  // 00 01 02 03   0 0 0 0
+  // 10 11 12 13   0 0 0 0
+  // 20 21 22 23   0 0 0 0
+  // 30 31 32 33   0 0 0 0
+  const __m128i shuf01 = _mm_unpacklo_epi32(diff0, diff1);
+  const __m128i shuf23 = _mm_unpacklo_epi32(diff2, diff3);
   __m128i v01, v32;
 
-  // Difference between src and ref and initial transpose.
-  {
-    // Load src and convert to 16b.
-    const __m128i src0 = _mm_loadl_epi64((__m128i*)&src[0 * BPS]);
-    const __m128i src1 = _mm_loadl_epi64((__m128i*)&src[1 * BPS]);
-    const __m128i src2 = _mm_loadl_epi64((__m128i*)&src[2 * BPS]);
-    const __m128i src3 = _mm_loadl_epi64((__m128i*)&src[3 * BPS]);
-    const __m128i src_0 = _mm_unpacklo_epi8(src0, zero);
-    const __m128i src_1 = _mm_unpacklo_epi8(src1, zero);
-    const __m128i src_2 = _mm_unpacklo_epi8(src2, zero);
-    const __m128i src_3 = _mm_unpacklo_epi8(src3, zero);
-    // Load ref and convert to 16b.
-    const __m128i ref0 = _mm_loadl_epi64((__m128i*)&ref[0 * BPS]);
-    const __m128i ref1 = _mm_loadl_epi64((__m128i*)&ref[1 * BPS]);
-    const __m128i ref2 = _mm_loadl_epi64((__m128i*)&ref[2 * BPS]);
-    const __m128i ref3 = _mm_loadl_epi64((__m128i*)&ref[3 * BPS]);
-    const __m128i ref_0 = _mm_unpacklo_epi8(ref0, zero);
-    const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
-    const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
-    const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
-    // Compute difference.
-    const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
-    const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
-    const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
-    const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
-
-    // Transpose.
-    // 00 01 02 03   0 0 0 0
-    // 10 11 12 13   0 0 0 0
-    // 20 21 22 23   0 0 0 0
-    // 30 31 32 33   0 0 0 0
-    const __m128i transpose0_0 = _mm_unpacklo_epi16(diff0, diff1);
-    const __m128i transpose0_1 = _mm_unpacklo_epi16(diff2, diff3);
-    // 00 10 01 11   02 12 03 13
-    // 20 30 21 31   22 32 23 33
-    const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
-    v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
-    v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
-    // a02 a12 a22 a32   a03 a13 a23 a33
-    // a00 a10 a20 a30   a01 a11 a21 a31
-    // a03 a13 a23 a33   a02 a12 a22 a32
-  }
-
-  // First pass and subsequent transpose.
-  {
-    // Same operations are done on the (0,3) and (1,2) pairs.
-    // b0 = (a0 + a3) << 3
-    // b1 = (a1 + a2) << 3
-    // b3 = (a0 - a3) << 3
-    // b2 = (a1 - a2) << 3
-    const __m128i a01 = _mm_add_epi16(v01, v32);
-    const __m128i a32 = _mm_sub_epi16(v01, v32);
-    const __m128i b01 = _mm_slli_epi16(a01, 3);
-    const __m128i b32 = _mm_slli_epi16(a32, 3);
-    const __m128i b11 = _mm_unpackhi_epi64(b01, b01);
-    const __m128i b22 = _mm_unpackhi_epi64(b32, b32);
-
-    // e0 = b0 + b1
-    // e2 = b0 - b1
-    const __m128i e0 = _mm_add_epi16(b01, b11);
-    const __m128i e2 = _mm_sub_epi16(b01, b11);
-    const __m128i e02 = _mm_unpacklo_epi64(e0, e2);
-
-    // e1 = (b3 * 5352 + b2 * 2217 + 14500) >> 12
-    // e3 = (b3 * 2217 - b2 * 5352 +  7500) >> 12
-    const __m128i b23 = _mm_unpacklo_epi16(b22, b32);
-    const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
-    const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
-    const __m128i d1 = _mm_add_epi32(c1, k14500);
-    const __m128i d3 = _mm_add_epi32(c3, k7500);
-    const __m128i e1 = _mm_srai_epi32(d1, 12);
-    const __m128i e3 = _mm_srai_epi32(d3, 12);
-    const __m128i e13 = _mm_packs_epi32(e1, e3);
-
-    // Transpose.
-    // 00 01 02 03  20 21 22 23
-    // 10 11 12 13  30 31 32 33
-    const __m128i transpose0_0 = _mm_unpacklo_epi16(e02, e13);
-    const __m128i transpose0_1 = _mm_unpackhi_epi16(e02, e13);
-    // 00 10 01 11   02 12 03 13
-    // 20 30 21 31   22 32 23 33
-    const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
-    v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
-    v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
-    // 02 12 22 32   03 13 23 33
-    // 00 10 20 30   01 11 21 31
-    // 03 13 23 33   02 12 22 32
-  }
+  // First pass
+  FTransformPass1(&shuf01, &shuf23, &v01, &v32);
 
   // Second pass
+  FTransformPass2(&v01, &v32, out);
+}
+
+static void FTransform2(const uint8_t* src, const uint8_t* ref, int16_t* out) {
+  const __m128i zero = _mm_setzero_si128();
+
+  // Load src and convert to 16b.
+  const __m128i src0 = _mm_loadl_epi64((const __m128i*)&src[0 * BPS]);
+  const __m128i src1 = _mm_loadl_epi64((const __m128i*)&src[1 * BPS]);
+  const __m128i src2 = _mm_loadl_epi64((const __m128i*)&src[2 * BPS]);
+  const __m128i src3 = _mm_loadl_epi64((const __m128i*)&src[3 * BPS]);
+  const __m128i src_0 = _mm_unpacklo_epi8(src0, zero);
+  const __m128i src_1 = _mm_unpacklo_epi8(src1, zero);
+  const __m128i src_2 = _mm_unpacklo_epi8(src2, zero);
+  const __m128i src_3 = _mm_unpacklo_epi8(src3, zero);
+  // Load ref and convert to 16b.
+  const __m128i ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]);
+  const __m128i ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]);
+  const __m128i ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]);
+  const __m128i ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]);
+  const __m128i ref_0 = _mm_unpacklo_epi8(ref0, zero);
+  const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
+  const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
+  const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
+  // Compute difference. -> 00 01 02 03  00' 01' 02' 03'
+  const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
+  const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
+  const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
+  const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
+
+  // Unpack and shuffle
+  // 00 01 02 03   00' 01' 02' 03'
+  // 10 11 12 13   10' 11' 12' 13'
+  // 20 21 22 23   20' 21' 22' 23'
+  // 30 31 32 33   30' 31' 32' 33'
+  const __m128i shuf01l = _mm_unpacklo_epi32(diff0, diff1);
+  const __m128i shuf23l = _mm_unpacklo_epi32(diff2, diff3);
+  const __m128i shuf01h = _mm_unpackhi_epi32(diff0, diff1);
+  const __m128i shuf23h = _mm_unpackhi_epi32(diff2, diff3);
+  __m128i v01l, v32l;
+  __m128i v01h, v32h;
+
+  // First pass
+  FTransformPass1(&shuf01l, &shuf23l, &v01l, &v32l);
+  FTransformPass1(&shuf01h, &shuf23h, &v01h, &v32h);
+
+  // Second pass
+  FTransformPass2(&v01l, &v32l, out + 0);
+  FTransformPass2(&v01h, &v32h, out + 16);
+}
+
+static void FTransformWHTRow(const int16_t* const in, __m128i* const out) {
+  const __m128i kMult1 = _mm_set_epi16(0, 0, 0, 0, 1, 1, 1, 1);
+  const __m128i kMult2 = _mm_set_epi16(0, 0, 0, 0, -1, 1, -1, 1);
+  const __m128i src0 = _mm_loadl_epi64((__m128i*)&in[0 * 16]);
+  const __m128i src1 = _mm_loadl_epi64((__m128i*)&in[1 * 16]);
+  const __m128i src2 = _mm_loadl_epi64((__m128i*)&in[2 * 16]);
+  const __m128i src3 = _mm_loadl_epi64((__m128i*)&in[3 * 16]);
+  const __m128i A01 = _mm_unpacklo_epi16(src0, src1);  // A0 A1 | ...
+  const __m128i A23 = _mm_unpacklo_epi16(src2, src3);  // A2 A3 | ...
+  const __m128i B0 = _mm_adds_epi16(A01, A23);    // a0 | a1 | ...
+  const __m128i B1 = _mm_subs_epi16(A01, A23);    // a3 | a2 | ...
+  const __m128i C0 = _mm_unpacklo_epi32(B0, B1);  // a0 | a1 | a3 | a2
+  const __m128i C1 = _mm_unpacklo_epi32(B1, B0);  // a3 | a2 | a0 | a1
+  const __m128i D0 = _mm_madd_epi16(C0, kMult1);  // out0, out1
+  const __m128i D1 = _mm_madd_epi16(C1, kMult2);  // out2, out3
+  *out = _mm_unpacklo_epi64(D0, D1);
+}
+
+static void FTransformWHT(const int16_t* in, int16_t* out) {
+  __m128i row0, row1, row2, row3;
+  FTransformWHTRow(in + 0 * 64, &row0);
+  FTransformWHTRow(in + 1 * 64, &row1);
+  FTransformWHTRow(in + 2 * 64, &row2);
+  FTransformWHTRow(in + 3 * 64, &row3);
+
   {
-    // Same operations are done on the (0,3) and (1,2) pairs.
-    // a0 = v0 + v3
-    // a1 = v1 + v2
-    // a3 = v0 - v3
-    // a2 = v1 - v2
-    const __m128i a01 = _mm_add_epi16(v01, v32);
-    const __m128i a32 = _mm_sub_epi16(v01, v32);
-    const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
-    const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
-
-    // d0 = (a0 + a1 + 7) >> 4;
-    // d2 = (a0 - a1 + 7) >> 4;
-    const __m128i b0 = _mm_add_epi16(a01, a11);
-    const __m128i b2 = _mm_sub_epi16(a01, a11);
-    const __m128i c0 = _mm_add_epi16(b0, seven);
-    const __m128i c2 = _mm_add_epi16(b2, seven);
-    const __m128i d0 = _mm_srai_epi16(c0, 4);
-    const __m128i d2 = _mm_srai_epi16(c2, 4);
-
-    // f1 = ((b3 * 5352 + b2 * 2217 + 12000) >> 16)
-    // f3 = ((b3 * 2217 - b2 * 5352 + 51000) >> 16)
-    const __m128i b23 = _mm_unpacklo_epi16(a22, a32);
-    const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
-    const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
-    const __m128i d1 = _mm_add_epi32(c1, k12000_plus_one);
-    const __m128i d3 = _mm_add_epi32(c3, k51000);
-    const __m128i e1 = _mm_srai_epi32(d1, 16);
-    const __m128i e3 = _mm_srai_epi32(d3, 16);
-    const __m128i f1 = _mm_packs_epi32(e1, e1);
-    const __m128i f3 = _mm_packs_epi32(e3, e3);
-    // f1 = f1 + (a3 != 0);
-    // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
-    // desired (0, 1), we add one earlier through k12000_plus_one.
-    const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
-
-    _mm_storel_epi64((__m128i*)&out[ 0], d0);
-    _mm_storel_epi64((__m128i*)&out[ 4], g1);
-    _mm_storel_epi64((__m128i*)&out[ 8], d2);
-    _mm_storel_epi64((__m128i*)&out[12], f3);
+    const __m128i a0 = _mm_add_epi32(row0, row2);
+    const __m128i a1 = _mm_add_epi32(row1, row3);
+    const __m128i a2 = _mm_sub_epi32(row1, row3);
+    const __m128i a3 = _mm_sub_epi32(row0, row2);
+    const __m128i b0 = _mm_srai_epi32(_mm_add_epi32(a0, a1), 1);
+    const __m128i b1 = _mm_srai_epi32(_mm_add_epi32(a3, a2), 1);
+    const __m128i b2 = _mm_srai_epi32(_mm_sub_epi32(a3, a2), 1);
+    const __m128i b3 = _mm_srai_epi32(_mm_sub_epi32(a0, a1), 1);
+    const __m128i out0 = _mm_packs_epi32(b0, b1);
+    const __m128i out1 = _mm_packs_epi32(b2, b3);
+    _mm_storeu_si128((__m128i*)&out[0], out0);
+    _mm_storeu_si128((__m128i*)&out[8], out1);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
+  int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    int k;
+
+    FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin (within out[]).
+    {
+      // Load.
+      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
+      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
+      const __m128i d0 = _mm_sub_epi16(zero, out0);
+      const __m128i d1 = _mm_sub_epi16(zero, out1);
+      const __m128i abs0 = _mm_max_epi16(out0, d0);   // abs(v), 16b
+      const __m128i abs1 = _mm_max_epi16(out1, d1);
+      // v = abs(out) >> 3
+      const __m128i v0 = _mm_srai_epi16(abs0, 3);
+      const __m128i v1 = _mm_srai_epi16(abs1, 3);
+      // bin = min(v, MAX_COEFF_THRESH)
+      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
+      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
+      // Store.
+      _mm_storeu_si128((__m128i*)&out[0], bin0);
+      _mm_storeu_si128((__m128i*)&out[8], bin1);
+    }
+
+    // Accumulate the bins into the distribution histogram.
+    for (k = 0; k < 16; ++k) {
+      ++distribution[out[k]];
+    }
   }
+  VP8SetHistogramData(distribution, histo);
+}
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+// helper for chroma-DC predictions
+static WEBP_INLINE void Put8x8uv(uint8_t v, uint8_t* dst) {
+  int j;
+  const __m128i values = _mm_set1_epi8(v);
+  for (j = 0; j < 8; ++j) {
+    _mm_storel_epi64((__m128i*)(dst + j * BPS), values);
+  }
+}
+
+static WEBP_INLINE void Put16(uint8_t v, uint8_t* dst) {
+  int j;
+  const __m128i values = _mm_set1_epi8(v);
+  for (j = 0; j < 16; ++j) {
+    _mm_store_si128((__m128i*)(dst + j * BPS), values);
+  }
+}
+
+static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {
+  if (size == 4) {
+    int j;
+    for (j = 0; j < 4; ++j) {
+      memset(dst + j * BPS, value, 4);
+    }
+  } else if (size == 8) {
+    Put8x8uv(value, dst);
+  } else {
+    Put16(value, dst);
+  }
+}
+
+static WEBP_INLINE void VE8uv(uint8_t* dst, const uint8_t* top) {
+  int j;
+  const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
+  for (j = 0; j < 8; ++j) {
+    _mm_storel_epi64((__m128i*)(dst + j * BPS), top_values);
+  }
+}
+
+static WEBP_INLINE void VE16(uint8_t* dst, const uint8_t* top) {
+  const __m128i top_values = _mm_load_si128((const __m128i*)top);
+  int j;
+  for (j = 0; j < 16; ++j) {
+    _mm_store_si128((__m128i*)(dst + j * BPS), top_values);
+  }
+}
+
+static WEBP_INLINE void VerticalPred(uint8_t* dst,
+                                     const uint8_t* top, int size) {
+  if (top != NULL) {
+    if (size == 8) {
+      VE8uv(dst, top);
+    } else {
+      VE16(dst, top);
+    }
+  } else {
+    Fill(dst, 127, size);
+  }
+}
+
+static WEBP_INLINE void HE8uv(uint8_t* dst, const uint8_t* left) {
+  int j;
+  for (j = 0; j < 8; ++j) {
+    const __m128i values = _mm_set1_epi8(left[j]);
+    _mm_storel_epi64((__m128i*)dst, values);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void HE16(uint8_t* dst, const uint8_t* left) {
+  int j;
+  for (j = 0; j < 16; ++j) {
+    const __m128i values = _mm_set1_epi8(left[j]);
+    _mm_store_si128((__m128i*)dst, values);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void HorizontalPred(uint8_t* dst,
+                                       const uint8_t* left, int size) {
+  if (left != NULL) {
+    if (size == 8) {
+      HE8uv(dst, left);
+    } else {
+      HE16(dst, left);
+    }
+  } else {
+    Fill(dst, 129, size);
+  }
+}
+
+static WEBP_INLINE void TM(uint8_t* dst, const uint8_t* left,
+                           const uint8_t* top, int size) {
+  const __m128i zero = _mm_setzero_si128();
+  int y;
+  if (size == 8) {
+    const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
+    const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
+    for (y = 0; y < 8; ++y, dst += BPS) {
+      const int val = left[y] - left[-1];
+      const __m128i base = _mm_set1_epi16(val);
+      const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
+      _mm_storel_epi64((__m128i*)dst, out);
+    }
+  } else {
+    const __m128i top_values = _mm_load_si128((const __m128i*)top);
+    const __m128i top_base_0 = _mm_unpacklo_epi8(top_values, zero);
+    const __m128i top_base_1 = _mm_unpackhi_epi8(top_values, zero);
+    for (y = 0; y < 16; ++y, dst += BPS) {
+      const int val = left[y] - left[-1];
+      const __m128i base = _mm_set1_epi16(val);
+      const __m128i out_0 = _mm_add_epi16(base, top_base_0);
+      const __m128i out_1 = _mm_add_epi16(base, top_base_1);
+      const __m128i out = _mm_packus_epi16(out_0, out_1);
+      _mm_store_si128((__m128i*)dst, out);
+    }
+  }
+}
+
+static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
+                                   const uint8_t* top, int size) {
+  if (left != NULL) {
+    if (top != NULL) {
+      TM(dst, left, top, size);
+    } else {
+      HorizontalPred(dst, left, size);
+    }
+  } else {
+    // true motion without left samples (hence: with default 129 value)
+    // is equivalent to VE prediction where you just copy the top samples.
+    // Note that if top samples are not available, the default value is
+    // then 129, and not 127 as in the VerticalPred case.
+    if (top != NULL) {
+      VerticalPred(dst, top, size);
+    } else {
+      Fill(dst, 129, size);
+    }
+  }
+}
+
+static WEBP_INLINE void DC8uv(uint8_t* dst, const uint8_t* left,
+                              const uint8_t* top) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
+  const __m128i left_values = _mm_loadl_epi64((const __m128i*)left);
+  const __m128i sum_top = _mm_sad_epu8(top_values, zero);
+  const __m128i sum_left = _mm_sad_epu8(left_values, zero);
+  const int DC = _mm_cvtsi128_si32(sum_top) + _mm_cvtsi128_si32(sum_left) + 8;
+  Put8x8uv(DC >> 4, dst);
+}
+
+static WEBP_INLINE void DC8uvNoLeft(uint8_t* dst, const uint8_t* top) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top_values = _mm_loadl_epi64((const __m128i*)top);
+  const __m128i sum = _mm_sad_epu8(top_values, zero);
+  const int DC = _mm_cvtsi128_si32(sum) + 4;
+  Put8x8uv(DC >> 3, dst);
+}
+
+static WEBP_INLINE void DC8uvNoTop(uint8_t* dst, const uint8_t* left) {
+  // 'left' is contiguous so we can reuse the top summation.
+  DC8uvNoLeft(dst, left);
+}
+
+static WEBP_INLINE void DC8uvNoTopLeft(uint8_t* dst) {
+  Put8x8uv(0x80, dst);
+}
+
+static WEBP_INLINE void DC8uvMode(uint8_t* dst, const uint8_t* left,
+                                  const uint8_t* top) {
+  if (top != NULL) {
+    if (left != NULL) {  // top and left present
+      DC8uv(dst, left, top);
+    } else {  // top, but no left
+      DC8uvNoLeft(dst, top);
+    }
+  } else if (left != NULL) {  // left but no top
+    DC8uvNoTop(dst, left);
+  } else {  // no top, no left, nothing.
+    DC8uvNoTopLeft(dst);
+  }
+}
+
+static WEBP_INLINE void DC16(uint8_t* dst, const uint8_t* left,
+                             const uint8_t* top) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top_row = _mm_load_si128((const __m128i*)top);
+  const __m128i left_row = _mm_load_si128((const __m128i*)left);
+  const __m128i sad8x2 = _mm_sad_epu8(top_row, zero);
+  // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
+  const __m128i sum_top = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
+  const __m128i sad8x2_left = _mm_sad_epu8(left_row, zero);
+  // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
+  const __m128i sum_left =
+      _mm_add_epi16(sad8x2_left, _mm_shuffle_epi32(sad8x2_left, 2));
+  const int DC = _mm_cvtsi128_si32(sum_top) + _mm_cvtsi128_si32(sum_left) + 16;
+  Put16(DC >> 5, dst);
+}
+
+static WEBP_INLINE void DC16NoLeft(uint8_t* dst, const uint8_t* top) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top_row = _mm_load_si128((const __m128i*)top);
+  const __m128i sad8x2 = _mm_sad_epu8(top_row, zero);
+  // sum the two sads: sad8x2[0:1] + sad8x2[8:9]
+  const __m128i sum = _mm_add_epi16(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
+  const int DC = _mm_cvtsi128_si32(sum) + 8;
+  Put16(DC >> 4, dst);
+}
+
+static WEBP_INLINE void DC16NoTop(uint8_t* dst, const uint8_t* left) {
+  // 'left' is contiguous so we can reuse the top summation.
+  DC16NoLeft(dst, left);
+}
+
+static WEBP_INLINE void DC16NoTopLeft(uint8_t* dst) {
+  Put16(0x80, dst);
+}
+
+static WEBP_INLINE void DC16Mode(uint8_t* dst, const uint8_t* left,
+                                 const uint8_t* top) {
+  if (top != NULL) {
+    if (left != NULL) {  // top and left present
+      DC16(dst, left, top);
+    } else {  // top, but no left
+      DC16NoLeft(dst, top);
+    }
+  } else if (left != NULL) {  // left but no top
+    DC16NoTop(dst, left);
+  } else {  // no top, no left, nothing.
+    DC16NoTopLeft(dst);
+  }
+}
+
+//------------------------------------------------------------------------------
+// 4x4 predictions
+
+#define DST(x, y) dst[(x) + (y) * BPS]
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+// We use the following 8b-arithmetic tricks:
+//     (a + 2 * b + c + 2) >> 2 = (AC + b + 1) >> 1
+//   where: AC = (a + c) >> 1 = [(a + c + 1) >> 1] - [(a^c) & 1]
+// and:
+//     (a + 2 * b + c + 2) >> 2 = (AB + BC + 1) >> 1 - (ab|bc)&lsb
+//   where: AB = (a + b + 1) >> 1,   BC = (b + c + 1) >> 1
+//   and ab = a ^ b, bc = b ^ c, lsb = (AB^BC)&1
+
+static WEBP_INLINE void VE4(uint8_t* dst, const uint8_t* top) {  // vertical
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i ABCDEFGH = _mm_loadl_epi64((__m128i*)(top - 1));
+  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+  const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+  const __m128i a = _mm_avg_epu8(ABCDEFGH, CDEFGH00);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one);
+  const __m128i b = _mm_subs_epu8(a, lsb);
+  const __m128i avg = _mm_avg_epu8(b, BCDEFGH0);
+  const uint32_t vals = _mm_cvtsi128_si32(avg);
+  int i;
+  for (i = 0; i < 4; ++i) {
+    *(uint32_t*)(dst + i * BPS) = vals;
+  }
+}
+
+static WEBP_INLINE void HE4(uint8_t* dst, const uint8_t* top) {  // horizontal
+  const int X = top[-1];
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J);
+  *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K);
+  *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L);
+  *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L);
+}
+
+static WEBP_INLINE void DC4(uint8_t* dst, const uint8_t* top) {
+  uint32_t dc = 4;
+  int i;
+  for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
+  Fill(dst, dc >> 3, 4);
+}
+
+static WEBP_INLINE void LD4(uint8_t* dst, const uint8_t* top) {  // Down-Left
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
+  const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 1);
+  const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 2);
+  const __m128i CDEFGHH0 = _mm_insert_epi16(CDEFGH00, top[7], 3);
+  const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, CDEFGHH0);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one);
+  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+  const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0);
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(               abcdefg    );
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1));
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2));
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3));
+}
+
+static WEBP_INLINE void VR4(uint8_t* dst,
+                            const uint8_t* top) {  // Vertical-Right
+  const __m128i one = _mm_set1_epi8(1);
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int X = top[-1];
+  const __m128i XABCD = _mm_loadl_epi64((const __m128i*)(top - 1));
+  const __m128i ABCD0 = _mm_srli_si128(XABCD, 1);
+  const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0);
+  const __m128i _XABCD = _mm_slli_si128(XABCD, 1);
+  const __m128i IXABCD = _mm_insert_epi16(_XABCD, I | (X << 8), 0);
+  const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
+  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+  const __m128i efgh = _mm_avg_epu8(avg2, XABCD);
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(               abcd    );
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(               efgh    );
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1));
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1));
+
+  // these two are hard to implement in SSE2, so we keep the C-version:
+  DST(0, 2) = AVG3(J, I, X);
+  DST(0, 3) = AVG3(K, J, I);
+}
+
+static WEBP_INLINE void VL4(uint8_t* dst,
+                            const uint8_t* top) {  // Vertical-Left
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i ABCDEFGH = _mm_loadl_epi64((const __m128i*)top);
+  const __m128i BCDEFGH_ = _mm_srli_si128(ABCDEFGH, 1);
+  const __m128i CDEFGH__ = _mm_srli_si128(ABCDEFGH, 2);
+  const __m128i avg1 = _mm_avg_epu8(ABCDEFGH, BCDEFGH_);
+  const __m128i avg2 = _mm_avg_epu8(CDEFGH__, BCDEFGH_);
+  const __m128i avg3 = _mm_avg_epu8(avg1, avg2);
+  const __m128i lsb1 = _mm_and_si128(_mm_xor_si128(avg1, avg2), one);
+  const __m128i ab = _mm_xor_si128(ABCDEFGH, BCDEFGH_);
+  const __m128i bc = _mm_xor_si128(CDEFGH__, BCDEFGH_);
+  const __m128i abbc = _mm_or_si128(ab, bc);
+  const __m128i lsb2 = _mm_and_si128(abbc, lsb1);
+  const __m128i avg4 = _mm_subs_epu8(avg3, lsb2);
+  const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4));
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(               avg1    );
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(               avg4    );
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1));
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1));
+
+  // these two are hard to get and irregular
+  DST(3, 2) = (extra_out >> 0) & 0xff;
+  DST(3, 3) = (extra_out >> 8) & 0xff;
+}
+
+static WEBP_INLINE void RD4(uint8_t* dst, const uint8_t* top) {  // Down-right
+  const __m128i one = _mm_set1_epi8(1);
+  const __m128i LKJIXABC = _mm_loadl_epi64((const __m128i*)(top - 5));
+  const __m128i LKJIXABCD = _mm_insert_epi16(LKJIXABC, top[3], 4);
+  const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1);
+  const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2);
+  const __m128i avg1 = _mm_avg_epu8(JIXABCD__, LKJIXABCD);
+  const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one);
+  const __m128i avg2 = _mm_subs_epu8(avg1, lsb);
+  const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_);
+  *(uint32_t*)(dst + 3 * BPS) = _mm_cvtsi128_si32(               abcdefg    );
+  *(uint32_t*)(dst + 2 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1));
+  *(uint32_t*)(dst + 1 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2));
+  *(uint32_t*)(dst + 0 * BPS) = _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3));
+}
+
+static WEBP_INLINE void HU4(uint8_t* dst, const uint8_t* top) {
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  DST(0, 0) =             AVG2(I, J);
+  DST(2, 0) = DST(0, 1) = AVG2(J, K);
+  DST(2, 1) = DST(0, 2) = AVG2(K, L);
+  DST(1, 0) =             AVG3(I, J, K);
+  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+  DST(3, 2) = DST(2, 2) =
+  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
+static WEBP_INLINE void HD4(uint8_t* dst, const uint8_t* top) {
+  const int X = top[-1];
+  const int I = top[-2];
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  const int A = top[0];
+  const int B = top[1];
+  const int C = top[2];
+
+  DST(0, 0) = DST(2, 1) = AVG2(I, X);
+  DST(0, 1) = DST(2, 2) = AVG2(J, I);
+  DST(0, 2) = DST(2, 3) = AVG2(K, J);
+  DST(0, 3)             = AVG2(L, K);
+
+  DST(3, 0)             = AVG3(A, B, C);
+  DST(2, 0)             = AVG3(X, A, B);
+  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+  DST(1, 3)             = AVG3(L, K, J);
+}
+
+static WEBP_INLINE void TM4(uint8_t* dst, const uint8_t* top) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i top_values = _mm_cvtsi32_si128(MemToUint32(top));
+  const __m128i top_base = _mm_unpacklo_epi8(top_values, zero);
+  int y;
+  for (y = 0; y < 4; ++y, dst += BPS) {
+    const int val = top[-2 - y] - top[-1];
+    const __m128i base = _mm_set1_epi16(val);
+    const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero);
+    *(int*)dst = _mm_cvtsi128_si32(out);
+  }
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+//------------------------------------------------------------------------------
+// luma 4x4 prediction
+
+// Left samples are top[-5 .. -2], top-left is top[-1], the top samples are
+// located at top[0..3], and the top-right samples at top[4..7].
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {
+  DC4(I4DC4 + dst, top);
+  TM4(I4TM4 + dst, top);
+  VE4(I4VE4 + dst, top);
+  HE4(I4HE4 + dst, top);
+  RD4(I4RD4 + dst, top);
+  VR4(I4VR4 + dst, top);
+  LD4(I4LD4 + dst, top);
+  VL4(I4VL4 + dst, top);
+  HD4(I4HD4 + dst, top);
+  HU4(I4HU4 + dst, top);
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
+                             const uint8_t* top) {
+  // U block
+  DC8uvMode(C8DC8 + dst, left, top);
+  VerticalPred(C8VE8 + dst, top, 8);
+  HorizontalPred(C8HE8 + dst, left, 8);
+  TrueMotion(C8TM8 + dst, left, top, 8);
+  // V block
+  dst += 8;
+  if (top != NULL) top += 8;
+  if (left != NULL) left += 16;
+  DC8uvMode(C8DC8 + dst, left, top);
+  VerticalPred(C8VE8 + dst, top, 8);
+  HorizontalPred(C8HE8 + dst, left, 8);
+  TrueMotion(C8TM8 + dst, left, top, 8);
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+static void Intra16Preds(uint8_t* dst,
+                         const uint8_t* left, const uint8_t* top) {
+  DC16Mode(I16DC16 + dst, left, top);
+  VerticalPred(I16VE16 + dst, top, 16);
+  HorizontalPred(I16HE16 + dst, left, 16);
+  TrueMotion(I16TM16 + dst, left, top, 16);
 }
 
 //------------------------------------------------------------------------------
 // Metric
 
-static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
-  const __m128i zero = _mm_set1_epi16(0);
+static WEBP_INLINE void SubtractAndAccumulate(const __m128i a, const __m128i b,
+                                              __m128i* const sum) {
+  // take abs(a-b) in 8b
+  const __m128i a_b = _mm_subs_epu8(a, b);
+  const __m128i b_a = _mm_subs_epu8(b, a);
+  const __m128i abs_a_b = _mm_or_si128(a_b, b_a);
+  // zero-extend to 16b
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i C0 = _mm_unpacklo_epi8(abs_a_b, zero);
+  const __m128i C1 = _mm_unpackhi_epi8(abs_a_b, zero);
+  // multiply with self
+  const __m128i sum1 = _mm_madd_epi16(C0, C0);
+  const __m128i sum2 = _mm_madd_epi16(C1, C1);
+  *sum = _mm_add_epi32(sum1, sum2);
+}
 
-  // Load values.
-  const __m128i a0 = _mm_loadl_epi64((__m128i*)&a[BPS * 0]);
-  const __m128i a1 = _mm_loadl_epi64((__m128i*)&a[BPS * 1]);
-  const __m128i a2 = _mm_loadl_epi64((__m128i*)&a[BPS * 2]);
-  const __m128i a3 = _mm_loadl_epi64((__m128i*)&a[BPS * 3]);
-  const __m128i b0 = _mm_loadl_epi64((__m128i*)&b[BPS * 0]);
-  const __m128i b1 = _mm_loadl_epi64((__m128i*)&b[BPS * 1]);
-  const __m128i b2 = _mm_loadl_epi64((__m128i*)&b[BPS * 2]);
-  const __m128i b3 = _mm_loadl_epi64((__m128i*)&b[BPS * 3]);
+static WEBP_INLINE int SSE_16xN(const uint8_t* a, const uint8_t* b,
+                                int num_pairs) {
+  __m128i sum = _mm_setzero_si128();
+  int32_t tmp[4];
+  int i;
+
+  for (i = 0; i < num_pairs; ++i) {
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[BPS * 0]);
+    const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[BPS * 0]);
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[BPS * 1]);
+    const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[BPS * 1]);
+    __m128i sum1, sum2;
+    SubtractAndAccumulate(a0, b0, &sum1);
+    SubtractAndAccumulate(a1, b1, &sum2);
+    sum = _mm_add_epi32(sum, _mm_add_epi32(sum1, sum2));
+    a += 2 * BPS;
+    b += 2 * BPS;
+  }
+  _mm_storeu_si128((__m128i*)tmp, sum);
+  return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {
+  return SSE_16xN(a, b, 8);
+}
 
-  // Combine pair of lines and convert to 16b.
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {
+  return SSE_16xN(a, b, 4);
+}
+
+#define LOAD_8x16b(ptr) \
+  _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(ptr)), zero)
+
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {
+  const __m128i zero = _mm_setzero_si128();
+  int num_pairs = 4;
+  __m128i sum = zero;
+  int32_t tmp[4];
+  while (num_pairs-- > 0) {
+    const __m128i a0 = LOAD_8x16b(&a[BPS * 0]);
+    const __m128i a1 = LOAD_8x16b(&a[BPS * 1]);
+    const __m128i b0 = LOAD_8x16b(&b[BPS * 0]);
+    const __m128i b1 = LOAD_8x16b(&b[BPS * 1]);
+    // subtract
+    const __m128i c0 = _mm_subs_epi16(a0, b0);
+    const __m128i c1 = _mm_subs_epi16(a1, b1);
+    // multiply/accumulate with self
+    const __m128i d0 = _mm_madd_epi16(c0, c0);
+    const __m128i d1 = _mm_madd_epi16(c1, c1);
+    // collect
+    const __m128i sum01 = _mm_add_epi32(d0, d1);
+    sum = _mm_add_epi32(sum, sum01);
+    a += 2 * BPS;
+    b += 2 * BPS;
+  }
+  _mm_storeu_si128((__m128i*)tmp, sum);
+  return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
+}
+#undef LOAD_8x16b
+
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {
+  const __m128i zero = _mm_setzero_si128();
+
+  // Load values. Note that we read 8 pixels instead of 4,
+  // but the a/b buffers are over-allocated to that effect.
+  const __m128i a0 = _mm_loadl_epi64((const __m128i*)&a[BPS * 0]);
+  const __m128i a1 = _mm_loadl_epi64((const __m128i*)&a[BPS * 1]);
+  const __m128i a2 = _mm_loadl_epi64((const __m128i*)&a[BPS * 2]);
+  const __m128i a3 = _mm_loadl_epi64((const __m128i*)&a[BPS * 3]);
+  const __m128i b0 = _mm_loadl_epi64((const __m128i*)&b[BPS * 0]);
+  const __m128i b1 = _mm_loadl_epi64((const __m128i*)&b[BPS * 1]);
+  const __m128i b2 = _mm_loadl_epi64((const __m128i*)&b[BPS * 2]);
+  const __m128i b3 = _mm_loadl_epi64((const __m128i*)&b[BPS * 3]);
+  // Combine pair of lines.
   const __m128i a01 = _mm_unpacklo_epi32(a0, a1);
   const __m128i a23 = _mm_unpacklo_epi32(a2, a3);
   const __m128i b01 = _mm_unpacklo_epi32(b0, b1);
   const __m128i b23 = _mm_unpacklo_epi32(b2, b3);
+  // Convert to 16b.
   const __m128i a01s = _mm_unpacklo_epi8(a01, zero);
   const __m128i a23s = _mm_unpacklo_epi8(a23, zero);
   const __m128i b01s = _mm_unpacklo_epi8(b01, zero);
   const __m128i b23s = _mm_unpacklo_epi8(b23, zero);
+  // subtract, square and accumulate
+  const __m128i d0 = _mm_subs_epi16(a01s, b01s);
+  const __m128i d1 = _mm_subs_epi16(a23s, b23s);
+  const __m128i e0 = _mm_madd_epi16(d0, d0);
+  const __m128i e1 = _mm_madd_epi16(d1, d1);
+  const __m128i sum = _mm_add_epi32(e0, e1);
 
-  // Compute differences; (a-b)^2 = (abs(a-b))^2 = (sat8(a-b) + sat8(b-a))^2
-  // TODO(cduvivier): Dissassemble and figure out why this is fastest. We don't
-  //                  need absolute values, there is no need to do calculation
-  //                  in 8bit as we are already in 16bit, ... Yet this is what
-  //                  benchmarks the fastest!
-  const __m128i d0 = _mm_subs_epu8(a01s, b01s);
-  const __m128i d1 = _mm_subs_epu8(b01s, a01s);
-  const __m128i d2 = _mm_subs_epu8(a23s, b23s);
-  const __m128i d3 = _mm_subs_epu8(b23s, a23s);
-
-  // Square and add them all together.
-  const __m128i madd0 = _mm_madd_epi16(d0, d0);
-  const __m128i madd1 = _mm_madd_epi16(d1, d1);
-  const __m128i madd2 = _mm_madd_epi16(d2, d2);
-  const __m128i madd3 = _mm_madd_epi16(d3, d3);
-  const __m128i sum0 = _mm_add_epi32(madd0, madd1);
-  const __m128i sum1 = _mm_add_epi32(madd2, madd3);
-  const __m128i sum2 = _mm_add_epi32(sum0, sum1);
   int32_t tmp[4];
-  _mm_storeu_si128((__m128i*)tmp, sum2);
+  _mm_storeu_si128((__m128i*)tmp, sum);
   return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
 }
 
@@ -497,24 +1155,22 @@ static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
 // Hadamard transform
 // Returns the difference between the weighted sum of the absolute value of
 // transformed coefficients.
-static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
-                          const uint16_t* const w) {
+static int TTransform(const uint8_t* inA, const uint8_t* inB,
+                      const uint16_t* const w) {
   int32_t sum[4];
   __m128i tmp_0, tmp_1, tmp_2, tmp_3;
   const __m128i zero = _mm_setzero_si128();
-  const __m128i one = _mm_set1_epi16(1);
-  const __m128i three = _mm_set1_epi16(3);
 
-  // Load, combine and tranpose inputs.
+  // Load, combine and transpose inputs.
   {
-    const __m128i inA_0 = _mm_loadl_epi64((__m128i*)&inA[BPS * 0]);
-    const __m128i inA_1 = _mm_loadl_epi64((__m128i*)&inA[BPS * 1]);
-    const __m128i inA_2 = _mm_loadl_epi64((__m128i*)&inA[BPS * 2]);
-    const __m128i inA_3 = _mm_loadl_epi64((__m128i*)&inA[BPS * 3]);
-    const __m128i inB_0 = _mm_loadl_epi64((__m128i*)&inB[BPS * 0]);
-    const __m128i inB_1 = _mm_loadl_epi64((__m128i*)&inB[BPS * 1]);
-    const __m128i inB_2 = _mm_loadl_epi64((__m128i*)&inB[BPS * 2]);
-    const __m128i inB_3 = _mm_loadl_epi64((__m128i*)&inB[BPS * 3]);
+    const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]);
+    const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
+    const __m128i inA_2 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 2]);
+    const __m128i inA_3 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 3]);
+    const __m128i inB_0 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 0]);
+    const __m128i inB_1 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 1]);
+    const __m128i inB_2 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 2]);
+    const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
 
     // Combine inA and inB (we'll do two transforms in parallel).
     const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
@@ -550,17 +1206,14 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
   // Horizontal pass and subsequent transpose.
   {
     // Calculate a and b (two 4x4 at once).
-    const __m128i a0 = _mm_slli_epi16(_mm_add_epi16(tmp_0, tmp_2), 2);
-    const __m128i a1 = _mm_slli_epi16(_mm_add_epi16(tmp_1, tmp_3), 2);
-    const __m128i a2 = _mm_slli_epi16(_mm_sub_epi16(tmp_1, tmp_3), 2);
-    const __m128i a3 = _mm_slli_epi16(_mm_sub_epi16(tmp_0, tmp_2), 2);
-    // b0_extra = (a0 != 0);
-    const __m128i b0_extra = _mm_andnot_si128(_mm_cmpeq_epi16 (a0, zero), one);
-    const __m128i b0_base = _mm_add_epi16(a0, a1);
+    const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+    const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+    const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+    const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+    const __m128i b0 = _mm_add_epi16(a0, a1);
     const __m128i b1 = _mm_add_epi16(a3, a2);
     const __m128i b2 = _mm_sub_epi16(a3, a2);
     const __m128i b3 = _mm_sub_epi16(a0, a1);
-    const __m128i b0 = _mm_add_epi16(b0_base, b0_extra);
     // a00 a01 a02 a03   b00 b01 b02 b03
     // a10 a11 a12 a13   b10 b11 b12 b13
     // a20 a21 a22 a23   b20 b21 b22 b23
@@ -598,8 +1251,8 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
     // Load all inputs.
     // TODO(cduvivier): Make variable declarations and allocations aligned so
     //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
-    const __m128i w_0 = _mm_loadu_si128((__m128i*)&w[0]);
-    const __m128i w_8 = _mm_loadu_si128((__m128i*)&w[8]);
+    const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);
+    const __m128i w_8 = _mm_loadu_si128((const __m128i*)&w[8]);
 
     // Calculate a and b (two 4x4 at once).
     const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
@@ -618,36 +1271,16 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
     __m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
 
     {
-      // sign(b) = b >> 15  (0x0000 if positive, 0xffff if negative)
-      const __m128i sign_A_b0 = _mm_srai_epi16(A_b0, 15);
-      const __m128i sign_A_b2 = _mm_srai_epi16(A_b2, 15);
-      const __m128i sign_B_b0 = _mm_srai_epi16(B_b0, 15);
-      const __m128i sign_B_b2 = _mm_srai_epi16(B_b2, 15);
-
-      // b = abs(b) = (b ^ sign) - sign
-      A_b0 = _mm_xor_si128(A_b0, sign_A_b0);
-      A_b2 = _mm_xor_si128(A_b2, sign_A_b2);
-      B_b0 = _mm_xor_si128(B_b0, sign_B_b0);
-      B_b2 = _mm_xor_si128(B_b2, sign_B_b2);
-      A_b0 = _mm_sub_epi16(A_b0, sign_A_b0);
-      A_b2 = _mm_sub_epi16(A_b2, sign_A_b2);
-      B_b0 = _mm_sub_epi16(B_b0, sign_B_b0);
-      B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
+      const __m128i d0 = _mm_sub_epi16(zero, A_b0);
+      const __m128i d1 = _mm_sub_epi16(zero, A_b2);
+      const __m128i d2 = _mm_sub_epi16(zero, B_b0);
+      const __m128i d3 = _mm_sub_epi16(zero, B_b2);
+      A_b0 = _mm_max_epi16(A_b0, d0);   // abs(v), 16b
+      A_b2 = _mm_max_epi16(A_b2, d1);
+      B_b0 = _mm_max_epi16(B_b0, d2);
+      B_b2 = _mm_max_epi16(B_b2, d3);
     }
 
-    // b = abs(b) + 3
-    A_b0 = _mm_add_epi16(A_b0, three);
-    A_b2 = _mm_add_epi16(A_b2, three);
-    B_b0 = _mm_add_epi16(B_b0, three);
-    B_b2 = _mm_add_epi16(B_b2, three);
-
-    // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
-    // b = (abs(b) + 3) >> 3
-    A_b0 = _mm_srai_epi16(A_b0, 3);
-    A_b2 = _mm_srai_epi16(A_b2, 3);
-    B_b0 = _mm_srai_epi16(B_b0, 3);
-    B_b2 = _mm_srai_epi16(B_b2, 3);
-
     // weighted sums
     A_b0 = _mm_madd_epi16(A_b0, w_0);
     A_b2 = _mm_madd_epi16(A_b2, w_8);
@@ -663,35 +1296,33 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
   return sum[0] + sum[1] + sum[2] + sum[3];
 }
 
-static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
-                        const uint16_t* const w) {
-  const int diff_sum = TTransformSSE2(a, b, w);
-  return (abs(diff_sum) + 8) >> 4;
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  const int diff_sum = TTransform(a, b, w);
+  return abs(diff_sum) >> 5;
 }
 
-static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
-                          const uint16_t* const w) {
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
   int D = 0;
   int x, y;
   for (y = 0; y < 16 * BPS; y += 4 * BPS) {
     for (x = 0; x < 16; x += 4) {
-      D += Disto4x4SSE2(a + x + y, b + x + y, w);
+      D += Disto4x4(a + x + y, b + x + y, w);
     }
   }
   return D;
 }
 
-
 //------------------------------------------------------------------------------
 // Quantization
 //
 
-// Simple quantization
-static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
-                             int n, const VP8Matrix* const mtx) {
-  const __m128i max_coeff_2047 = _mm_set1_epi16(2047);
-  const __m128i zero = _mm_set1_epi16(0);
-  __m128i sign0, sign8;
+static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
+                                       const uint16_t* const sharpen,
+                                       const VP8Matrix* const mtx) {
+  const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
+  const __m128i zero = _mm_setzero_si128();
   __m128i coeff0, coeff8;
   __m128i out0, out8;
   __m128i packed_out;
@@ -701,20 +1332,14 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
   //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
   __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
   __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
-  const __m128i sharpen0 = _mm_loadu_si128((__m128i*)&mtx->sharpen_[0]);
-  const __m128i sharpen8 = _mm_loadu_si128((__m128i*)&mtx->sharpen_[8]);
-  const __m128i iq0 = _mm_loadu_si128((__m128i*)&mtx->iq_[0]);
-  const __m128i iq8 = _mm_loadu_si128((__m128i*)&mtx->iq_[8]);
-  const __m128i bias0 = _mm_loadu_si128((__m128i*)&mtx->bias_[0]);
-  const __m128i bias8 = _mm_loadu_si128((__m128i*)&mtx->bias_[8]);
-  const __m128i q0 = _mm_loadu_si128((__m128i*)&mtx->q_[0]);
-  const __m128i q8 = _mm_loadu_si128((__m128i*)&mtx->q_[8]);
-  const __m128i zthresh0 = _mm_loadu_si128((__m128i*)&mtx->zthresh_[0]);
-  const __m128i zthresh8 = _mm_loadu_si128((__m128i*)&mtx->zthresh_[8]);
-
-  // sign(in) = in >> 15  (0x0000 if positive, 0xffff if negative)
-  sign0 = _mm_srai_epi16(in0, 15);
-  sign8 = _mm_srai_epi16(in8, 15);
+  const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
+  const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
+  const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
+  const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
+
+  // extract sign(in)  (0x0000 if positive, 0xffff if negative)
+  const __m128i sign0 = _mm_cmpgt_epi16(zero, in0);
+  const __m128i sign8 = _mm_cmpgt_epi16(zero, in8);
 
   // coeff = abs(in) = (in ^ sign) - sign
   coeff0 = _mm_xor_si128(in0, sign0);
@@ -723,43 +1348,47 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
   coeff8 = _mm_sub_epi16(coeff8, sign8);
 
   // coeff = abs(in) + sharpen
-  coeff0 = _mm_add_epi16(coeff0, sharpen0);
-  coeff8 = _mm_add_epi16(coeff8, sharpen8);
-
-  // if (coeff > 2047) coeff = 2047
-  coeff0 = _mm_min_epi16(coeff0, max_coeff_2047);
-  coeff8 = _mm_min_epi16(coeff8, max_coeff_2047);
+  if (sharpen != NULL) {
+    const __m128i sharpen0 = _mm_loadu_si128((const __m128i*)&sharpen[0]);
+    const __m128i sharpen8 = _mm_loadu_si128((const __m128i*)&sharpen[8]);
+    coeff0 = _mm_add_epi16(coeff0, sharpen0);
+    coeff8 = _mm_add_epi16(coeff8, sharpen8);
+  }
 
-  // out = (coeff * iQ + B) >> QFIX;
+  // out = (coeff * iQ + B) >> QFIX
   {
     // doing calculations with 32b precision (QFIX=17)
     // out = (coeff * iQ)
-    __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0);
-    __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0);
-    __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
-    __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
+    const __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0);
+    const __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0);
+    const __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
+    const __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
     __m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H);
     __m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
     __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
     __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
-    // expand bias from 16b to 32b
-    __m128i bias_00 = _mm_unpacklo_epi16(bias0, zero);
-    __m128i bias_04 = _mm_unpackhi_epi16(bias0, zero);
-    __m128i bias_08 = _mm_unpacklo_epi16(bias8, zero);
-    __m128i bias_12 = _mm_unpackhi_epi16(bias8, zero);
     // out = (coeff * iQ + B)
+    const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
+    const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
+    const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
+    const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
     out_00 = _mm_add_epi32(out_00, bias_00);
     out_04 = _mm_add_epi32(out_04, bias_04);
     out_08 = _mm_add_epi32(out_08, bias_08);
     out_12 = _mm_add_epi32(out_12, bias_12);
-    // out = (coeff * iQ + B) >> QFIX;
+    // out = QUANTDIV(coeff, iQ, B, QFIX)
     out_00 = _mm_srai_epi32(out_00, QFIX);
     out_04 = _mm_srai_epi32(out_04, QFIX);
     out_08 = _mm_srai_epi32(out_08, QFIX);
     out_12 = _mm_srai_epi32(out_12, QFIX);
+
     // pack result as 16b
     out0 = _mm_packs_epi32(out_00, out_04);
     out8 = _mm_packs_epi32(out_08, out_12);
+
+    // clamp the quantized level: if (out > MAX_LEVEL) out = MAX_LEVEL
+    out0 = _mm_min_epi16(out0, max_coeff_2047);
+    out8 = _mm_min_epi16(out8, max_coeff_2047);
   }
 
   // get sign back (if (sign[j]) out_n = -out_n)
@@ -772,17 +1401,8 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
   in0 = _mm_mullo_epi16(out0, q0);
   in8 = _mm_mullo_epi16(out8, q8);
 
-  // if (coeff <= mtx->zthresh_) {in=0; out=0;}
-  {
-    __m128i cmp0 = _mm_cmpgt_epi16(coeff0, zthresh0);
-    __m128i cmp8 = _mm_cmpgt_epi16(coeff8, zthresh8);
-    in0 = _mm_and_si128(in0, cmp0);
-    in8 = _mm_and_si128(in8, cmp8);
-    _mm_storeu_si128((__m128i*)&in[0], in0);
-    _mm_storeu_si128((__m128i*)&in[8], in8);
-    out0 = _mm_and_si128(out0, cmp0);
-    out8 = _mm_and_si128(out8, cmp8);
-  }
+  _mm_storeu_si128((__m128i*)&in[0], in0);
+  _mm_storeu_si128((__m128i*)&in[8], in8);
 
   // zigzag the output before storing it.
   //
@@ -809,29 +1429,55 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
   }
 
   // detect if all 'out' values are zeroes or not
-  {
-    int32_t tmp[4];
-    _mm_storeu_si128((__m128i*)tmp, packed_out);
-    if (n) {
-      tmp[0] &= ~0xff;
-    }
-    return (tmp[3] || tmp[2] || tmp[1] || tmp[0]);
-  }
+  return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
+}
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
+                            const VP8Matrix* const mtx) {
+  return DoQuantizeBlock(in, out, NULL, mtx);
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  const uint16_t* const sharpen = &mtx->sharpen_[0];
+  nz  = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
+  return nz;
 }
 
+//------------------------------------------------------------------------------
+// Entry point
+
 extern void VP8EncDspInitSSE2(void);
-void VP8EncDspInitSSE2(void) {
-  VP8CollectHistogram = CollectHistogramSSE2;
-  VP8EncQuantizeBlock = QuantizeBlockSSE2;
-  VP8ITransform = ITransformSSE2;
-  VP8FTransform = FTransformSSE2;
-  VP8SSE4x4 = SSE4x4SSE2;
-  VP8TDisto4x4 = Disto4x4SSE2;
-  VP8TDisto16x16 = Disto16x16SSE2;
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
 
-#endif   // WEBP_USE_SSE2
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE2(void) {
+  VP8CollectHistogram = CollectHistogram;
+  VP8EncPredLuma16 = Intra16Preds;
+  VP8EncPredChroma8 = IntraChromaPreds;
+  VP8EncPredLuma4 = Intra4Preds;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+  VP8ITransform = ITransform;
+  VP8FTransform = FTransform;
+  VP8FTransform2 = FTransform2;
+  VP8FTransformWHT = FTransformWHT;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE4x4 = SSE4x4;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/enc_sse41.c b/drivers/webp/dsp/enc_sse41.c
new file mode 100644
index 0000000000..27f4189833
--- /dev/null
+++ b/drivers/webp/dsp/enc_sse41.c
@@ -0,0 +1,375 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE4 version of some encoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+#include <smmintrin.h>
+#include <stdlib.h>  // for abs()
+
+#include "../enc/vp8enci.h"
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms.
+
+static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                             int start_block, int end_block,
+                             VP8Histogram* const histo) {
+  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
+  int j;
+  int distribution[MAX_COEFF_THRESH + 1] = { 0 };
+  for (j = start_block; j < end_block; ++j) {
+    int16_t out[16];
+    int k;
+
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin (within out[]).
+    {
+      // Load.
+      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);
+      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);
+      // v = abs(out) >> 3
+      const __m128i abs0 = _mm_abs_epi16(out0);
+      const __m128i abs1 = _mm_abs_epi16(out1);
+      const __m128i v0 = _mm_srai_epi16(abs0, 3);
+      const __m128i v1 = _mm_srai_epi16(abs1, 3);
+      // bin = min(v, MAX_COEFF_THRESH)
+      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
+      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
+      // Store.
+      _mm_storeu_si128((__m128i*)&out[0], bin0);
+      _mm_storeu_si128((__m128i*)&out[8], bin1);
+    }
+
+    // Accumulate the bins into the distribution histogram.
+    for (k = 0; k < 16; ++k) {
+      ++distribution[out[k]];
+    }
+  }
+  VP8SetHistogramData(distribution, histo);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform
+// Returns the difference between the weighted sums of the absolute values of
+// the transformed coefficients of inA and inB.
+static int TTransform(const uint8_t* inA, const uint8_t* inB,
+                      const uint16_t* const w) {
+  __m128i tmp_0, tmp_1, tmp_2, tmp_3;
+
+  // Load, combine and transpose inputs.
+  {
+    const __m128i inA_0 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 0]);
+    const __m128i inA_1 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 1]);
+    const __m128i inA_2 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 2]);
+    const __m128i inA_3 = _mm_loadl_epi64((const __m128i*)&inA[BPS * 3]);
+    const __m128i inB_0 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 0]);
+    const __m128i inB_1 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 1]);
+    const __m128i inB_2 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 2]);
+    const __m128i inB_3 = _mm_loadl_epi64((const __m128i*)&inB[BPS * 3]);
+
+    // Combine inA and inB (we'll do two transforms in parallel).
+    const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
+    const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
+    const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
+    const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
+    // a00 b00 a01 b01 a02 b02 a03 b03   0 0 0 0 0 0 0 0
+    // a10 b10 a11 b11 a12 b12 a13 b13   0 0 0 0 0 0 0 0
+    // a20 b20 a21 b21 a22 b22 a23 b23   0 0 0 0 0 0 0 0
+    // a30 b30 a31 b31 a32 b32 a33 b33   0 0 0 0 0 0 0 0
+
+    // Transpose the two 4x4, discarding the filling zeroes.
+    const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
+    const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
+    // a00 a20  b00 b20  a01 a21  b01 b21  a02 a22  b02 b22  a03 a23  b03 b23
+    // a10 a30  b10 b30  a11 a31  b11 b31  a12 a32  b12 b32  a13 a33  b13 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
+    // a00 a10 a20 a30  b00 b10 b20 b30  a01 a11 a21 a31  b01 b11 b21 b31
+    // a02 a12 a22 a32  b02 b12 b22 b32  a03 a13 a23 a33  b03 b13 b23 b33
+
+    // Convert to 16b.
+    tmp_0 = _mm_cvtepu8_epi16(transpose1_0);
+    tmp_1 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_0, 8));
+    tmp_2 = _mm_cvtepu8_epi16(transpose1_1);
+    tmp_3 = _mm_cvtepu8_epi16(_mm_srli_si128(transpose1_1, 8));
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // Calculate a and b (two 4x4 at once).
+    const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+    const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+    const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+    const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+    const __m128i b0 = _mm_add_epi16(a0, a1);
+    const __m128i b1 = _mm_add_epi16(a3, a2);
+    const __m128i b2 = _mm_sub_epi16(a3, a2);
+    const __m128i b3 = _mm_sub_epi16(a0, a1);
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+
+    // Transpose the two 4x4.
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 b22 b32 b03 b13 b23 b33
+    tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Vertical pass and difference of weighted sums.
+  {
+    // Load all inputs.
+    const __m128i w_0 = _mm_loadu_si128((const __m128i*)&w[0]);
+    const __m128i w_8 = _mm_loadu_si128((const __m128i*)&w[8]);
+
+    // Calculate a and b (two 4x4 at once).
+    const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+    const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+    const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+    const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+    const __m128i b0 = _mm_add_epi16(a0, a1);
+    const __m128i b1 = _mm_add_epi16(a3, a2);
+    const __m128i b2 = _mm_sub_epi16(a3, a2);
+    const __m128i b3 = _mm_sub_epi16(a0, a1);
+
+    // Separate the transforms of inA and inB.
+    __m128i A_b0 = _mm_unpacklo_epi64(b0, b1);
+    __m128i A_b2 = _mm_unpacklo_epi64(b2, b3);
+    __m128i B_b0 = _mm_unpackhi_epi64(b0, b1);
+    __m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
+
+    A_b0 = _mm_abs_epi16(A_b0);
+    A_b2 = _mm_abs_epi16(A_b2);
+    B_b0 = _mm_abs_epi16(B_b0);
+    B_b2 = _mm_abs_epi16(B_b2);
+
+    // weighted sums
+    A_b0 = _mm_madd_epi16(A_b0, w_0);
+    A_b2 = _mm_madd_epi16(A_b2, w_8);
+    B_b0 = _mm_madd_epi16(B_b0, w_0);
+    B_b2 = _mm_madd_epi16(B_b2, w_8);
+    A_b0 = _mm_add_epi32(A_b0, A_b2);
+    B_b0 = _mm_add_epi32(B_b0, B_b2);
+
+    // difference of weighted sums
+    A_b2 = _mm_sub_epi32(A_b0, B_b0);
+    // cascading summation of the differences
+    B_b0 = _mm_hadd_epi32(A_b2, A_b2);
+    B_b2 = _mm_hadd_epi32(B_b0, B_b0);
+    return _mm_cvtsi128_si32(B_b2);
+  }
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {
+  const int diff_sum = TTransform(a, b, w);
+  return abs(diff_sum) >> 5;
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+// Generates a pshufb constant for shuffling 16b words.
+#define PSHUFB_CST(A,B,C,D,E,F,G,H) \
+  _mm_set_epi8(2 * (H) + 1, 2 * (H) + 0, 2 * (G) + 1, 2 * (G) + 0, \
+               2 * (F) + 1, 2 * (F) + 0, 2 * (E) + 1, 2 * (E) + 0, \
+               2 * (D) + 1, 2 * (D) + 0, 2 * (C) + 1, 2 * (C) + 0, \
+               2 * (B) + 1, 2 * (B) + 0, 2 * (A) + 1, 2 * (A) + 0)
+
+static WEBP_INLINE int DoQuantizeBlock(int16_t in[16], int16_t out[16],
+                                       const uint16_t* const sharpen,
+                                       const VP8Matrix* const mtx) {
+  const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
+  const __m128i zero = _mm_setzero_si128();
+  __m128i out0, out8;
+  __m128i packed_out;
+
+  // Load all inputs.
+  // TODO(cduvivier): Make variable declarations and allocations aligned so that
+  //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
+  __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
+  __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
+  const __m128i iq0 = _mm_loadu_si128((const __m128i*)&mtx->iq_[0]);
+  const __m128i iq8 = _mm_loadu_si128((const __m128i*)&mtx->iq_[8]);
+  const __m128i q0 = _mm_loadu_si128((const __m128i*)&mtx->q_[0]);
+  const __m128i q8 = _mm_loadu_si128((const __m128i*)&mtx->q_[8]);
+
+  // coeff = abs(in)
+  __m128i coeff0 = _mm_abs_epi16(in0);
+  __m128i coeff8 = _mm_abs_epi16(in8);
+
+  // coeff = abs(in) + sharpen
+  if (sharpen != NULL) {
+    const __m128i sharpen0 = _mm_loadu_si128((const __m128i*)&sharpen[0]);
+    const __m128i sharpen8 = _mm_loadu_si128((const __m128i*)&sharpen[8]);
+    coeff0 = _mm_add_epi16(coeff0, sharpen0);
+    coeff8 = _mm_add_epi16(coeff8, sharpen8);
+  }
+
+  // out = (coeff * iQ + B) >> QFIX
+  {
+    // doing calculations with 32b precision (QFIX=17)
+    // out = (coeff * iQ)
+    const __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0);
+    const __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0);
+    const __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
+    const __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
+    __m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H);
+    __m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
+    __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
+    __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
+    // out = (coeff * iQ + B)
+    const __m128i bias_00 = _mm_loadu_si128((const __m128i*)&mtx->bias_[0]);
+    const __m128i bias_04 = _mm_loadu_si128((const __m128i*)&mtx->bias_[4]);
+    const __m128i bias_08 = _mm_loadu_si128((const __m128i*)&mtx->bias_[8]);
+    const __m128i bias_12 = _mm_loadu_si128((const __m128i*)&mtx->bias_[12]);
+    out_00 = _mm_add_epi32(out_00, bias_00);
+    out_04 = _mm_add_epi32(out_04, bias_04);
+    out_08 = _mm_add_epi32(out_08, bias_08);
+    out_12 = _mm_add_epi32(out_12, bias_12);
+    // out = QUANTDIV(coeff, iQ, B, QFIX)
+    out_00 = _mm_srai_epi32(out_00, QFIX);
+    out_04 = _mm_srai_epi32(out_04, QFIX);
+    out_08 = _mm_srai_epi32(out_08, QFIX);
+    out_12 = _mm_srai_epi32(out_12, QFIX);
+
+    // pack result as 16b
+    out0 = _mm_packs_epi32(out_00, out_04);
+    out8 = _mm_packs_epi32(out_08, out_12);
+
+    // if (coeff > 2047) coeff = 2047
+    out0 = _mm_min_epi16(out0, max_coeff_2047);
+    out8 = _mm_min_epi16(out8, max_coeff_2047);
+  }
+
+  // put sign back
+  out0 = _mm_sign_epi16(out0, in0);
+  out8 = _mm_sign_epi16(out8, in8);
+
+  // in = out * Q
+  in0 = _mm_mullo_epi16(out0, q0);
+  in8 = _mm_mullo_epi16(out8, q8);
+
+  _mm_storeu_si128((__m128i*)&in[0], in0);
+  _mm_storeu_si128((__m128i*)&in[8], in8);
+
+  // zigzag the output before storing it. The re-ordering is:
+  //    0 1 2 3 4 5 6 7 | 8  9 10 11 12 13 14 15
+  // -> 0 1 4[8]5 2 3 6 | 9 12 13 10 [7]11 14 15
+  // There are only two misplaced entries ([8] and [7]) that cross the
+  // register boundary.
+  // We use pshufb instead of pshuflo/pshufhi.
+  {
+    const __m128i kCst_lo = PSHUFB_CST(0, 1, 4, -1, 5, 2, 3, 6);
+    const __m128i kCst_7 = PSHUFB_CST(-1, -1, -1, -1, 7, -1, -1, -1);
+    const __m128i tmp_lo = _mm_shuffle_epi8(out0, kCst_lo);
+    const __m128i tmp_7 = _mm_shuffle_epi8(out0, kCst_7);  // extract #7
+    const __m128i kCst_hi = PSHUFB_CST(1, 4, 5, 2, -1, 3, 6, 7);
+    const __m128i kCst_8 = PSHUFB_CST(-1, -1, -1, 0, -1, -1, -1, -1);
+    const __m128i tmp_hi = _mm_shuffle_epi8(out8, kCst_hi);
+    const __m128i tmp_8 = _mm_shuffle_epi8(out8, kCst_8);  // extract #8
+    const __m128i out_z0 = _mm_or_si128(tmp_lo, tmp_8);
+    const __m128i out_z8 = _mm_or_si128(tmp_hi, tmp_7);
+    _mm_storeu_si128((__m128i*)&out[0], out_z0);
+    _mm_storeu_si128((__m128i*)&out[8], out_z8);
+    packed_out = _mm_packs_epi16(out_z0, out_z8);
+  }
+
+  // detect if all 'out' values are zeroes or not
+  return (_mm_movemask_epi8(_mm_cmpeq_epi8(packed_out, zero)) != 0xffff);
+}
+
+#undef PSHUFB_CST
+
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         const VP8Matrix* const mtx) {
+  return DoQuantizeBlock(in, out, &mtx->sharpen_[0], mtx);
+}
+
+static int QuantizeBlockWHT(int16_t in[16], int16_t out[16],
+                            const VP8Matrix* const mtx) {
+  return DoQuantizeBlock(in, out, NULL, mtx);
+}
+
+static int Quantize2Blocks(int16_t in[32], int16_t out[32],
+                           const VP8Matrix* const mtx) {
+  int nz;
+  const uint16_t* const sharpen = &mtx->sharpen_[0];
+  nz  = DoQuantizeBlock(in + 0 * 16, out + 0 * 16, sharpen, mtx) << 0;
+  nz |= DoQuantizeBlock(in + 1 * 16, out + 1 * 16, sharpen, mtx) << 1;
+  return nz;
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8EncDspInitSSE41(void);
+WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspInitSSE41(void) {
+  VP8CollectHistogram = CollectHistogram;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8EncQuantize2Blocks = Quantize2Blocks;
+  VP8EncQuantizeBlockWHT = QuantizeBlockWHT;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+}
+
+#else  // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(VP8EncDspInitSSE41)
+
+#endif  // WEBP_USE_SSE41
diff --git a/drivers/webp/dsp/filters.c b/drivers/webp/dsp/filters.c
new file mode 100644
index 0000000000..5c30f2e457
--- /dev/null
+++ b/drivers/webp/dsp/filters.c
@@ -0,0 +1,240 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#include "./dsp.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro.
+
+# define SANITY_CHECK(in, out)                                                 \
+  assert(in != NULL);                                                          \
+  assert(out != NULL);                                                         \
+  assert(width > 0);                                                           \
+  assert(height > 0);                                                          \
+  assert(stride >= width);                                                     \
+  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
+  (void)height;  // Silence unused warning.
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
+                                    uint8_t* dst, int length, int inverse) {
+  int i;
+  if (inverse) {
+    for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
+  } else {
+    for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
+  }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
+                                           int width, int height, int stride,
+                                           int row, int num_rows,
+                                           int inverse, uint8_t* out) {
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;
+
+  if (row == 0) {
+    // Leftmost pixel is the same as input for topmost scanline.
+    out[0] = in[0];
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    // Leftmost pixel is predicted from above.
+    PredictLine(in, preds - stride, out, 1, inverse);
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;
+
+  if (row == 0) {
+    // Very first top-left pixel is copied.
+    out[0] = in[0];
+    // Rest of top scan-line is left-predicted.
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    row = 1;
+    in += stride;
+    out += stride;
+  } else {
+    // We are starting from in-between. Make sure 'preds' points to prev row.
+    preds -= stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    PredictLine(in, preds, out, width, inverse);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+  const int g = a + b - c;
+  return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
+}
+
+static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;
+
+  // left prediction for top scan-line
+  if (row == 0) {
+    out[0] = in[0];
+    PredictLine(in + 1, preds, out + 1, width - 1, inverse);
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    int w;
+    // leftmost pixel: predict from above.
+    PredictLine(in, preds - stride, out, 1, inverse);
+    for (w = 1; w < width; ++w) {
+      const int pred = GradientPredictor(preds[w - 1],
+                                         preds[w - stride],
+                                         preds[w - stride - 1]);
+      out[w] = in[w] + (inverse ? pred : -pred);
+    }
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {
+  DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);
+}
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {
+  DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);
+}
+
+
+static void GradientFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {
+  DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);
+}
+
+
+//------------------------------------------------------------------------------
+
+static void VerticalUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {
+  DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);
+}
+
+static void HorizontalUnfilter(int width, int height, int stride, int row,
+                               int num_rows, uint8_t* data) {
+  DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);
+}
+
+static void GradientUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {
+  DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);
+}
+
+//------------------------------------------------------------------------------
+// Init function
+
+WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+extern void VP8FiltersInitMIPSdspR2(void);
+extern void VP8FiltersInitSSE2(void);
+
+static volatile VP8CPUInfo filters_last_cpuinfo_used =
+    (VP8CPUInfo)&filters_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInit(void) {
+  if (filters_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+  WebPUnfilters[WEBP_FILTER_NONE] = NULL;
+  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;
+  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
+  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+
+  WebPFilters[WEBP_FILTER_NONE] = NULL;
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8FiltersInitSSE2();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8FiltersInitMIPSdspR2();
+    }
+#endif
+  }
+  filters_last_cpuinfo_used = VP8GetCPUInfo;
+}
diff --git a/drivers/webp/dsp/filters_mips_dsp_r2.c b/drivers/webp/dsp/filters_mips_dsp_r2.c
new file mode 100644
index 0000000000..8134af511b
--- /dev/null
+++ b/drivers/webp/dsp/filters_mips_dsp_r2.c
@@ -0,0 +1,405 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
+//            Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "../dsp/dsp.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro: argument checks for the Do*Filter() functions; besides 'in'/'out' it reads the caller's width, height, stride, row and num_rows.
+
+# define SANITY_CHECK(in, out)                                                 \
+  assert(in != NULL);                                                          \
+  assert(out != NULL);                                                         \
+  assert(width > 0);                                                           \
+  assert(height > 0);                                                          \
+  assert(stride >= width);                                                     \
+  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
+  (void)height;  // Silence unused warning: 'height' is otherwise referenced only inside the assert()s above.
+
+// Left prediction over LENGTH bytes; INVERSE must be a literal 0 or 1 because
+// it is stringified into an assembler ".if". INVERSE: SRC[i] += SRC[i - 1],
+// written back through SRC (callers pass DST == SRC). Otherwise:
+// DST[i] = SRC[i] - SRC[i - 1], for the 4-byte passes and the byte tail alike.
+#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do {                        \
+    const uint8_t* psrc = (uint8_t*)(SRC);                                     \
+    uint8_t* pdst = (uint8_t*)(DST);                                           \
+    const int ilength = (int)(LENGTH);                                         \
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6;                       \
+    __asm__ volatile (                                                         \
+      ".set      push                                   \n\t"                  \
+      ".set      noreorder                              \n\t"                  \
+      "srl       %[temp0],    %[length],    0x2         \n\t" /* 4-byte passes */ \
+      "beqz      %[temp0],    4f                        \n\t"                  \
+      " andi     %[temp6],    %[length],    0x3         \n\t" /* tail bytes */ \
+    ".if " #INVERSE "                                   \n\t"                  \
+      "lbu       %[temp1],    -1(%[src])                \n\t" /* running left */ \
+    "1:                                                 \n\t"                  \
+      "lbu       %[temp2],    0(%[src])                 \n\t"                  \
+      "lbu       %[temp3],    1(%[src])                 \n\t"                  \
+      "lbu       %[temp4],    2(%[src])                 \n\t"                  \
+      "lbu       %[temp5],    3(%[src])                 \n\t"                  \
+      "addiu     %[src],      %[src],       4           \n\t"                  \
+      "addiu     %[temp0],    %[temp0],     -1          \n\t"                  \
+      "addu      %[temp2],    %[temp2],     %[temp1]    \n\t"                  \
+      "addu      %[temp3],    %[temp3],     %[temp2]    \n\t"                  \
+      "addu      %[temp4],    %[temp4],     %[temp3]    \n\t"                  \
+      "addu      %[temp1],    %[temp5],     %[temp4]    \n\t"                  \
+      "sb        %[temp2],    -4(%[src])                \n\t"                  \
+      "sb        %[temp3],    -3(%[src])                \n\t"                  \
+      "sb        %[temp4],    -2(%[src])                \n\t"                  \
+      "bnez      %[temp0],    1b                        \n\t"                  \
+      " sb       %[temp1],    -1(%[src])                \n\t"                  \
+    ".else                                              \n\t"                  \
+    "1:                                                 \n\t"                  \
+      "ulw       %[temp1],    -1(%[src])                \n\t" /* left bytes */ \
+      "ulw       %[temp2],    0(%[src])                 \n\t" /* cur bytes */  \
+      "addiu     %[src],      %[src],       4           \n\t"                  \
+      "addiu     %[temp0],    %[temp0],     -1          \n\t"                  \
+      "subu.qb   %[temp3],    %[temp2],     %[temp1]    \n\t" /* cur - left */ \
+      "usw       %[temp3],    0(%[dst])                 \n\t"                  \
+      "bnez      %[temp0],    1b                        \n\t"                  \
+      " addiu    %[dst],      %[dst],       4           \n\t"                  \
+    ".endif                                             \n\t"                  \
+    "4:                                                 \n\t"                  \
+      "beqz      %[temp6],    3f                        \n\t"                  \
+      " nop                                             \n\t"                  \
+    "2:                                                 \n\t"                  \
+      "lbu       %[temp1],    -1(%[src])                \n\t" /* left */       \
+      "lbu       %[temp2],    0(%[src])                 \n\t" /* cur */        \
+      "addiu     %[src],      %[src],       1           \n\t"                  \
+    ".if " #INVERSE "                                   \n\t"                  \
+      "addu      %[temp3],    %[temp1],     %[temp2]    \n\t"                  \
+      "sb        %[temp3],    -1(%[src])                \n\t"                  \
+    ".else                                              \n\t"                  \
+      "subu      %[temp3],    %[temp2],     %[temp1]    \n\t" /* cur - left, same order as subu.qb above */ \
+      "sb        %[temp3],    0(%[dst])                 \n\t"                  \
+    ".endif                                             \n\t"                  \
+      "addiu     %[temp6],    %[temp6],     -1          \n\t"                  \
+      "bnez      %[temp6],    2b                        \n\t"                  \
+      " addiu    %[dst],      %[dst],       1           \n\t"                  \
+    "3:                                                 \n\t"                  \
+      ".set      pop                                    \n\t"                  \
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),         \
+        [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
+        [temp6]"=&r"(temp6), [dst]"+&r"(pdst), [src]"+&r"(psrc)                \
+      : [length]"r"(ilength)                                                   \
+      : "memory"                                                               \
+    );                                                                         \
+  } while (0)
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, uint8_t* dst,
+                                    int length, int inverse) {  // Runtime dispatch onto the two DO_PREDICT_LINE expansions.
+  if (inverse) {
+    DO_PREDICT_LINE(src, dst, length, 1);  // The last argument must be a literal: the macro stringifies it into an assembler ".if".
+  } else {
+    DO_PREDICT_LINE(src, dst, length, 0);
+  }
+}
+
+#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) do {         \
+    const uint8_t* psrc = (uint8_t*)(SRC);  /* DST[i] = SRC[i] + PRED[i] if INVERSE, else SRC[i] - PRED[i] */ \
+    const uint8_t* ppred = (uint8_t*)(PRED);                                   \
+    uint8_t* pdst = (uint8_t*)(DST);                                           \
+    const int ilength = (int)(LENGTH);                                         \
+    int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;                \
+    __asm__ volatile (                                                         \
+      ".set      push                                   \n\t"                  \
+      ".set      noreorder                              \n\t"                  \
+      "srl       %[temp0],    %[length],    0x3         \n\t" /* 8-byte passes */ \
+      "beqz      %[temp0],    4f                        \n\t"                  \
+      " andi     %[temp7],    %[length],    0x7         \n\t" /* tail bytes */ \
+    "1:                                                 \n\t"                  \
+      "ulw       %[temp1],    0(%[src])                 \n\t"                  \
+      "ulw       %[temp2],    0(%[pred])                \n\t"                  \
+      "ulw       %[temp3],    4(%[src])                 \n\t"                  \
+      "ulw       %[temp4],    4(%[pred])                \n\t"                  \
+      "addiu     %[src],      %[src],       8           \n\t"                  \
+    ".if " #INVERSE "                                   \n\t"                  \
+      "addu.qb   %[temp5],    %[temp1],     %[temp2]    \n\t"                  \
+      "addu.qb   %[temp6],    %[temp3],     %[temp4]    \n\t"                  \
+    ".else                                              \n\t"                  \
+      "subu.qb   %[temp5],    %[temp1],     %[temp2]    \n\t" /* src - pred */ \
+      "subu.qb   %[temp6],    %[temp3],     %[temp4]    \n\t"                  \
+    ".endif                                             \n\t"                  \
+      "addiu     %[pred],     %[pred],      8           \n\t"                  \
+      "usw       %[temp5],    0(%[dst])                 \n\t"                  \
+      "usw       %[temp6],    4(%[dst])                 \n\t"                  \
+      "addiu     %[temp0],    %[temp0],     -1          \n\t"                  \
+      "bnez      %[temp0],    1b                        \n\t"                  \
+      " addiu    %[dst],      %[dst],       8           \n\t"                  \
+    "4:                                                 \n\t"                  \
+      "beqz      %[temp7],    3f                        \n\t"                  \
+      " nop                                             \n\t"                  \
+    "2:                                                 \n\t" /* byte-at-a-time tail */ \
+      "lbu       %[temp1],    0(%[src])                 \n\t"                  \
+      "lbu       %[temp2],    0(%[pred])                \n\t"                  \
+      "addiu     %[src],      %[src],       1           \n\t"                  \
+      "addiu     %[pred],     %[pred],      1           \n\t"                  \
+    ".if " #INVERSE "                                   \n\t"                  \
+      "addu      %[temp3],    %[temp1],     %[temp2]    \n\t"                  \
+    ".else                                              \n\t"                  \
+      "subu      %[temp3],    %[temp1],     %[temp2]    \n\t" /* src - pred */ \
+    ".endif                                             \n\t"                  \
+      "sb        %[temp3],    0(%[dst])                 \n\t"                  \
+      "addiu     %[temp7],    %[temp7],     -1          \n\t"                  \
+      "bnez      %[temp7],    2b                        \n\t"                  \
+      " addiu    %[dst],      %[dst],       1           \n\t"                  \
+    "3:                                                 \n\t"                  \
+      ".set      pop                                    \n\t"                  \
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),         \
+        [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
+        [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [pred]"+&r"(ppred),          \
+        [dst]"+&r"(pdst), [src]"+&r"(psrc)                                     \
+      : [length]"r"(ilength)                                                   \
+      : "memory"                                                               \
+    );                                                                         \
+  } while (0)
+
+#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST, INVERSE) do {                    \
+    int temp1, temp2, temp3;  /* single byte: *DST = *SRC + *PRED if INVERSE, else *SRC - *PRED */ \
+    __asm__ volatile (                                                         \
+      "lbu       %[temp1],   0(%[src])               \n\t"                     \
+      "lbu       %[temp2],   0(%[pred])              \n\t"                     \
+    ".if " #INVERSE "                                \n\t"                     \
+      "addu      %[temp3],   %[temp1],   %[temp2]    \n\t"                     \
+    ".else                                           \n\t"                     \
+      "subu      %[temp3],   %[temp1],   %[temp2]    \n\t"  /* src - pred */   \
+    ".endif                                          \n\t"                     \
+      "sb        %[temp3],   0(%[dst])               \n\t"  /* stores the low byte only */ \
+      : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3)          \
+      : [pred]"r"((PRED)), [dst]"r"((DST)), [src]"r"((SRC))                    \
+      : "memory"                                                               \
+    );                                                                         \
+  } while (0)
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+#define FILTER_LINE_BY_LINE(INVERSE) do {  /* uses the caller's row, last_row, in, out, preds, stride and width */ \
+    while (row < last_row) {                                                   \
+      PREDICT_LINE_ONE_PASS(in, preds - stride, out, INVERSE);  /* leftmost byte: predicted from the row above */ \
+      DO_PREDICT_LINE(in + 1, out + 1, width - 1, INVERSE);  /* remaining bytes: predicted from the left */ \
+      ++row;                                                                   \
+      preds += stride;                                                         \
+      in += stride;                                                            \
+      out += stride;                                                           \
+    }                                                                          \
+  } while (0)
+
+static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
+                                           int width, int height, int stride,
+                                           int row, int num_rows,
+                                           int inverse, uint8_t* out) {  // Handles rows [row, row + num_rows); inverse != 0 undoes the filter.
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;  // Byte offset of the first row to process.
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;  // Predict from the output buffer when unfiltering, from the input when filtering.
+
+  if (row == 0) {
+    // Leftmost pixel is the same as input for topmost scanline (there is no row above to predict from).
+    out[0] = in[0];
+    PredictLine(in + 1, out + 1, width - 1, inverse);  // Rest of the top row is predicted from the left neighbour.
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line. The macro argument must be a literal, hence one expansion per direction.
+  if (inverse) {
+    FILTER_LINE_BY_LINE(1);
+  } else {
+    FILTER_LINE_BY_LINE(0);
+  }
+}
+
+#undef FILTER_LINE_BY_LINE
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {  // Forward horizontal filter over the whole image.
+  DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);  // row = 0, num_rows = height, inverse = 0.
+}
+
+static void HorizontalUnfilter(int width, int height, int stride, int row,
+                               int num_rows, uint8_t* data) {  // Undoes the horizontal filter in place on rows [row, row + num_rows).
+  DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);  // in == out == data, inverse = 1.
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+#define FILTER_LINE_BY_LINE(INVERSE) do {  /* uses the caller's row, last_row, in, out, preds, stride and width */ \
+    while (row < last_row) {                                                   \
+      DO_PREDICT_LINE_VERTICAL(in, preds, out, width, INVERSE);  /* whole row, combined with the bytes at 'preds' */ \
+      ++row;                                                                   \
+      preds += stride;                                                         \
+      in += stride;                                                            \
+      out += stride;                                                           \
+    }                                                                          \
+  } while (0)
+
+static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {  // Handles rows [row, row + num_rows); inverse != 0 undoes the filter.
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;  // Byte offset of the first row to process.
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;  // Predict from the output buffer when unfiltering, from the input when filtering.
+
+  if (row == 0) {
+    // Very first top-left pixel is copied.
+    out[0] = in[0];
+    // Rest of top scan-line is left-predicted.
+    PredictLine(in + 1, out + 1, width - 1, inverse);
+    row = 1;
+    in += stride;  // 'preds' is deliberately not advanced: it stays on row 0, the row above row 1.
+    out += stride;
+  } else {
+    // We are starting from in-between. Make sure 'preds' points to prev row.
+    preds -= stride;
+  }
+
+  // Filter line-by-line. The macro argument must be a literal, hence one expansion per direction.
+  if (inverse) {
+    FILTER_LINE_BY_LINE(1);
+  } else {
+    FILTER_LINE_BY_LINE(0);
+  }
+}
+
+#undef FILTER_LINE_BY_LINE
+#undef DO_PREDICT_LINE_VERTICAL
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {  // Forward vertical filter over the whole image.
+  DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);  // row = 0, num_rows = height, inverse = 0.
+}
+
+static void VerticalUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {  // Undoes the vertical filter in place on rows [row, row + num_rows).
+  DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);  // in == out == data, inverse = 1.
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {  // Returns a + b - c limited to 0..255 (assuming the documented DSP ASE saturation semantics).
+  int temp0;
+  __asm__ volatile (
+    "addu             %[temp0],   %[a],       %[b]        \n\t"
+    "subu             %[temp0],   %[temp0],   %[c]        \n\t"  // temp0 = a + b - c
+    "shll_s.w         %[temp0],   %[temp0],   23          \n\t"  // saturating left shift
+    "precrqu_s.qb.ph  %[temp0],   %[temp0],   $zero       \n\t"  // saturating reduction of halfwords to unsigned bytes
+    "srl              %[temp0],   %[temp0],   24          \n\t"  // move the top byte down to bits 7..0
+    : [temp0]"=&r"(temp0)
+    : [a]"r"(a),[b]"r"(b),[c]"r"(c)
+  );
+  return temp0;
+}
+
+#define FILTER_LINE_BY_LINE(INVERSE, PREDS, OPERATION) do {  /* uses the caller's row, last_row, in, out, stride, width */ \
+    while (row < last_row) {                                                   \
+      int w;                                                                   \
+      PREDICT_LINE_ONE_PASS(in, PREDS - stride, out, INVERSE);  /* leftmost byte: predicted from the row above */ \
+      for (w = 1; w < width; ++w) {                                            \
+        const int pred = GradientPredictor(PREDS[w - 1],  /* left */           \
+                                           PREDS[w - stride],  /* above */     \
+                                           PREDS[w - stride - 1]);  /* above-left */ \
+        out[w] = in[w] OPERATION pred;  /* OPERATION is '+' or '-' at the call sites */ \
+      }                                                                        \
+      ++row;                                                                   \
+      in += stride;                                                            \
+      out += stride;                                                           \
+    }                                                                          \
+  } while (0)
+
+static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {  // Handles rows [row, row + num_rows); inverse != 0 undoes the filter.
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;  // Byte offset of the first row to process.
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;  // Note: the row loop below does not read 'preds'; it is handed 'out' or 'in' directly.
+
+  // left prediction for top scan-line
+  if (row == 0) {
+    out[0] = in[0];  // Top-left pixel is copied unchanged.
+    PredictLine(in + 1, out + 1, width - 1, inverse);
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line: predictions come from 'out' (added) when unfiltering and from 'in' (subtracted) when filtering.
+  if (inverse) {
+    FILTER_LINE_BY_LINE(1, out, +);
+  } else {
+    FILTER_LINE_BY_LINE(0, in, -);
+  }
+}
+
+#undef FILTER_LINE_BY_LINE
+
+static void GradientFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {  // Forward gradient filter over the whole image.
+  DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);  // row = 0, num_rows = height, inverse = 0.
+}
+
+static void GradientUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {  // Undoes the gradient filter in place on rows [row, row + num_rows).
+  DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);  // in == out == data, inverse = 1.
+}
+
+#undef PREDICT_LINE_ONE_PASS
+#undef DO_PREDICT_LINE
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8FiltersInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {  // Points the shared dispatch tables at this file's implementations.
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+
+  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;  // The WEBP_FILTER_NONE entries are not touched here.
+  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
+  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8FiltersInitMIPSdspR2)  // Build without WEBP_USE_MIPS_DSP_R2; presumably emits a placeholder init function (macro is defined elsewhere - confirm).
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/filters_sse2.c b/drivers/webp/dsp/filters_sse2.c
new file mode 100644
index 0000000000..bf93342eb7
--- /dev/null
+++ b/drivers/webp/dsp/filters_sse2.c
@@ -0,0 +1,352 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of alpha filters
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <stdlib.h>
+#include <string.h>
+
+//------------------------------------------------------------------------------
+// Helpful macro: argument checks for the Do*Filter() functions; besides 'in'/'out' it reads the caller's width, height, stride, row and num_rows.
+
+# define SANITY_CHECK(in, out)                                                 \
+  assert(in != NULL);                                                          \
+  assert(out != NULL);                                                         \
+  assert(width > 0);                                                           \
+  assert(height > 0);                                                          \
+  assert(stride >= width);                                                     \
+  assert(row >= 0 && num_rows > 0 && row + num_rows <= height);                \
+  (void)height;  // Silence unused warning: 'height' is otherwise referenced only inside the assert()s above.
+
+static void PredictLineTop(const uint8_t* src, const uint8_t* pred,
+                           uint8_t* dst, int length, int inverse) {  // dst[i] = src[i] + pred[i] if inverse, else src[i] - pred[i].
+  int i;
+  const int max_pos = length & ~31;  // Largest multiple of 32 that is <= length.
+  assert(length >= 0);
+  if (inverse) {
+    for (i = 0; i < max_pos; i += 32) {  // 32 bytes per iteration, as two unaligned 16-byte vectors.
+      const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i +  0]);
+      const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
+      const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i +  0]);
+      const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
+      const __m128i C0 = _mm_add_epi8(A0, B0);  // Per-byte add.
+      const __m128i C1 = _mm_add_epi8(A1, B1);
+      _mm_storeu_si128((__m128i*)&dst[i +  0], C0);
+      _mm_storeu_si128((__m128i*)&dst[i + 16], C1);
+    }
+    for (; i < length; ++i) dst[i] = src[i] + pred[i];  // Scalar tail for the remaining (< 32) bytes.
+  } else {
+    for (i = 0; i < max_pos; i += 32) {  // Same layout as above, subtracting instead of adding.
+      const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i +  0]);
+      const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
+      const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i +  0]);
+      const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
+      const __m128i C0 = _mm_sub_epi8(A0, B0);  // Per-byte subtract: src - pred.
+      const __m128i C1 = _mm_sub_epi8(A1, B1);
+      _mm_storeu_si128((__m128i*)&dst[i +  0], C0);
+      _mm_storeu_si128((__m128i*)&dst[i + 16], C1);
+    }
+    for (; i < length; ++i) dst[i] = src[i] - pred[i];  // Scalar tail for the remaining (< 32) bytes.
+  }
+}
+
+// Special case for left-based prediction (when preds==dst-1 or preds==src-1); reads dst[-1] (inverse) or src[-1] (forward).
+static void PredictLineLeft(const uint8_t* src, uint8_t* dst, int length,
+                            int inverse) {
+  int i;
+  if (length <= 0) return;  // Nothing to do; also avoids reading dst[-1] below.
+  if (inverse) {
+    const int max_pos = length & ~7;  // Largest multiple of 8 that is <= length.
+    __m128i last = _mm_set_epi32(0, 0, 0, dst[-1]);  // Previous output byte, placed in byte 0.
+    for (i = 0; i < max_pos; i += 8) {
+      const __m128i A0 = _mm_loadl_epi64((const __m128i*)(src + i));
+      const __m128i A1 = _mm_add_epi8(A0, last);  // Fold the carried-in left value into byte 0.
+      const __m128i A2 = _mm_slli_si128(A1, 1);  // Shift by 1, 2, then 4 bytes and add each time:
+      const __m128i A3 = _mm_add_epi8(A1, A2);  // the low 8 bytes become a running (prefix) sum.
+      const __m128i A4 = _mm_slli_si128(A3, 2);
+      const __m128i A5 = _mm_add_epi8(A3, A4);
+      const __m128i A6 = _mm_slli_si128(A5, 4);
+      const __m128i A7 = _mm_add_epi8(A5, A6);
+      _mm_storel_epi64((__m128i*)(dst + i), A7);  // Only the low 8 bytes are written.
+      last = _mm_srli_epi64(A7, 56);  // Byte 7 (the last output) moves to byte 0 for the next iteration.
+    }
+    for (; i < length; ++i) dst[i] = src[i] + dst[i - 1];  // Scalar tail.
+  } else {
+    const int max_pos = length & ~31;  // Largest multiple of 32 that is <= length.
+    for (i = 0; i < max_pos; i += 32) {
+      const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i +  0    ));  // Current bytes.
+      const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i +  0 - 1));  // Same bytes, one position to the left.
+      const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16    ));
+      const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
+      const __m128i C0 = _mm_sub_epi8(A0, B0);  // current - left, per byte.
+      const __m128i C1 = _mm_sub_epi8(A1, B1);
+      _mm_storeu_si128((__m128i*)(dst + i +  0), C0);
+      _mm_storeu_si128((__m128i*)(dst + i + 16), C1);
+    }
+    for (; i < length; ++i) dst[i] = src[i] - src[i - 1];  // Scalar tail.
+  }
+}
+
+static void PredictLineC(const uint8_t* src, const uint8_t* pred,
+                         uint8_t* dst, int length, int inverse) {  // Plain-C byte loop; in this file it is only called with length == 1.
+  int i;
+  if (inverse) {
+    for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];  // Unfilter: add the prediction back.
+  } else {
+    for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];  // Filter: store the residual.
+  }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
+                                           int width, int height, int stride,
+                                           int row, int num_rows,
+                                           int inverse, uint8_t* out) {  // Handles rows [row, row + num_rows); inverse != 0 undoes the filter.
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;  // Byte offset of the first row to process.
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;  // Predict from the output buffer when unfiltering, from the input when filtering.
+
+  if (row == 0) {
+    // Leftmost pixel is the same as input for topmost scanline (there is no row above to predict from).
+    out[0] = in[0];
+    PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+    row = 1;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    // Leftmost pixel is predicted from above.
+    PredictLineC(in, preds - stride, out, 1, inverse);
+    PredictLineLeft(in + 1, out + 1, width - 1, inverse);  // Remaining pixels are predicted from the left.
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {  // Handles rows [row, row + num_rows); inverse != 0 undoes the filter.
+  const uint8_t* preds;
+  const size_t start_offset = row * stride;  // Byte offset of the first row to process.
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+  preds = inverse ? out : in;  // Predict from the output buffer when unfiltering, from the input when filtering.
+
+  if (row == 0) {
+    // Very first top-left pixel is copied.
+    out[0] = in[0];
+    // Rest of top scan-line is left-predicted.
+    PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+    row = 1;
+    in += stride;  // 'preds' is deliberately not advanced: it stays on row 0, the row above row 1.
+    out += stride;
+  } else {
+    // We are starting from in-between. Make sure 'preds' points to prev row.
+    preds -= stride;
+  }
+
+  // Filter line-by-line: each row is combined with the row at 'preds' (the previous row).
+  while (row < last_row) {
+    PredictLineTop(in, preds, out, width, inverse);
+    ++row;
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictorC(uint8_t a, uint8_t b, uint8_t c) {  // Returns a + b - c clamped to [0, 255].
+  const int g = a + b - c;  // Computed in int, so the range is -255..510.
+  return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit: g is returned as-is when no bit above bit 7 is set
+}
+
+static void GradientPredictDirect(const uint8_t* const row,
+                                  const uint8_t* const top,
+                                  uint8_t* const out, int length) {  // out[i] = row[i] - clip(row[i-1] + top[i] - top[i-1]); reads row[-1] and top[-1].
+  const int max_pos = length & ~7;  // Largest multiple of 8 that is <= length.
+  int i;
+  const __m128i zero = _mm_setzero_si128();
+  for (i = 0; i < max_pos; i += 8) {
+    const __m128i A0 = _mm_loadl_epi64((const __m128i*)&row[i - 1]);  // left
+    const __m128i B0 = _mm_loadl_epi64((const __m128i*)&top[i]);  // above
+    const __m128i C0 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);  // above-left
+    const __m128i D = _mm_loadl_epi64((const __m128i*)&row[i]);  // current
+    const __m128i A1 = _mm_unpacklo_epi8(A0, zero);  // Widen bytes to 16-bit lanes so a + b - c cannot wrap.
+    const __m128i B1 = _mm_unpacklo_epi8(B0, zero);
+    const __m128i C1 = _mm_unpacklo_epi8(C0, zero);
+    const __m128i E = _mm_add_epi16(A1, B1);
+    const __m128i F = _mm_sub_epi16(E, C1);
+    const __m128i G = _mm_packus_epi16(F, zero);  // Pack back to bytes with unsigned saturation (the clip).
+    const __m128i H = _mm_sub_epi8(D, G);  // Residual: current - prediction.
+    _mm_storel_epi64((__m128i*)(out + i), H);
+  }
+  for (; i < length; ++i) {  // Scalar tail.
+    out[i] = row[i] - GradientPredictorC(row[i - 1], top[i], top[i - 1]);
+  }
+}
+
+static void GradientPredictInverse(const uint8_t* const in,
+                                   const uint8_t* const top,
+                                   uint8_t* const row, int length) {  // row[i] = in[i] + clip(row[i-1] + top[i] - top[i-1]); reads row[-1] and top[-1].
+  if (length > 0) {
+    int i;
+    const int max_pos = length & ~7;  // Largest multiple of 8 that is <= length.
+    const __m128i zero = _mm_setzero_si128();
+    __m128i A = _mm_set_epi32(0, 0, 0, row[-1]);   // left sample
+    for (i = 0; i < max_pos; i += 8) {
+      const __m128i tmp0 = _mm_loadl_epi64((const __m128i*)&top[i]);
+      const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
+      const __m128i B = _mm_unpacklo_epi8(tmp0, zero);
+      const __m128i C = _mm_unpacklo_epi8(tmp1, zero);
+      const __m128i tmp2 = _mm_loadl_epi64((const __m128i*)&in[i]);
+      const __m128i D = _mm_unpacklo_epi8(tmp2, zero);   // base input
+      const __m128i E = _mm_sub_epi16(B, C);  // unclipped gradient basis B - C
+      __m128i out = zero;                     // accumulator for output
+      __m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff);  // 0xff in the 16-bit lane currently being computed
+      int k = 8;  // One pass per lane: each output needs the previous one as its left sample.
+      while (1) {
+        const __m128i tmp3 = _mm_add_epi16(A, E);        // delta = A + B - C
+        const __m128i tmp4 = _mm_min_epi16(tmp3, mask_hi);
+        const __m128i tmp5 = _mm_max_epi16(tmp4, zero);  // clipped delta
+        const __m128i tmp6 = _mm_add_epi16(tmp5, D);     // add to in[] values
+        A = _mm_and_si128(tmp6, mask_hi);                // keep the low 8 bits of the current lane only
+        out = _mm_or_si128(out, A);                      // accumulate output
+        if (--k == 0) break;
+        A = _mm_slli_si128(A, 2);                        // rotate left sample
+        mask_hi = _mm_slli_si128(mask_hi, 2);            // rotate mask
+      }
+      A = _mm_srli_si128(A, 14);       // prepare left sample for next iteration
+      _mm_storel_epi64((__m128i*)&row[i], _mm_packus_epi16(out, zero));
+    }
+    for (; i < length; ++i) {  // Scalar tail.
+      row[i] = in[i] + GradientPredictorC(row[i - 1], top[i], top[i - 1]);
+    }
+  }
+}
+
+static WEBP_INLINE void DoGradientFilter(const uint8_t* in,
+                                         int width, int height, int stride,
+                                         int row, int num_rows,
+                                         int inverse, uint8_t* out) {  // Handles rows [row, row + num_rows); inverse != 0 undoes the filter.
+  const size_t start_offset = row * stride;  // Byte offset of the first row to process.
+  const int last_row = row + num_rows;
+  SANITY_CHECK(in, out);
+  in += start_offset;
+  out += start_offset;
+
+  // left prediction for top scan-line
+  if (row == 0) {
+    out[0] = in[0];  // Top-left pixel is copied unchanged.
+    PredictLineLeft(in + 1, out + 1, width - 1, inverse);
+    row = 1;
+    in += stride;
+    out += stride;
+  }
+
+  // Filter line-by-line.
+  while (row < last_row) {
+    if (inverse) {
+      PredictLineC(in, out - stride, out, 1, inverse);  // predict from above
+      GradientPredictInverse(in + 1, out + 1 - stride, out + 1, width - 1);  // Neighbours come from the output buffer.
+    } else {
+      PredictLineC(in, in - stride, out, 1, inverse);  // Leftmost pixel: predicted from the input row above.
+      GradientPredictDirect(in + 1, in + 1 - stride, out + 1, width - 1);  // Neighbours come from the input buffer.
+    }
+    ++row;
+    in += stride;
+    out += stride;
+  }
+}
+
+#undef SANITY_CHECK
+
+//------------------------------------------------------------------------------
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+                             int stride, uint8_t* filtered_data) {  // Forward horizontal filter over the whole image.
+  DoHorizontalFilter(data, width, height, stride, 0, height, 0, filtered_data);  // row = 0, num_rows = height, inverse = 0.
+}
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {  // Forward vertical filter over the whole image.
+  DoVerticalFilter(data, width, height, stride, 0, height, 0, filtered_data);  // row = 0, num_rows = height, inverse = 0.
+}
+
+
+static void GradientFilter(const uint8_t* data, int width, int height,
+                           int stride, uint8_t* filtered_data) {  // Forward gradient filter over the whole image.
+  DoGradientFilter(data, width, height, stride, 0, height, 0, filtered_data);  // row = 0, num_rows = height, inverse = 0.
+}
+
+
+//------------------------------------------------------------------------------
+
+static void VerticalUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {  // Undoes the vertical filter in place on rows [row, row + num_rows).
+  DoVerticalFilter(data, width, height, stride, row, num_rows, 1, data);  // in == out == data, inverse = 1.
+}
+
+static void HorizontalUnfilter(int width, int height, int stride, int row,
+                               int num_rows, uint8_t* data) {  // Undoes the horizontal filter in place on rows [row, row + num_rows).
+  DoHorizontalFilter(data, width, height, stride, row, num_rows, 1, data);  // in == out == data, inverse = 1.
+}
+
+static void GradientUnfilter(int width, int height, int stride, int row,
+                             int num_rows, uint8_t* data) {  // Undoes the gradient filter in place on rows [row, row + num_rows).
+  DoGradientFilter(data, width, height, stride, row, num_rows, 1, data);  // in == out == data, inverse = 1.
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8FiltersInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {  // Points the shared dispatch tables at this file's implementations.
+  WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter;  // The WEBP_FILTER_NONE entries are not touched here.
+  WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter;
+  WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter;
+
+  WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter;
+  WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter;
+  WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8FiltersInitSSE2)  // Build without WEBP_USE_SSE2; presumably emits a placeholder init function (macro is defined elsewhere - confirm).
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/lossless.c b/drivers/webp/dsp/lossless.c
index 62a6b7b15a..5702eb3b17 100644
--- a/drivers/webp/dsp/lossless.c
+++ b/drivers/webp/dsp/lossless.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Image transforms and color space conversion methods for lossless decoder.
@@ -11,170 +13,16 @@
 //          Jyrki Alakuijala (jyrki@google.com)
 //          Urvang Joshi (urvang@google.com)
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "./dsp.h"
 
 #include <math.h>
 #include <stdlib.h>
-#include "./lossless.h"
 #include "../dec/vp8li.h"
-#include "../dsp/yuv.h"
-#include "../dsp/dsp.h"
-#include "../enc/histogram.h"
+#include "../utils/endian_inl.h"
+#include "./lossless.h"
 
 #define MAX_DIFF_COST (1e30f)
 
-// lookup table for small values of log2(int)
-#define APPROX_LOG_MAX  4096
-#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
-#define LOG_LOOKUP_IDX_MAX 256
-static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
-  0.0000000000000000f, 0.0000000000000000f,
-  1.0000000000000000f, 1.5849625007211560f,
-  2.0000000000000000f, 2.3219280948873621f,
-  2.5849625007211560f, 2.8073549220576041f,
-  3.0000000000000000f, 3.1699250014423121f,
-  3.3219280948873621f, 3.4594316186372973f,
-  3.5849625007211560f, 3.7004397181410921f,
-  3.8073549220576041f, 3.9068905956085187f,
-  4.0000000000000000f, 4.0874628412503390f,
-  4.1699250014423121f, 4.2479275134435852f,
-  4.3219280948873626f, 4.3923174227787606f,
-  4.4594316186372973f, 4.5235619560570130f,
-  4.5849625007211560f, 4.6438561897747243f,
-  4.7004397181410917f, 4.7548875021634682f,
-  4.8073549220576037f, 4.8579809951275718f,
-  4.9068905956085187f, 4.9541963103868749f,
-  5.0000000000000000f, 5.0443941193584533f,
-  5.0874628412503390f, 5.1292830169449663f,
-  5.1699250014423121f, 5.2094533656289501f,
-  5.2479275134435852f, 5.2854022188622487f,
-  5.3219280948873626f, 5.3575520046180837f,
-  5.3923174227787606f, 5.4262647547020979f,
-  5.4594316186372973f, 5.4918530963296747f,
-  5.5235619560570130f, 5.5545888516776376f,
-  5.5849625007211560f, 5.6147098441152083f,
-  5.6438561897747243f, 5.6724253419714951f,
-  5.7004397181410917f, 5.7279204545631987f,
-  5.7548875021634682f, 5.7813597135246599f,
-  5.8073549220576037f, 5.8328900141647412f,
-  5.8579809951275718f, 5.8826430493618415f,
-  5.9068905956085187f, 5.9307373375628866f,
-  5.9541963103868749f, 5.9772799234999167f,
-  6.0000000000000000f, 6.0223678130284543f,
-  6.0443941193584533f, 6.0660891904577720f,
-  6.0874628412503390f, 6.1085244567781691f,
-  6.1292830169449663f, 6.1497471195046822f,
-  6.1699250014423121f, 6.1898245588800175f,
-  6.2094533656289501f, 6.2288186904958804f,
-  6.2479275134435852f, 6.2667865406949010f,
-  6.2854022188622487f, 6.3037807481771030f,
-  6.3219280948873626f, 6.3398500028846243f,
-  6.3575520046180837f, 6.3750394313469245f,
-  6.3923174227787606f, 6.4093909361377017f,
-  6.4262647547020979f, 6.4429434958487279f,
-  6.4594316186372973f, 6.4757334309663976f,
-  6.4918530963296747f, 6.5077946401986963f,
-  6.5235619560570130f, 6.5391588111080309f,
-  6.5545888516776376f, 6.5698556083309478f,
-  6.5849625007211560f, 6.5999128421871278f,
-  6.6147098441152083f, 6.6293566200796094f,
-  6.6438561897747243f, 6.6582114827517946f,
-  6.6724253419714951f, 6.6865005271832185f,
-  6.7004397181410917f, 6.7142455176661224f,
-  6.7279204545631987f, 6.7414669864011464f,
-  6.7548875021634682f, 6.7681843247769259f,
-  6.7813597135246599f, 6.7944158663501061f,
-  6.8073549220576037f, 6.8201789624151878f,
-  6.8328900141647412f, 6.8454900509443747f,
-  6.8579809951275718f, 6.8703647195834047f,
-  6.8826430493618415f, 6.8948177633079437f,
-  6.9068905956085187f, 6.9188632372745946f,
-  6.9307373375628866f, 6.9425145053392398f,
-  6.9541963103868749f, 6.9657842846620869f,
-  6.9772799234999167f, 6.9886846867721654f,
-  7.0000000000000000f, 7.0112272554232539f,
-  7.0223678130284543f, 7.0334230015374501f,
-  7.0443941193584533f, 7.0552824355011898f,
-  7.0660891904577720f, 7.0768155970508308f,
-  7.0874628412503390f, 7.0980320829605263f,
-  7.1085244567781691f, 7.1189410727235076f,
-  7.1292830169449663f, 7.1395513523987936f,
-  7.1497471195046822f, 7.1598713367783890f,
-  7.1699250014423121f, 7.1799090900149344f,
-  7.1898245588800175f, 7.1996723448363644f,
-  7.2094533656289501f, 7.2191685204621611f,
-  7.2288186904958804f, 7.2384047393250785f,
-  7.2479275134435852f, 7.2573878426926521f,
-  7.2667865406949010f, 7.2761244052742375f,
-  7.2854022188622487f, 7.2946207488916270f,
-  7.3037807481771030f, 7.3128829552843557f,
-  7.3219280948873626f, 7.3309168781146167f,
-  7.3398500028846243f, 7.3487281542310771f,
-  7.3575520046180837f, 7.3663222142458160f,
-  7.3750394313469245f, 7.3837042924740519f,
-  7.3923174227787606f, 7.4008794362821843f,
-  7.4093909361377017f, 7.4178525148858982f,
-  7.4262647547020979f, 7.4346282276367245f,
-  7.4429434958487279f, 7.4512111118323289f,
-  7.4594316186372973f, 7.4676055500829976f,
-  7.4757334309663976f, 7.4838157772642563f,
-  7.4918530963296747f, 7.4998458870832056f,
-  7.5077946401986963f, 7.5156998382840427f,
-  7.5235619560570130f, 7.5313814605163118f,
-  7.5391588111080309f, 7.5468944598876364f,
-  7.5545888516776376f, 7.5622424242210728f,
-  7.5698556083309478f, 7.5774288280357486f,
-  7.5849625007211560f, 7.5924570372680806f,
-  7.5999128421871278f, 7.6073303137496104f,
-  7.6147098441152083f, 7.6220518194563764f,
-  7.6293566200796094f, 7.6366246205436487f,
-  7.6438561897747243f, 7.6510516911789281f,
-  7.6582114827517946f, 7.6653359171851764f,
-  7.6724253419714951f, 7.6794800995054464f,
-  7.6865005271832185f, 7.6934869574993252f,
-  7.7004397181410917f, 7.7073591320808825f,
-  7.7142455176661224f, 7.7210991887071855f,
-  7.7279204545631987f, 7.7347096202258383f,
-  7.7414669864011464f, 7.7481928495894605f,
-  7.7548875021634682f, 7.7615512324444795f,
-  7.7681843247769259f, 7.7747870596011736f,
-  7.7813597135246599f, 7.7879025593914317f,
-  7.7944158663501061f, 7.8008998999203047f,
-  7.8073549220576037f, 7.8137811912170374f,
-  7.8201789624151878f, 7.8265484872909150f,
-  7.8328900141647412f, 7.8392037880969436f,
-  7.8454900509443747f, 7.8517490414160571f,
-  7.8579809951275718f, 7.8641861446542797f,
-  7.8703647195834047f, 7.8765169465649993f,
-  7.8826430493618415f, 7.8887432488982591f,
-  7.8948177633079437f, 7.9008668079807486f,
-  7.9068905956085187f, 7.9128893362299619f,
-  7.9188632372745946f, 7.9248125036057812f,
-  7.9307373375628866f, 7.9366379390025709f,
-  7.9425145053392398f, 7.9483672315846778f,
-  7.9541963103868749f, 7.9600019320680805f,
-  7.9657842846620869f, 7.9715435539507719f,
-  7.9772799234999167f, 7.9829935746943103f,
-  7.9886846867721654f, 7.9943534368588577f
-};
-
-float VP8LFastLog2(int v) {
-  if (v < LOG_LOOKUP_IDX_MAX) {
-    return kLog2Table[v];
-  } else if (v < APPROX_LOG_MAX) {
-    int log_cnt = 0;
-    while (v >= LOG_LOOKUP_IDX_MAX) {
-      ++log_cnt;
-      v = v >> 1;
-    }
-    return kLog2Table[v] + (float)log_cnt;
-  } else {
-    return (float)(LOG_2_RECIPROCAL * log((double)v));
-  }
-}
-
 //------------------------------------------------------------------------------
 // Image transforms.
 
@@ -186,7 +34,7 @@ static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
 }
 
 static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
-  return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
+  return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
 }
 
 static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
@@ -221,7 +69,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
                                          (c1 >> 8) & 0xff,
                                          (c2 >> 8) & 0xff);
   const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
-  return (a << 24) | (r << 16) | (g << 8) | b;
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
 }
 
 static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
@@ -235,22 +83,30 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
   const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
   const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
   const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
-  return (a << 24) | (r << 16) | (g << 8) | b;
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
 }
 
-static WEBP_INLINE int Sub3(int a, int b, int c) {
-  const int pa = b - c;
-  const int pb = a - c;
-  return abs(pa) - abs(pb);
+// gcc-4.9 on ARM generates incorrect code in Select() when Sub3() is inlined.
+#if defined(__arm__) && LOCAL_GCC_VERSION == 0x409
+# define LOCAL_INLINE __attribute__ ((noinline))
+#else
+# define LOCAL_INLINE WEBP_INLINE
+#endif
+// Returns |b - c| - |a - c|; Select() sums this over the four 8-bit channels.
+static LOCAL_INLINE int Sub3(int a, int b, int c) {
+  const int pb = b - c;
+  const int pa = a - c;
+  return abs(pb) - abs(pa);
+}
 
+#undef LOCAL_INLINE
+
 static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
   const int pa_minus_pb =
       Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
       Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
       Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
       Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
-
   return (pa_minus_pb <= 0) ? a : b;
 }
 
@@ -317,208 +173,7 @@ static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
   return pred;
 }
 
-typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
-static const PredictorFunc kPredictors[16] = {
-  Predictor0, Predictor1, Predictor2, Predictor3,
-  Predictor4, Predictor5, Predictor6, Predictor7,
-  Predictor8, Predictor9, Predictor10, Predictor11,
-  Predictor12, Predictor13,
-  Predictor0, Predictor0    // <- padding security sentinels
-};
-
-// TODO(vikasa): Replace 256 etc with defines.
-static float PredictionCostSpatial(const int* counts,
-                                   int weight_0, double exp_val) {
-  const int significant_symbols = 16;
-  const double exp_decay_factor = 0.6;
-  double bits = weight_0 * counts[0];
-  int i;
-  for (i = 1; i < significant_symbols; ++i) {
-    bits += exp_val * (counts[i] + counts[256 - i]);
-    exp_val *= exp_decay_factor;
-  }
-  return (float)(-0.1 * bits);
-}
-
-// Compute the Shanon's entropy: Sum(p*log2(p))
-static float ShannonEntropy(const int* const array, int n) {
-  int i;
-  float retval = 0.f;
-  int sum = 0;
-  for (i = 0; i < n; ++i) {
-    if (array[i] != 0) {
-      sum += array[i];
-      retval -= VP8LFastSLog2(array[i]);
-    }
-  }
-  retval += VP8LFastSLog2(sum);
-  return retval;
-}
-
-static float PredictionCostSpatialHistogram(int accumulated[4][256],
-                                            int tile[4][256]) {
-  int i;
-  int k;
-  int combo[256];
-  double retval = 0;
-  for (i = 0; i < 4; ++i) {
-    const double exp_val = 0.94;
-    retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);
-    retval += ShannonEntropy(&tile[i][0], 256);
-    for (k = 0; k < 256; ++k) {
-      combo[k] = accumulated[i][k] + tile[i][k];
-    }
-    retval += ShannonEntropy(&combo[0], 256);
-  }
-  return (float)retval;
-}
-
-static int GetBestPredictorForTile(int width, int height,
-                                   int tile_x, int tile_y, int bits,
-                                   int accumulated[4][256],
-                                   const uint32_t* const argb_scratch) {
-  const int kNumPredModes = 14;
-  const int col_start = tile_x << bits;
-  const int row_start = tile_y << bits;
-  const int tile_size = 1 << bits;
-  const int ymax = (tile_size <= height - row_start) ?
-      tile_size : height - row_start;
-  const int xmax = (tile_size <= width - col_start) ?
-      tile_size : width - col_start;
-  int histo[4][256];
-  float best_diff = MAX_DIFF_COST;
-  int best_mode = 0;
-
-  int mode;
-  for (mode = 0; mode < kNumPredModes; ++mode) {
-    const uint32_t* current_row = argb_scratch;
-    const PredictorFunc pred_func = kPredictors[mode];
-    float cur_diff;
-    int y;
-    memset(&histo[0][0], 0, sizeof(histo));
-    for (y = 0; y < ymax; ++y) {
-      int x;
-      const int row = row_start + y;
-      const uint32_t* const upper_row = current_row;
-      current_row = upper_row + width;
-      for (x = 0; x < xmax; ++x) {
-        const int col = col_start + x;
-        uint32_t predict;
-        uint32_t predict_diff;
-        if (row == 0) {
-          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
-        } else if (col == 0) {
-          predict = upper_row[col];  // Top.
-        } else {
-          predict = pred_func(current_row[col - 1], upper_row + col);
-        }
-        predict_diff = VP8LSubPixels(current_row[col], predict);
-        ++histo[0][predict_diff >> 24];
-        ++histo[1][((predict_diff >> 16) & 0xff)];
-        ++histo[2][((predict_diff >> 8) & 0xff)];
-        ++histo[3][(predict_diff & 0xff)];
-      }
-    }
-    cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
-    if (cur_diff < best_diff) {
-      best_diff = cur_diff;
-      best_mode = mode;
-    }
-  }
-
-  return best_mode;
-}
-
-static void CopyTileWithPrediction(int width, int height,
-                                   int tile_x, int tile_y, int bits, int mode,
-                                   const uint32_t* const argb_scratch,
-                                   uint32_t* const argb) {
-  const int col_start = tile_x << bits;
-  const int row_start = tile_y << bits;
-  const int tile_size = 1 << bits;
-  const int ymax = (tile_size <= height - row_start) ?
-      tile_size : height - row_start;
-  const int xmax = (tile_size <= width - col_start) ?
-      tile_size : width - col_start;
-  const PredictorFunc pred_func = kPredictors[mode];
-  const uint32_t* current_row = argb_scratch;
-
-  int y;
-  for (y = 0; y < ymax; ++y) {
-    int x;
-    const int row = row_start + y;
-    const uint32_t* const upper_row = current_row;
-    current_row = upper_row + width;
-    for (x = 0; x < xmax; ++x) {
-      const int col = col_start + x;
-      const int pix = row * width + col;
-      uint32_t predict;
-      if (row == 0) {
-        predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
-      } else if (col == 0) {
-        predict = upper_row[col];  // Top.
-      } else {
-        predict = pred_func(current_row[col - 1], upper_row + col);
-      }
-      argb[pix] = VP8LSubPixels(current_row[col], predict);
-    }
-  }
-}
-
-void VP8LResidualImage(int width, int height, int bits,
-                       uint32_t* const argb, uint32_t* const argb_scratch,
-                       uint32_t* const image) {
-  const int max_tile_size = 1 << bits;
-  const int tiles_per_row = VP8LSubSampleSize(width, bits);
-  const int tiles_per_col = VP8LSubSampleSize(height, bits);
-  uint32_t* const upper_row = argb_scratch;
-  uint32_t* const current_tile_rows = argb_scratch + width;
-  int tile_y;
-  int histo[4][256];
-  memset(histo, 0, sizeof(histo));
-  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
-    const int tile_y_offset = tile_y * max_tile_size;
-    const int this_tile_height =
-        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
-    int tile_x;
-    if (tile_y > 0) {
-      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
-             width * sizeof(*upper_row));
-    }
-    memcpy(current_tile_rows, &argb[tile_y_offset * width],
-           this_tile_height * width * sizeof(*current_tile_rows));
-    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
-      int pred;
-      int y;
-      const int tile_x_offset = tile_x * max_tile_size;
-      int all_x_max = tile_x_offset + max_tile_size;
-      if (all_x_max > width) {
-        all_x_max = width;
-      }
-      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
-                                     argb_scratch);
-      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
-      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
-                             argb_scratch, argb);
-      for (y = 0; y < max_tile_size; ++y) {
-        int ix;
-        int all_x;
-        int all_y = tile_y_offset + y;
-        if (all_y >= height) {
-          break;
-        }
-        ix = all_y * width + tile_x_offset;
-        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
-          const uint32_t a = argb[ix];
-          ++histo[0][a >> 24];
-          ++histo[1][((a >> 16) & 0xff)];
-          ++histo[2][((a >> 8) & 0xff)];
-          ++histo[3][(a & 0xff)];
-        }
-      }
-    }
-  }
-}
+//------------------------------------------------------------------------------
 
 // Inverse prediction.
 static void PredictorInverseTransform(const VP8LTransform* const transform,
@@ -538,29 +193,36 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
 
   {
     int y = y_start;
-    const int mask = (1 << transform->bits_) - 1;
+    const int tile_width = 1 << transform->bits_;
+    const int mask = tile_width - 1;
+    const int safe_width = width & ~mask;
     const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
     const uint32_t* pred_mode_base =
         transform->data_ + (y >> transform->bits_) * tiles_per_row;
 
     while (y < y_end) {
-      int x;
       const uint32_t pred2 = Predictor2(data[-1], data - width);
       const uint32_t* pred_mode_src = pred_mode_base;
-      PredictorFunc pred_func;
-
+      VP8LPredictorFunc pred_func;
+      int x = 1;
+      int t = 1;
       // First pixel follows the T (mode=2) mode.
       AddPixelsEq(data, pred2);
-
       // .. the rest:
-      pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
-      for (x = 1; x < width; ++x) {
-        uint32_t pred;
-        if ((x & mask) == 0) {    // start of tile. Read predictor function.
-          pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
+      while (x < safe_width) {
+        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+        for (; t < tile_width; ++t, ++x) {
+          const uint32_t pred = pred_func(data[x - 1], data + x - width);
+          AddPixelsEq(data + x, pred);
+        }
+        t = 0;
+      }
+      if (x < width) {
+        pred_func = VP8LPredictors[((*pred_mode_src++) >> 8) & 0xf];
+        for (; x < width; ++x) {
+          const uint32_t pred = pred_func(data[x - 1], data + x - width);
+          AddPixelsEq(data + x, pred);
         }
-        pred = pred_func(data[x - 1], data + x - width);
-        AddPixelsEq(data + x, pred);
       }
       data += width;
       ++y;
@@ -571,326 +233,47 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
   }
 }
 
-void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
-  int i;
-  for (i = 0; i < num_pixs; ++i) {
-    const uint32_t argb = argb_data[i];
-    const uint32_t green = (argb >> 8) & 0xff;
-    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
-    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
-    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
-  }
-}
-
 // Add green to blue and red channels (i.e. perform the inverse transform of
 // 'subtract green').
-static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
-                                 int y_start, int y_end, uint32_t* data) {
-  const int width = transform->xsize_;
-  const uint32_t* const data_end = data + (y_end - y_start) * width;
-  while (data < data_end) {
-    const uint32_t argb = *data;
-    // "* 0001001u" is equivalent to "(green << 16) + green)"
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t argb = data[i];
     const uint32_t green = ((argb >> 8) & 0xff);
     uint32_t red_blue = (argb & 0x00ff00ffu);
     red_blue += (green << 16) | green;
     red_blue &= 0x00ff00ffu;
-    *data++ = (argb & 0xff00ff00u) | red_blue;
+    data[i] = (argb & 0xff00ff00u) | red_blue;
   }
 }
 
-typedef struct {
-  // Note: the members are uint8_t, so that any negative values are
-  // automatically converted to "mod 256" values.
-  uint8_t green_to_red_;
-  uint8_t green_to_blue_;
-  uint8_t red_to_blue_;
-} Multipliers;
-
-static WEBP_INLINE void MultipliersClear(Multipliers* m) {
-  m->green_to_red_ = 0;
-  m->green_to_blue_ = 0;
-  m->red_to_blue_ = 0;
-}
-
 static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
                                                 int8_t color) {
   return (uint32_t)((int)(color_pred) * color) >> 5;
 }
 
 static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
-                                               Multipliers* const m) {
+                                               VP8LMultipliers* const m) {
   m->green_to_red_  = (color_code >>  0) & 0xff;
   m->green_to_blue_ = (color_code >>  8) & 0xff;
   m->red_to_blue_   = (color_code >> 16) & 0xff;
 }
 
-static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
-  return 0xff000000u |
-         ((uint32_t)(m->red_to_blue_) << 16) |
-         ((uint32_t)(m->green_to_blue_) << 8) |
-         m->green_to_red_;
-}
-
-static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
-                                           uint32_t argb, int inverse) {
-  const uint32_t green = argb >> 8;
-  const uint32_t red = argb >> 16;
-  uint32_t new_red = red;
-  uint32_t new_blue = argb;
-
-  if (inverse) {
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, uint32_t* data,
+                                 int num_pixels) {
+  int i;
+  for (i = 0; i < num_pixels; ++i) {
+    const uint32_t argb = data[i];
+    const uint32_t green = argb >> 8;
+    const uint32_t red = argb >> 16;
+    uint32_t new_red = red;
+    uint32_t new_blue = argb;
     new_red += ColorTransformDelta(m->green_to_red_, green);
     new_red &= 0xff;
     new_blue += ColorTransformDelta(m->green_to_blue_, green);
     new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
     new_blue &= 0xff;
-  } else {
-    new_red -= ColorTransformDelta(m->green_to_red_, green);
-    new_red &= 0xff;
-    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
-    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
-    new_blue &= 0xff;
-  }
-  return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
-}
-
-static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
-                                          int ix, int xsize) {
-  const uint32_t v = argb[ix];
-  if (ix >= xsize + 3) {
-    if (v == argb[ix - xsize] &&
-        argb[ix - 1] == argb[ix - xsize - 1] &&
-        argb[ix - 2] == argb[ix - xsize - 2] &&
-        argb[ix - 3] == argb[ix - xsize - 3]) {
-      return 1;
-    }
-    return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
-  } else if (ix >= 3) {
-    return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
-  }
-  return 0;
-}
-
-static float PredictionCostCrossColor(const int accumulated[256],
-                                      const int counts[256]) {
-  // Favor low entropy, locally and globally.
-  int i;
-  int combo[256];
-  for (i = 0; i < 256; ++i) {
-    combo[i] = accumulated[i] + counts[i];
-  }
-  return ShannonEntropy(combo, 256) +
-         ShannonEntropy(counts, 256) +
-         PredictionCostSpatial(counts, 3, 2.4);  // Favor small absolute values.
-}
-
-static Multipliers GetBestColorTransformForTile(
-    int tile_x, int tile_y, int bits,
-    Multipliers prevX,
-    Multipliers prevY,
-    int step, int xsize, int ysize,
-    int* accumulated_red_histo,
-    int* accumulated_blue_histo,
-    const uint32_t* const argb) {
-  float best_diff = MAX_DIFF_COST;
-  float cur_diff;
-  const int halfstep = step / 2;
-  const int max_tile_size = 1 << bits;
-  const int tile_y_offset = tile_y * max_tile_size;
-  const int tile_x_offset = tile_x * max_tile_size;
-  int green_to_red;
-  int green_to_blue;
-  int red_to_blue;
-  int all_x_max = tile_x_offset + max_tile_size;
-  int all_y_max = tile_y_offset + max_tile_size;
-  Multipliers best_tx;
-  MultipliersClear(&best_tx);
-  if (all_x_max > xsize) {
-    all_x_max = xsize;
-  }
-  if (all_y_max > ysize) {
-    all_y_max = ysize;
-  }
-  for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
-    int histo[256] = { 0 };
-    int all_y;
-    Multipliers tx;
-    MultipliersClear(&tx);
-    tx.green_to_red_ = green_to_red & 0xff;
-
-    for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
-      uint32_t predict;
-      int ix = all_y * xsize + tile_x_offset;
-      int all_x;
-      for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
-        if (SkipRepeatedPixels(argb, ix, xsize)) {
-          continue;
-        }
-        predict = TransformColor(&tx, argb[ix], 0);
-        ++histo[(predict >> 16) & 0xff];  // red.
-      }
-    }
-    cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
-    if (tx.green_to_red_ == prevX.green_to_red_) {
-      cur_diff -= 3;  // favor keeping the areas locally similar
-    }
-    if (tx.green_to_red_ == prevY.green_to_red_) {
-      cur_diff -= 3;  // favor keeping the areas locally similar
-    }
-    if (tx.green_to_red_ == 0) {
-      cur_diff -= 3;
-    }
-    if (cur_diff < best_diff) {
-      best_diff = cur_diff;
-      best_tx = tx;
-    }
-  }
-  best_diff = MAX_DIFF_COST;
-  green_to_red = best_tx.green_to_red_;
-  for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
-    for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
-      int all_y;
-      int histo[256] = { 0 };
-      Multipliers tx;
-      tx.green_to_red_ = green_to_red;
-      tx.green_to_blue_ = green_to_blue;
-      tx.red_to_blue_ = red_to_blue;
-      for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
-        uint32_t predict;
-        int all_x;
-        int ix = all_y * xsize + tile_x_offset;
-        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
-          if (SkipRepeatedPixels(argb, ix, xsize)) {
-            continue;
-          }
-          predict = TransformColor(&tx, argb[ix], 0);
-          ++histo[predict & 0xff];  // blue.
-        }
-      }
-      cur_diff =
-        PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
-      if (tx.green_to_blue_ == prevX.green_to_blue_) {
-        cur_diff -= 3;  // favor keeping the areas locally similar
-      }
-      if (tx.green_to_blue_ == prevY.green_to_blue_) {
-        cur_diff -= 3;  // favor keeping the areas locally similar
-      }
-      if (tx.red_to_blue_ == prevX.red_to_blue_) {
-        cur_diff -= 3;  // favor keeping the areas locally similar
-      }
-      if (tx.red_to_blue_ == prevY.red_to_blue_) {
-        cur_diff -= 3;  // favor keeping the areas locally similar
-      }
-      if (tx.green_to_blue_ == 0) {
-        cur_diff -= 3;
-      }
-      if (tx.red_to_blue_ == 0) {
-        cur_diff -= 3;
-      }
-      if (cur_diff < best_diff) {
-        best_diff = cur_diff;
-        best_tx = tx;
-      }
-    }
-  }
-  return best_tx;
-}
-
-static void CopyTileWithColorTransform(int xsize, int ysize,
-                                       int tile_x, int tile_y, int bits,
-                                       Multipliers color_transform,
-                                       uint32_t* const argb) {
-  int y;
-  int xscan = 1 << bits;
-  int yscan = 1 << bits;
-  tile_x <<= bits;
-  tile_y <<= bits;
-  if (xscan > xsize - tile_x) {
-    xscan = xsize - tile_x;
-  }
-  if (yscan > ysize - tile_y) {
-    yscan = ysize - tile_y;
-  }
-  yscan += tile_y;
-  for (y = tile_y; y < yscan; ++y) {
-    int ix = y * xsize + tile_x;
-    const int end_ix = ix + xscan;
-    for (; ix < end_ix; ++ix) {
-      argb[ix] = TransformColor(&color_transform, argb[ix], 0);
-    }
-  }
-}
-
-void VP8LColorSpaceTransform(int width, int height, int bits, int step,
-                             uint32_t* const argb, uint32_t* image) {
-  const int max_tile_size = 1 << bits;
-  int tile_xsize = VP8LSubSampleSize(width, bits);
-  int tile_ysize = VP8LSubSampleSize(height, bits);
-  int accumulated_red_histo[256] = { 0 };
-  int accumulated_blue_histo[256] = { 0 };
-  int tile_y;
-  int tile_x;
-  Multipliers prevX;
-  Multipliers prevY;
-  MultipliersClear(&prevY);
-  MultipliersClear(&prevX);
-  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
-    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
-      Multipliers color_transform;
-      int all_x_max;
-      int y;
-      const int tile_y_offset = tile_y * max_tile_size;
-      const int tile_x_offset = tile_x * max_tile_size;
-      if (tile_y != 0) {
-        ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
-        ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
-                               &prevY);
-      } else if (tile_x != 0) {
-        ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
-      }
-      color_transform =
-          GetBestColorTransformForTile(tile_x, tile_y, bits,
-                                       prevX, prevY,
-                                       step, width, height,
-                                       &accumulated_red_histo[0],
-                                       &accumulated_blue_histo[0],
-                                       argb);
-      image[tile_y * tile_xsize + tile_x] =
-          MultipliersToColorCode(&color_transform);
-      CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
-                                 color_transform, argb);
-
-      // Gather accumulated histogram data.
-      all_x_max = tile_x_offset + max_tile_size;
-      if (all_x_max > width) {
-        all_x_max = width;
-      }
-      for (y = 0; y < max_tile_size; ++y) {
-        int ix;
-        int all_x;
-        int all_y = tile_y_offset + y;
-        if (all_y >= height) {
-          break;
-        }
-        ix = all_y * width + tile_x_offset;
-        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
-          if (ix >= 2 &&
-              argb[ix] == argb[ix - 2] &&
-              argb[ix] == argb[ix - 1]) {
-            continue;  // repeated pixels are handled by backward references
-          }
-          if (ix >= width + 2 &&
-              argb[ix - 2] == argb[ix - width - 2] &&
-              argb[ix - 1] == argb[ix - width - 1] &&
-              argb[ix] == argb[ix - width]) {
-            continue;  // repeated pixels are handled by backward references
-          }
-          ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
-          ++accumulated_blue_histo[argb[ix] & 0xff];
-        }
-      }
-    }
+    data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
   }
 }
 
@@ -898,7 +281,10 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int step,
 static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
                                        int y_start, int y_end, uint32_t* data) {
   const int width = transform->xsize_;
-  const int mask = (1 << transform->bits_) - 1;
+  const int tile_width = 1 << transform->bits_;
+  const int mask = tile_width - 1;
+  const int safe_width = width & ~mask;
+  const int remaining_width = width - safe_width;
   const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
   int y = y_start;
   const uint32_t* pred_row =
@@ -906,68 +292,89 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
 
   while (y < y_end) {
     const uint32_t* pred = pred_row;
-    Multipliers m = { 0, 0, 0 };
-    int x;
-
-    for (x = 0; x < width; ++x) {
-      if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
-      data[x] = TransformColor(&m, data[x], 1);
+    VP8LMultipliers m = { 0, 0, 0 };
+    const uint32_t* const data_safe_end = data + safe_width;
+    const uint32_t* const data_end = data + width;
+    while (data < data_safe_end) {
+      ColorCodeToMultipliers(*pred++, &m);
+      VP8LTransformColorInverse(&m, data, tile_width);
+      data += tile_width;
+    }
+    if (data < data_end) {  // Left-overs using C-version.
+      ColorCodeToMultipliers(*pred++, &m);
+      VP8LTransformColorInverse(&m, data, remaining_width);
+      data += remaining_width;
     }
-    data += width;
     ++y;
-    if ((y & mask) == 0) pred_row += tiles_per_row;;
+    if ((y & mask) == 0) pred_row += tiles_per_row;
   }
 }
 
 // Separate out pixels packed together using pixel-bundling.
-static void ColorIndexInverseTransform(
-    const VP8LTransform* const transform,
-    int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
-  int y;
-  const int bits_per_pixel = 8 >> transform->bits_;
-  const int width = transform->xsize_;
-  const uint32_t* const color_map = transform->data_;
-  if (bits_per_pixel < 8) {
-    const int pixels_per_byte = 1 << transform->bits_;
-    const int count_mask = pixels_per_byte - 1;
-    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
-    for (y = y_start; y < y_end; ++y) {
-      uint32_t packed_pixels = 0;
-      int x;
-      for (x = 0; x < width; ++x) {
-        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
-        // increments of x. Fortunately, pixels_per_byte is a power of 2, so
-        // can just use a mask for that, instead of decrementing a counter.
-        if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
-        *dst++ = color_map[packed_pixels & bit_mask];
-        packed_pixels >>= bits_per_pixel;
-      }
-    }
-  } else {
-    for (y = y_start; y < y_end; ++y) {
-      int x;
-      for (x = 0; x < width; ++x) {
-        *dst++ = color_map[((*src++) >> 8) & 0xff];
-      }
-    }
-  }
-}
+// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
+#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX,  \
+                            GET_INDEX, GET_VALUE)                              \
+static void F_NAME(const TYPE* src, const uint32_t* const color_map,           \
+                   TYPE* dst, int y_start, int y_end, int width) {             \
+  int y;                                                                       \
+  for (y = y_start; y < y_end; ++y) {                                          \
+    int x;                                                                     \
+    for (x = 0; x < width; ++x) {                                              \
+      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
+    }                                                                          \
+  }                                                                            \
+}                                                                              \
+STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform,               \
+                           int y_start, int y_end, const TYPE* src,            \
+                           TYPE* dst) {                                        \
+  int y;                                                                       \
+  const int bits_per_pixel = 8 >> transform->bits_;                            \
+  const int width = transform->xsize_;                                         \
+  const uint32_t* const color_map = transform->data_;                          \
+  if (bits_per_pixel < 8) {                                                    \
+    const int pixels_per_byte = 1 << transform->bits_;                         \
+    const int count_mask = pixels_per_byte - 1;                                \
+    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;                       \
+    for (y = y_start; y < y_end; ++y) {                                        \
+      uint32_t packed_pixels = 0;                                              \
+      int x;                                                                   \
+      for (x = 0; x < width; ++x) {                                            \
+        /* We need to load fresh 'packed_pixels' once every                */  \
+        /* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */  \
+        /* is a power of 2, so can just use a mask for that, instead of    */  \
+        /* decrementing a counter.                                         */  \
+        if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++);          \
+        *dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]);               \
+        packed_pixels >>= bits_per_pixel;                                      \
+      }                                                                        \
+    }                                                                          \
+  } else {                                                                     \
+    VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width);      \
+  }                                                                            \
+}
+
+COLOR_INDEX_INVERSE(ColorIndexInverseTransform, MapARGB, static, uint32_t, 32b,
+                    VP8GetARGBIndex, VP8GetARGBValue)
+COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha, , uint8_t,
+                    8b, VP8GetAlphaIndex, VP8GetAlphaValue)
+
+#undef COLOR_INDEX_INVERSE
 
 void VP8LInverseTransform(const VP8LTransform* const transform,
                           int row_start, int row_end,
                           const uint32_t* const in, uint32_t* const out) {
+  const int width = transform->xsize_;
   assert(row_start < row_end);
   assert(row_end <= transform->ysize_);
   switch (transform->type_) {
     case SUBTRACT_GREEN:
-      AddGreenToBlueAndRed(transform, row_start, row_end, out);
+      VP8LAddGreenToBlueAndRed(out, (row_end - row_start) * width);
       break;
     case PREDICTOR_TRANSFORM:
       PredictorInverseTransform(transform, row_start, row_end, out);
       if (row_end != transform->ysize_) {
         // The last predicted row in this iteration will be the top-pred row
         // for the first row in next iteration.
-        const int width = transform->xsize_;
         memcpy(out - width, out + (row_end - row_start - 1) * width,
                width * sizeof(*out));
       }
@@ -982,7 +389,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform,
         // Also, note that this is the only transform that applies on
         // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
         // transforms work on effective width of xsize_.
-        const int out_stride = (row_end - row_start) * transform->xsize_;
+        const int out_stride = (row_end - row_start) * width;
         const int in_stride = (row_end - row_start) *
             VP8LSubSampleSize(transform->xsize_, transform->bits_);
         uint32_t* const src = out + out_stride - in_stride;
@@ -1006,8 +413,8 @@ static int is_big_endian(void) {
   return (tmp.b[0] != 1);
 }
 
-static void ConvertBGRAToRGB(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGB_C(const uint32_t* src,
+                            int num_pixels, uint8_t* dst) {
   const uint32_t* const src_end = src + num_pixels;
   while (src < src_end) {
     const uint32_t argb = *src++;
@@ -1017,8 +424,8 @@ static void ConvertBGRAToRGB(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToRGBA(const uint32_t* src,
-                              int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
   const uint32_t* const src_end = src + num_pixels;
   while (src < src_end) {
     const uint32_t argb = *src++;
@@ -1029,28 +436,42 @@ static void ConvertBGRAToRGBA(const uint32_t* src,
   }
 }
 
-static void ConvertBGRAToRGBA4444(const uint32_t* src,
-                                  int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
+                                 int num_pixels, uint8_t* dst) {
   const uint32_t* const src_end = src + num_pixels;
   while (src < src_end) {
     const uint32_t argb = *src++;
-    *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
-    *dst++ = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
+    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
+    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
+#ifdef WEBP_SWAP_16BIT_CSP
+    *dst++ = ba;
+    *dst++ = rg;
+#else
+    *dst++ = rg;
+    *dst++ = ba;
+#endif
   }
 }
 
-static void ConvertBGRAToRGB565(const uint32_t* src,
-                                int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
+                               int num_pixels, uint8_t* dst) {
   const uint32_t* const src_end = src + num_pixels;
   while (src < src_end) {
     const uint32_t argb = *src++;
-    *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
-    *dst++ = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
+    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
+    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
+#ifdef WEBP_SWAP_16BIT_CSP
+    *dst++ = gb;
+    *dst++ = rg;
+#else
+    *dst++ = rg;
+    *dst++ = gb;
+#endif
   }
 }
 
-static void ConvertBGRAToBGR(const uint32_t* src,
-                             int num_pixels, uint8_t* dst) {
+void VP8LConvertBGRAToBGR_C(const uint32_t* src,
+                            int num_pixels, uint8_t* dst) {
   const uint32_t* const src_end = src + num_pixels;
   while (src < src_end) {
     const uint32_t argb = *src++;
@@ -1065,21 +486,24 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
   if (is_big_endian() == swap_on_big_endian) {
     const uint32_t* const src_end = src + num_pixels;
     while (src < src_end) {
-      uint32_t argb = *src++;
-#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
-      __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
-      *(uint32_t*)dst = argb;
-      dst += sizeof(argb);
-#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
-      argb = _byteswap_ulong(argb);
-      *(uint32_t*)dst = argb;
-      dst += sizeof(argb);
-#else
-      *dst++ = (argb >> 24) & 0xff;
-      *dst++ = (argb >> 16) & 0xff;
-      *dst++ = (argb >>  8) & 0xff;
-      *dst++ = (argb >>  0) & 0xff;
+      const uint32_t argb = *src++;
+
+#if !defined(WORDS_BIGENDIAN)
+#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
+      *(uint32_t*)dst = BSwap32(argb);
+#else  // WEBP_REFERENCE_IMPLEMENTATION
+      dst[0] = (argb >> 24) & 0xff;
+      dst[1] = (argb >> 16) & 0xff;
+      dst[2] = (argb >>  8) & 0xff;
+      dst[3] = (argb >>  0) & 0xff;
 #endif
+#else  // WORDS_BIGENDIAN
+      dst[0] = (argb >>  0) & 0xff;
+      dst[1] = (argb >>  8) & 0xff;
+      dst[2] = (argb >> 16) & 0xff;
+      dst[3] = (argb >> 24) & 0xff;
+#endif
+      dst += sizeof(argb);
     }
   } else {
     memcpy(dst, src, num_pixels * sizeof(*src));
@@ -1090,17 +514,17 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
   switch (out_colorspace) {
     case MODE_RGB:
-      ConvertBGRAToRGB(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
       break;
     case MODE_RGBA:
-      ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
       break;
     case MODE_rgbA:
-      ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
       WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
       break;
     case MODE_BGR:
-      ConvertBGRAToBGR(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
       break;
     case MODE_BGRA:
       CopyOrSwap(in_data, num_pixels, rgba, 1);
@@ -1117,14 +541,14 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
       WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
       break;
     case MODE_RGBA_4444:
-      ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
       break;
     case MODE_rgbA_4444:
-      ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
       WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
       break;
     case MODE_RGB_565:
-      ConvertBGRAToRGB565(in_data, num_pixels, rgba);
+      VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
       break;
     default:
       assert(0);          // Code flow should not reach here.
@@ -1133,6 +557,79 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
+VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+VP8LPredictorFunc VP8LPredictors[16];
+
+VP8LTransformColorFunc VP8LTransformColorInverse;
+
+VP8LConvertFunc VP8LConvertBGRAToRGB;
+VP8LConvertFunc VP8LConvertBGRAToRGBA;
+VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+VP8LConvertFunc VP8LConvertBGRAToRGB565;
+VP8LConvertFunc VP8LConvertBGRAToBGR;
+
+VP8LMapARGBFunc VP8LMapColor32b;
+VP8LMapAlphaFunc VP8LMapColor8b;
+
+extern void VP8LDspInitSSE2(void);
+extern void VP8LDspInitNEON(void);
+extern void VP8LDspInitMIPSdspR2(void);
+
+static volatile VP8CPUInfo lossless_last_cpuinfo_used =
+    (VP8CPUInfo)&lossless_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInit(void) {
+  if (lossless_last_cpuinfo_used == VP8GetCPUInfo) return;  // already set up
+  // Install the plain C implementations first.
+  VP8LPredictors[0] = Predictor0;
+  VP8LPredictors[1] = Predictor1;
+  VP8LPredictors[2] = Predictor2;
+  VP8LPredictors[3] = Predictor3;
+  VP8LPredictors[4] = Predictor4;
+  VP8LPredictors[5] = Predictor5;
+  VP8LPredictors[6] = Predictor6;
+  VP8LPredictors[7] = Predictor7;
+  VP8LPredictors[8] = Predictor8;
+  VP8LPredictors[9] = Predictor9;
+  VP8LPredictors[10] = Predictor10;
+  VP8LPredictors[11] = Predictor11;
+  VP8LPredictors[12] = Predictor12;
+  VP8LPredictors[13] = Predictor13;
+  VP8LPredictors[14] = Predictor0;     // <- padding security sentinels:
+  VP8LPredictors[15] = Predictor0;     //    slots 14 and 15 reuse Predictor0
+
+  VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
+
+  VP8LTransformColorInverse = VP8LTransformColorInverse_C;
+
+  VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
+  VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
+  VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
+  VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
+  VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
+
+  VP8LMapColor32b = MapARGB;
+  VP8LMapColor8b = MapAlpha;
+
+  // If a CPU-info hook is set, let each compiled-in SIMD back-end that the
+  if (VP8GetCPUInfo != NULL) {  // CPU supports run its own init function.
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8LDspInitSSE2();
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8LDspInitNEON();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8LDspInitMIPSdspR2();
+    }
 #endif
+  }
+  lossless_last_cpuinfo_used = VP8GetCPUInfo;  // remember the hook we used
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/dsp/lossless.h b/drivers/webp/dsp/lossless.h
index 7c7d5555ed..149c6a01d3 100644
--- a/drivers/webp/dsp/lossless.h
+++ b/drivers/webp/dsp/lossless.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Image transforms and color space conversion methods for lossless decoder.
@@ -13,15 +15,42 @@
 #ifndef WEBP_DSP_LOSSLESS_H_
 #define WEBP_DSP_LOSSLESS_H_
 
-#include "../types.h"
-#include "../decode.h"
+#include "webp/types.h"
+#include "webp/decode.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#include "../enc/histogram.h"
+#include "../utils/utils.h"
+
+#ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "../enc/delta_palettization.h"
+#endif  // WEBP_EXPERIMENTAL_FEATURES
+
+// Not a trivial literal symbol.
+#define VP8L_NON_TRIVIAL_SYM (0xffffffff)
+
 //------------------------------------------------------------------------------
-// Image transforms.
+// Decoding
+
+typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
+extern VP8LPredictorFunc VP8LPredictors[16];
+
+typedef void (*VP8LProcessBlueAndRedFunc)(uint32_t* argb_data, int num_pixels);
+extern VP8LProcessBlueAndRedFunc VP8LAddGreenToBlueAndRed;
+
+typedef struct {
+  // Note: the members are uint8_t, so that any negative values are
+  // automatically converted to "mod 256" values.
+  uint8_t green_to_red_;
+  uint8_t green_to_blue_;
+  uint8_t red_to_blue_;
+} VP8LMultipliers;
+typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
+                                       uint32_t* argb_data, int num_pixels);
+extern VP8LTransformColorFunc VP8LTransformColorInverse;
 
 struct VP8LTransform;  // Defined in dec/vp8li.h.
 
@@ -33,23 +62,110 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
                           int row_start, int row_end,
                           const uint32_t* const in, uint32_t* const out);
 
-// Subtracts green from blue and red channels.
-void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);
-
-void VP8LResidualImage(int width, int height, int bits,
-                       uint32_t* const argb, uint32_t* const argb_scratch,
-                       uint32_t* const image);
-
-void VP8LColorSpaceTransform(int width, int height, int bits, int step,
-                             uint32_t* const argb, uint32_t* image);
-
-//------------------------------------------------------------------------------
 // Color space conversion.
+typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
+                                uint8_t* dst);
+extern VP8LConvertFunc VP8LConvertBGRAToRGB;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
+extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
+extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
+extern VP8LConvertFunc VP8LConvertBGRAToBGR;
 
 // Converts from BGRA to other color spaces.
 void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
                          WEBP_CSP_MODE out_colorspace, uint8_t* const rgba);
 
+// color mapping related functions.
+static WEBP_INLINE uint32_t VP8GetARGBIndex(uint32_t idx) {
+  return (idx >> 8) & 0xff;  // the index is taken from bits 8..15
+}
+
+static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) {
+  return idx;  // the 8-bit sample is used as the index unchanged
+}
+
+static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) {
+  return val;  // the 32-bit color-map entry is returned unchanged
+}
+
+static WEBP_INLINE uint8_t VP8GetAlphaValue(uint32_t val) {
+  return (val >> 8) & 0xff;  // keep only bits 8..15 of the color-map entry
+}
+
+typedef void (*VP8LMapARGBFunc)(const uint32_t* src,
+                                const uint32_t* const color_map,
+                                uint32_t* dst, int y_start,
+                                int y_end, int width);
+typedef void (*VP8LMapAlphaFunc)(const uint8_t* src,
+                                 const uint32_t* const color_map,
+                                 uint8_t* dst, int y_start,
+                                 int y_end, int width);
+
+extern VP8LMapARGBFunc VP8LMapColor32b;
+extern VP8LMapAlphaFunc VP8LMapColor8b;
+
+// Similar to the static method ColorIndexInverseTransform() that is part of
+// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
+// uint32_t) arguments for 'src' and 'dst'.
+void VP8LColorIndexInverseTransformAlpha(
+    const struct VP8LTransform* const transform, int y_start, int y_end,
+    const uint8_t* src, uint8_t* dst);
+
+// Expose some C-only fallback functions
+void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
+                                 uint32_t* data, int num_pixels);
+
+void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
+                                 int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
+                               int num_pixels, uint8_t* dst);
+void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
+void VP8LAddGreenToBlueAndRed_C(uint32_t* data, int num_pixels);
+
+// Must be called before calling any of the above methods.
+void VP8LDspInit(void);
+
+//------------------------------------------------------------------------------
+// Encoding
+
+extern VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+extern VP8LTransformColorFunc VP8LTransformColor;
+typedef void (*VP8LCollectColorBlueTransformsFunc)(
+    const uint32_t* argb, int stride,
+    int tile_width, int tile_height,
+    int green_to_blue, int red_to_blue, int histo[]);
+extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+
+typedef void (*VP8LCollectColorRedTransformsFunc)(
+    const uint32_t* argb, int stride,
+    int tile_width, int tile_height,
+    int green_to_red, int histo[]);
+extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
+
+// Expose some C-only fallback functions
+void VP8LTransformColor_C(const VP8LMultipliers* const m,
+                          uint32_t* data, int num_pixels);
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
+void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
+                                     int tile_width, int tile_height,
+                                     int green_to_red, int histo[]);
+void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
+                                      int tile_width, int tile_height,
+                                      int green_to_blue, int red_to_blue,
+                                      int histo[]);
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+void VP8LResidualImage(int width, int height, int bits, int low_effort,
+                       uint32_t* const argb, uint32_t* const argb_scratch,
+                       uint32_t* const image);
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
+                             uint32_t* const argb, uint32_t* image);
+
 //------------------------------------------------------------------------------
 // Misc methods.
 
@@ -59,10 +175,136 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
   return (size + (1 << sampling_bits) - 1) >> sampling_bits;
 }
 
-// Faster logarithm for integers, with the property of log2(0) == 0.
-float VP8LFastLog2(int v);
+// -----------------------------------------------------------------------------
+// Faster logarithm for integers. Small values use a look-up table.
+#define LOG_LOOKUP_IDX_MAX 256
+extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
+extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
+typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
+
+extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
+  return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
+}
 // Fast calculation of v * log2(v) for integer input.
-static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
+static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
+  return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
+}
+
+// -----------------------------------------------------------------------------
+// Huffman-cost related functions.
+
+typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
+typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
+                                       int length);
+
+extern VP8LCostFunc VP8LExtraCost;
+extern VP8LCostCombinedFunc VP8LExtraCostCombined;
+
+typedef struct {        // small struct to hold counters
+  int counts[2];        // index: 0=zero streak, 1=non-zero streak
+  int streaks[2][2];    // [zero/non-zero][streak<3 / streak>=3]
+} VP8LStreaks;
+
+typedef VP8LStreaks (*VP8LCostCountFunc)(const uint32_t* population,
+                                         int length);
+typedef VP8LStreaks (*VP8LCostCombinedCountFunc)(const uint32_t* X,
+                                                 const uint32_t* Y, int length);
+
+extern VP8LCostCountFunc VP8LHuffmanCostCount;
+extern VP8LCostCombinedCountFunc VP8LHuffmanCostCombinedCount;
+
+// Get the symbol entropy for the distribution 'population'.
+// Set 'trivial_sym', if there's only one symbol present in the distribution.
+double VP8LPopulationCost(const uint32_t* const population, int length,
+                          uint32_t* const trivial_sym);
+
+// Get the combined symbol entropy for the distributions 'X' and 'Y'.
+double VP8LGetCombinedEntropy(const uint32_t* const X,
+                              const uint32_t* const Y, int length);
+
+double VP8LBitsEntropy(const uint32_t* const array, int n,
+                       uint32_t* const trivial_symbol);
+
+// Estimate how many bits the combined entropy of literals and distance
+// approximately maps to.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+
+// This function estimates the cost in bits excluding the bits needed to
+// represent the entropy code itself.
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
+
+typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
+                                     const VP8LHistogram* const b,
+                                     VP8LHistogram* const out);
+extern VP8LHistogramAddFunc VP8LHistogramAdd;
+
+// -----------------------------------------------------------------------------
+// PrefixEncode()
+
+static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
+  const int log_floor = BitsLog2Floor(n);
+  if (n == (n & ~(n - 1)))  // zero or a power of two.
+    return log_floor;
+  else
+    return log_floor + 1;
+}
+
+// Splits a distance/length code into an entropy-coded prefix plus raw extra
+// bits. NOTE(review): shifts by (highest_bit - 1), so it appears to require
+// distance >= 3 on entry; the LUT wrappers only call it for distance >= 512.
+static WEBP_INLINE void VP8LPrefixEncodeBitsNoLUT(int distance, int* const code,
+                                                  int* const extra_bits) {
+  const int highest_bit = BitsLog2Floor(--distance);
+  const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
+  *extra_bits = highest_bit - 1;
+  *code = 2 * highest_bit + second_highest_bit;
+}
+
+static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code,
+                                              int* const extra_bits,
+                                              int* const extra_bits_value) {
+  const int highest_bit = BitsLog2Floor(--distance);
+  const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
+  *extra_bits = highest_bit - 1;
+  *extra_bits_value = distance & ((1 << *extra_bits) - 1);
+  *code = 2 * highest_bit + second_highest_bit;
+}
+
+#define PREFIX_LOOKUP_IDX_MAX   512
+typedef struct {
+  int8_t code_;
+  int8_t extra_bits_;
+} VP8LPrefixCode;
+
+// These tables are derived using VP8LPrefixEncodeNoLUT.
+extern const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX];
+extern const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX];
+static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code,
+                                             int* const extra_bits) {
+  if (distance < PREFIX_LOOKUP_IDX_MAX) {
+    const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
+    *code = prefix_code.code_;
+    *extra_bits = prefix_code.extra_bits_;
+  } else {
+    VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits);
+  }
+}
+
+static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
+                                         int* const extra_bits,
+                                         int* const extra_bits_value) {
+  if (distance < PREFIX_LOOKUP_IDX_MAX) {
+    const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
+    *code = prefix_code.code_;
+    *extra_bits = prefix_code.extra_bits_;
+    *extra_bits_value = kPrefixEncodeExtraBitsValue[distance];
+  } else {
+    VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value);
+  }
+}
 
 // In-place difference of each component with mod 256.
 static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
@@ -73,9 +315,15 @@ static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
   return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
 }
 
+void VP8LBundleColorMap(const uint8_t* const row, int width,
+                        int xbits, uint32_t* const dst);
+
+// Must be called before calling any of the above methods.
+void VP8LEncDspInit(void);
+
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/dsp/lossless_enc.c b/drivers/webp/dsp/lossless_enc.c
new file mode 100644
index 0000000000..b3036f5384
--- /dev/null
+++ b/drivers/webp/dsp/lossless_enc.c
@@ -0,0 +1,1305 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transform methods for lossless encoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#include "./dsp.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include "../dec/vp8li.h"
+#include "../utils/endian_inl.h"
+#include "./lossless.h"
+#include "./yuv.h"
+
+#define MAX_DIFF_COST (1e30f)
+
+// lookup table for small values of log2(int)
+const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+  0.0000000000000000f, 0.0000000000000000f,
+  1.0000000000000000f, 1.5849625007211560f,
+  2.0000000000000000f, 2.3219280948873621f,
+  2.5849625007211560f, 2.8073549220576041f,
+  3.0000000000000000f, 3.1699250014423121f,
+  3.3219280948873621f, 3.4594316186372973f,
+  3.5849625007211560f, 3.7004397181410921f,
+  3.8073549220576041f, 3.9068905956085187f,
+  4.0000000000000000f, 4.0874628412503390f,
+  4.1699250014423121f, 4.2479275134435852f,
+  4.3219280948873626f, 4.3923174227787606f,
+  4.4594316186372973f, 4.5235619560570130f,
+  4.5849625007211560f, 4.6438561897747243f,
+  4.7004397181410917f, 4.7548875021634682f,
+  4.8073549220576037f, 4.8579809951275718f,
+  4.9068905956085187f, 4.9541963103868749f,
+  5.0000000000000000f, 5.0443941193584533f,
+  5.0874628412503390f, 5.1292830169449663f,
+  5.1699250014423121f, 5.2094533656289501f,
+  5.2479275134435852f, 5.2854022188622487f,
+  5.3219280948873626f, 5.3575520046180837f,
+  5.3923174227787606f, 5.4262647547020979f,
+  5.4594316186372973f, 5.4918530963296747f,
+  5.5235619560570130f, 5.5545888516776376f,
+  5.5849625007211560f, 5.6147098441152083f,
+  5.6438561897747243f, 5.6724253419714951f,
+  5.7004397181410917f, 5.7279204545631987f,
+  5.7548875021634682f, 5.7813597135246599f,
+  5.8073549220576037f, 5.8328900141647412f,
+  5.8579809951275718f, 5.8826430493618415f,
+  5.9068905956085187f, 5.9307373375628866f,
+  5.9541963103868749f, 5.9772799234999167f,
+  6.0000000000000000f, 6.0223678130284543f,
+  6.0443941193584533f, 6.0660891904577720f,
+  6.0874628412503390f, 6.1085244567781691f,
+  6.1292830169449663f, 6.1497471195046822f,
+  6.1699250014423121f, 6.1898245588800175f,
+  6.2094533656289501f, 6.2288186904958804f,
+  6.2479275134435852f, 6.2667865406949010f,
+  6.2854022188622487f, 6.3037807481771030f,
+  6.3219280948873626f, 6.3398500028846243f,
+  6.3575520046180837f, 6.3750394313469245f,
+  6.3923174227787606f, 6.4093909361377017f,
+  6.4262647547020979f, 6.4429434958487279f,
+  6.4594316186372973f, 6.4757334309663976f,
+  6.4918530963296747f, 6.5077946401986963f,
+  6.5235619560570130f, 6.5391588111080309f,
+  6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211560f, 6.5999128421871278f,
+  6.6147098441152083f, 6.6293566200796094f,
+  6.6438561897747243f, 6.6582114827517946f,
+  6.6724253419714951f, 6.6865005271832185f,
+  6.7004397181410917f, 6.7142455176661224f,
+  6.7279204545631987f, 6.7414669864011464f,
+  6.7548875021634682f, 6.7681843247769259f,
+  6.7813597135246599f, 6.7944158663501061f,
+  6.8073549220576037f, 6.8201789624151878f,
+  6.8328900141647412f, 6.8454900509443747f,
+  6.8579809951275718f, 6.8703647195834047f,
+  6.8826430493618415f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745946f,
+  6.9307373375628866f, 6.9425145053392398f,
+  6.9541963103868749f, 6.9657842846620869f,
+  6.9772799234999167f, 6.9886846867721654f,
+  7.0000000000000000f, 7.0112272554232539f,
+  7.0223678130284543f, 7.0334230015374501f,
+  7.0443941193584533f, 7.0552824355011898f,
+  7.0660891904577720f, 7.0768155970508308f,
+  7.0874628412503390f, 7.0980320829605263f,
+  7.1085244567781691f, 7.1189410727235076f,
+  7.1292830169449663f, 7.1395513523987936f,
+  7.1497471195046822f, 7.1598713367783890f,
+  7.1699250014423121f, 7.1799090900149344f,
+  7.1898245588800175f, 7.1996723448363644f,
+  7.2094533656289501f, 7.2191685204621611f,
+  7.2288186904958804f, 7.2384047393250785f,
+  7.2479275134435852f, 7.2573878426926521f,
+  7.2667865406949010f, 7.2761244052742375f,
+  7.2854022188622487f, 7.2946207488916270f,
+  7.3037807481771030f, 7.3128829552843557f,
+  7.3219280948873626f, 7.3309168781146167f,
+  7.3398500028846243f, 7.3487281542310771f,
+  7.3575520046180837f, 7.3663222142458160f,
+  7.3750394313469245f, 7.3837042924740519f,
+  7.3923174227787606f, 7.4008794362821843f,
+  7.4093909361377017f, 7.4178525148858982f,
+  7.4262647547020979f, 7.4346282276367245f,
+  7.4429434958487279f, 7.4512111118323289f,
+  7.4594316186372973f, 7.4676055500829976f,
+  7.4757334309663976f, 7.4838157772642563f,
+  7.4918530963296747f, 7.4998458870832056f,
+  7.5077946401986963f, 7.5156998382840427f,
+  7.5235619560570130f, 7.5313814605163118f,
+  7.5391588111080309f, 7.5468944598876364f,
+  7.5545888516776376f, 7.5622424242210728f,
+  7.5698556083309478f, 7.5774288280357486f,
+  7.5849625007211560f, 7.5924570372680806f,
+  7.5999128421871278f, 7.6073303137496104f,
+  7.6147098441152083f, 7.6220518194563764f,
+  7.6293566200796094f, 7.6366246205436487f,
+  7.6438561897747243f, 7.6510516911789281f,
+  7.6582114827517946f, 7.6653359171851764f,
+  7.6724253419714951f, 7.6794800995054464f,
+  7.6865005271832185f, 7.6934869574993252f,
+  7.7004397181410917f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071855f,
+  7.7279204545631987f, 7.7347096202258383f,
+  7.7414669864011464f, 7.7481928495894605f,
+  7.7548875021634682f, 7.7615512324444795f,
+  7.7681843247769259f, 7.7747870596011736f,
+  7.7813597135246599f, 7.7879025593914317f,
+  7.7944158663501061f, 7.8008998999203047f,
+  7.8073549220576037f, 7.8137811912170374f,
+  7.8201789624151878f, 7.8265484872909150f,
+  7.8328900141647412f, 7.8392037880969436f,
+  7.8454900509443747f, 7.8517490414160571f,
+  7.8579809951275718f, 7.8641861446542797f,
+  7.8703647195834047f, 7.8765169465649993f,
+  7.8826430493618415f, 7.8887432488982591f,
+  7.8948177633079437f, 7.9008668079807486f,
+  7.9068905956085187f, 7.9128893362299619f,
+  7.9188632372745946f, 7.9248125036057812f,
+  7.9307373375628866f, 7.9366379390025709f,
+  7.9425145053392398f, 7.9483672315846778f,
+  7.9541963103868749f, 7.9600019320680805f,
+  7.9657842846620869f, 7.9715435539507719f,
+  7.9772799234999167f, 7.9829935746943103f,
+  7.9886846867721654f, 7.9943534368588577f
+};
+
+const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
+  0.00000000f,    0.00000000f,  2.00000000f,   4.75488750f,
+  8.00000000f,   11.60964047f,  15.50977500f,  19.65148445f,
+  24.00000000f,  28.52932501f,  33.21928095f,  38.05374781f,
+  43.01955001f,  48.10571634f,  53.30296891f,  58.60335893f,
+  64.00000000f,  69.48686830f,  75.05865003f,  80.71062276f,
+  86.43856190f,  92.23866588f,  98.10749561f,  104.04192499f,
+  110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
+  134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
+  160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
+  186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
+  212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
+  240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
+  268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
+  296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
+  325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
+  354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
+  384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
+  413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
+  444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
+  474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
+  505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
+  536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
+  568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
+  600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
+  632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
+  664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
+  696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
+  729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
+  762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
+  795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
+  828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
+  862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
+  896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
+  929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
+  963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
+  998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
+  1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
+  1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
+  1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
+  1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
+  1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
+  1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
+  1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
+  1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
+  1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
+  1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
+  1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
+  1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
+  1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
+  1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
+  1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
+  1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
+  1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
+  1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
+  1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
+  1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
+  1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
+  1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
+  1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
+  1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
+  1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
+  1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
+  1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
+  2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
+};
+
+const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX] = {
+  { 0, 0}, { 0, 0}, { 1, 0}, { 2, 0}, { 3, 0}, { 4, 1}, { 4, 1}, { 5, 1},
+  { 5, 1}, { 6, 2}, { 6, 2}, { 6, 2}, { 6, 2}, { 7, 2}, { 7, 2}, { 7, 2},
+  { 7, 2}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3}, { 8, 3},
+  { 8, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3}, { 9, 3},
+  { 9, 3}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
+  {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4}, {10, 4},
+  {10, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
+  {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4}, {11, 4},
+  {11, 4}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5}, {12, 5},
+  {12, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5}, {13, 5},
+  {13, 5}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6}, {14, 6},
+  {14, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6}, {15, 6},
+  {15, 6}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7}, {16, 7},
+  {16, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+  {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7}, {17, 7},
+};
+
+const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX] = {
+   0,  0,  0,  0,  0,  0,  1,  0,  1,  0,  1,  2,  3,  0,  1,  2,  3,
+   0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
+  127,
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+  96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+  112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126
+};
+
+// The threshold below which the approximate version of log_2 can be used.
+// Practically, we can get rid of the call to log() as the two values match to
+// very high degree (the ratio of these two is 0.99999x).
+// Keeping a high threshold for now.
+#define APPROX_LOG_WITH_CORRECTION_MAX  65536
+#define APPROX_LOG_MAX                   4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+static float FastSLog2Slow(uint32_t v) {  // returns ~ v * log2(v)
+  assert(v >= LOG_LOOKUP_IDX_MAX);  // smaller v are not handled here
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    int log_cnt = 0;  // number of right shifts applied to v
+    uint32_t y = 1;  // 2^log_cnt
+    int correction = 0;
+    const float v_f = (float)v;
+    const uint32_t orig_v = v;
+    do {
+      ++log_cnt;
+      v = v >> 1;
+      y = y << 1;
+    } while (v >= LOG_LOOKUP_IDX_MAX);  // stop once v can index kLog2Table
+    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
+    // Xf = floor(Xf) * (1 + (v % y) / v)
+    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
+    // The correction factor: log(1 + d) ~ d; for very small d values, so
+    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
+    // LOG_2_RECIPROCAL ~ 23/16
+    correction = (23 * (orig_v & (y - 1))) >> 4;  // (orig_v % y) * 23 / 16
+    return v_f * (kLog2Table[v] + log_cnt) + correction;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * v * log((double)v));  // libm: v * ln(v) / ln(2)
+  }
+}
+
+static float FastLog2Slow(uint32_t v) {  // returns ~ log2(v)
+  assert(v >= LOG_LOOKUP_IDX_MAX);  // smaller v are not handled here
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    int log_cnt = 0;  // number of right shifts applied to v
+    uint32_t y = 1;  // 2^log_cnt
+    const uint32_t orig_v = v;
+    double log_2;
+    do {
+      ++log_cnt;
+      v = v >> 1;
+      y = y << 1;
+    } while (v >= LOG_LOOKUP_IDX_MAX);  // stop once v can index kLog2Table
+    log_2 = kLog2Table[v] + log_cnt;  // table value for shifted v + shift count
+    if (orig_v >= APPROX_LOG_MAX) {
+      // Since the division is still expensive, add this correction factor only
+      // for large values of 'v'.
+      const int correction = (23 * (orig_v & (y - 1))) >> 4;
+      log_2 += (double)correction / orig_v;
+    }
+    return (float)log_2;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * log((double)v));  // libm: ln(v) / ln(2)
+  }
+}
+
+// Returns the smaller of 'a' and 'b' (helps code size and readability).
+static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; }
+
+//------------------------------------------------------------------------------
+// Methods to calculate Entropy (Shannon).
+
+static float PredictionCostSpatial(const int counts[256], int weight_0,
+                                   double exp_val) {
+  const int significant_symbols = 256 >> 4;  // 16: only bins near 0 are scored
+  const double exp_decay_factor = 0.6;
+  double bits = weight_0 * counts[0];  // bin 0 is weighted by 'weight_0'
+  int i;
+  for (i = 1; i < significant_symbols; ++i) {
+    bits += exp_val * (counts[i] + counts[256 - i]);  // bin i paired with 256 - i
+    exp_val *= exp_decay_factor;  // the weight shrinks as i grows
+  }
+  return (float)(-0.1 * bits);  // larger weighted counts give a lower result
+}
+
+// Compute the combined Shannon's entropy for distribution {X} and {X+Y}
+static float CombinedShannonEntropy(const int X[256], const int Y[256]) {
+  int i;
+  double retval = 0.;
+  int sumX = 0, sumXY = 0;  // totals of X[i] and of X[i] + Y[i]
+  for (i = 0; i < 256; ++i) {
+    const int x = X[i];
+    const int xy = x + Y[i];
+    if (x != 0) {  // bin contributes to both {X} and {X+Y}
+      sumX += x;
+      retval -= VP8LFastSLog2(x);  // presumably x * log2(x), cf. FastSLog2Slow
+      sumXY += xy;
+      retval -= VP8LFastSLog2(xy);
+    } else if (xy != 0) {  // x == 0: bin contributes to {X+Y} only
+      sumXY += xy;
+      retval -= VP8LFastSLog2(xy);
+    }
+  }
+  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);  // add the two totals' terms
+  return (float)retval;
+}
+
+static float PredictionCostSpatialHistogram(const int accumulated[4][256],
+                                            const int tile[4][256]) {
+  int i;
+  double retval = 0;
+  for (i = 0; i < 4; ++i) {  // one 256-bin histogram per channel
+    const double kExpValue = 0.94;  // initial 'exp_val' for the spatial cost
+    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
+    retval += CombinedShannonEntropy(tile[i], accumulated[i]);  // {tile}, {tile+accumulated}
+  }
+  return (float)retval;  // sum of both costs over the four channels
+}
+
+static WEBP_INLINE double BitsEntropyRefine(int nonzeros, int sum, int max_val,
+                                            double retval) {  // 'retval': unrefined entropy
+  double mix;  // weight given to min_limit against 'retval'
+  if (nonzeros < 5) {
+    if (nonzeros <= 1) {
+      return 0;  // zero or one used symbol: the cost is taken as 0
+    }
+    // Two symbols, they will be 0 and 1 in a Huffman code.
+    // Let's mix in a bit of entropy to favor good clustering when
+    // distributions of these are combined.
+    if (nonzeros == 2) {
+      return 0.99 * sum + 0.01 * retval;
+    }
+    // No matter what the entropy says, we cannot be better than min_limit
+    // with Huffman coding. I am mixing a bit of entropy into the
+    // min_limit since it produces much better (~0.5 %) compression results
+    // perhaps because of better entropy clustering.
+    if (nonzeros == 3) {
+      mix = 0.95;
+    } else {
+      mix = 0.7;  // nonzeros == 4.
+    }
+  } else {
+    mix = 0.627;  // five or more used symbols
+  }
+
+  {
+    double min_limit = 2 * sum - max_val;
+    min_limit = mix * min_limit + (1.0 - mix) * retval;  // blend with 'retval'
+    return (retval < min_limit) ? min_limit : retval;  // never below min_limit
+  }
+}
+
+// Returns the entropy for the symbols in the input array.
+// Also sets trivial_symbol to the code value if the array has only one code
+// value. Otherwise, sets it to VP8L_NON_TRIVIAL_SYM.
+double VP8LBitsEntropy(const uint32_t* const array, int n,
+                       uint32_t* const trivial_symbol) {  // may be NULL
+  double retval = 0.;
+  uint32_t sum = 0;  // total of all entries
+  uint32_t nonzero_code = VP8L_NON_TRIVIAL_SYM;  // index of last non-zero entry
+  int nonzeros = 0;  // number of non-zero entries
+  uint32_t max_val = 0;  // largest entry
+  int i;
+  for (i = 0; i < n; ++i) {
+    if (array[i] != 0) {
+      sum += array[i];
+      nonzero_code = i;
+      ++nonzeros;
+      retval -= VP8LFastSLog2(array[i]);
+      if (max_val < array[i]) {
+        max_val = array[i];
+      }
+    }
+  }
+  retval += VP8LFastSLog2(sum);
+  if (trivial_symbol != NULL) {
+    *trivial_symbol = (nonzeros == 1) ? nonzero_code : VP8L_NON_TRIVIAL_SYM;
+  }
+  return BitsEntropyRefine(nonzeros, sum, max_val, retval);  // NOTE(review): uint32_t sum/max_val convert to int here
+}
+
+static double InitialHuffmanCost(void) {  // constant starting cost
+  // Small bias because Huffman code length is typically not stored in
+  // full length.
+  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
+  static const double kSmallBias = 9.1;
+  return kHuffmanCodeOfHuffmanCodeSize - kSmallBias;  // size minus the bias
+}
+
+// Finalize the Huffman cost based on streak numbers and length type (<=3 or >3)
+static double FinalHuffmanCost(const VP8LStreaks* const stats) {
+  double retval = InitialHuffmanCost();  // start from the fixed base cost
+  retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1];
+  retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1];
+  retval += 1.796875 * stats->streaks[0][0];
+  retval += 3.28125 * stats->streaks[1][0];
+  return retval;  // base cost plus the weighted streak statistics
+}
+
+// Trampolines
+static double HuffmanCost(const uint32_t* const population, int length) {
+  const VP8LStreaks stats = VP8LHuffmanCostCount(population, length);  // gather streak stats
+  return FinalHuffmanCost(&stats);  // turn the stats into a cost
+}
+
+// Aggregated costs
+double VP8LPopulationCost(const uint32_t* const population, int length,
+                          uint32_t* const trivial_sym) {  // forwarded to VP8LBitsEntropy
+  return
+      VP8LBitsEntropy(population, length, trivial_sym) +  // entropy part
+      HuffmanCost(population, length);  // Huffman part
+}
+
+double VP8LGetCombinedEntropy(const uint32_t* const X,
+                              const uint32_t* const Y, int length) {  // cost of X[i] + Y[i]
+  double bits_entropy_combined;
+  double huffman_cost_combined;
+  int i;
+
+  // Bit entropy variables.
+  double retval = 0.;
+  int sum = 0;  // total of all combined values
+  int nonzeros = 0;  // number of non-zero combined values
+  uint32_t max_val = 0;  // largest combined value
+  int i_prev;  // index where the current streak started
+  uint32_t xy;  // combined value at index i
+
+  // Huffman cost variables.
+  int streak = 0;  // length of the streak that just ended
+  uint32_t xy_prev;  // value repeated in the current streak
+  VP8LStreaks stats;
+  memset(&stats, 0, sizeof(stats));
+
+  // Treat the first value for the huffman cost: this is keeping the original
+  // behavior, even though there is no first streak.
+  // TODO(vrabaud): study proper behavior
+  xy = X[0] + Y[0];  // NOTE(review): read unconditionally -- assumes length >= 1
+  ++stats.streaks[xy != 0][0];
+  xy_prev = xy;
+  i_prev = 0;
+
+  for (i = 1; i < length; ++i) {
+    xy = X[i] + Y[i];
+
+    // Process data by streaks for both bit entropy and huffman cost.
+    if (xy != xy_prev) {  // value changed: the streak of xy_prev ends at i - 1
+      streak = i - i_prev;
+
+      // Gather info for the bit entropy.
+      if (xy_prev != 0) {
+        sum += xy_prev * streak;
+        nonzeros += streak;
+        retval -= VP8LFastSLog2(xy_prev) * streak;
+        if (max_val < xy_prev) {
+          max_val = xy_prev;
+        }
+      }
+
+      // Gather info for the huffman cost.
+      stats.counts[xy != 0] += (streak > 3);  // NOTE(review): indexed by the new 'xy', not 'xy_prev' -- confirm intended
+      stats.streaks[xy != 0][(streak > 3)] += streak;  // second index: 1 when streak > 3
+
+      xy_prev = xy;
+      i_prev = i;
+    }
+  }
+
+  // Finish off the last streak for bit entropy.
+  if (xy != 0) {
+    streak = i - i_prev;  // here i == length when length >= 1
+    sum += xy * streak;
+    nonzeros += streak;
+    retval -= VP8LFastSLog2(xy) * streak;
+    if (max_val < xy) {
+      max_val = xy;
+    }
+  }
+  // Huffman cost is not updated with the last streak to keep original behavior.
+  // TODO(vrabaud): study proper behavior
+
+  retval += VP8LFastSLog2(sum);
+  bits_entropy_combined = BitsEntropyRefine(nonzeros, sum, max_val, retval);
+
+  huffman_cost_combined = FinalHuffmanCost(&stats);
+
+  return bits_entropy_combined + huffman_cost_combined;
+}
+
+// Estimates the Entropy + Huffman + other block overhead size cost.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+  return
+      VP8LPopulationCost(
+          p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL)
+      + VP8LPopulationCost(p->red_, NUM_LITERAL_CODES, NULL)
+      + VP8LPopulationCost(p->blue_, NUM_LITERAL_CODES, NULL)
+      + VP8LPopulationCost(p->alpha_, NUM_LITERAL_CODES, NULL)
+      + VP8LPopulationCost(p->distance_, NUM_DISTANCE_CODES, NULL)
+      + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)  // entries after the first NUM_LITERAL_CODES
+      + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);  // extra cost over the distance entries
+}
+
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {  // as VP8LHistogramEstimateBits, without the Huffman cost
+  return
+      VP8LBitsEntropy(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_),
+                  NULL)
+      + VP8LBitsEntropy(p->red_, NUM_LITERAL_CODES, NULL)
+      + VP8LBitsEntropy(p->blue_, NUM_LITERAL_CODES, NULL)
+      + VP8LBitsEntropy(p->alpha_, NUM_LITERAL_CODES, NULL)
+      + VP8LBitsEntropy(p->distance_, NUM_DISTANCE_CODES, NULL)
+      + VP8LExtraCost(p->literal_ + NUM_LITERAL_CODES, NUM_LENGTH_CODES)  // entries after the first NUM_LITERAL_CODES
+      + VP8LExtraCost(p->distance_, NUM_DISTANCE_CODES);  // extra cost over the distance entries
+}
+
+static WEBP_INLINE void UpdateHisto(int histo_argb[4][256], uint32_t argb) {  // counts each byte of 'argb'
+  ++histo_argb[0][argb >> 24];  // bits 24..31
+  ++histo_argb[1][(argb >> 16) & 0xff];  // bits 16..23
+  ++histo_argb[2][(argb >> 8) & 0xff];  // bits 8..15
+  ++histo_argb[3][argb & 0xff];  // bits 0..7
+}
+
+//------------------------------------------------------------------------------
+
+// Returns best predictor and updates the accumulated histogram.
+static int GetBestPredictorForTile(int width, int height,
+                                   int tile_x, int tile_y, int bits,
+                                   int accumulated[4][256],
+                                   const uint32_t* const argb_scratch) {
+  const int kNumPredModes = 14;  // modes 0..13 are tried
+  const int col_start = tile_x << bits;  // tile origin, in pixels
+  const int row_start = tile_y << bits;
+  const int tile_size = 1 << bits;
+  const int max_y = GetMin(tile_size, height - row_start);  // clipped to the image
+  const int max_x = GetMin(tile_size, width - col_start);
+  float best_diff = MAX_DIFF_COST;
+  int best_mode = 0;
+  int mode;
+  int histo_stack_1[4][256];
+  int histo_stack_2[4][256];
+  // Need pointers to be able to swap arrays.
+  int (*histo_argb)[256] = histo_stack_1;  // histogram of the mode under test
+  int (*best_histo)[256] = histo_stack_2;  // histogram of the best mode so far
+
+  int i, j;
+  for (mode = 0; mode < kNumPredModes; ++mode) {
+    const uint32_t* current_row = argb_scratch;  // scratch row 0 acts as the row above the tile
+    const VP8LPredictorFunc pred_func = VP8LPredictors[mode];
+    float cur_diff;
+    int y;
+    memset(histo_argb, 0, sizeof(histo_stack_1));  // both buffers have this size
+    for (y = 0; y < max_y; ++y) {
+      int x;
+      const int row = row_start + y;  // row index in the full image
+      const uint32_t* const upper_row = current_row;
+      current_row = upper_row + width;  // scratch rows are 'width' pixels apart
+      for (x = 0; x < max_x; ++x) {
+        const int col = col_start + x;  // column index in the full image
+        uint32_t predict;
+        if (row == 0) {
+          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
+        } else if (col == 0) {
+          predict = upper_row[col];  // Top.
+        } else {
+          predict = pred_func(current_row[col - 1], upper_row + col);
+        }
+        UpdateHisto(histo_argb, VP8LSubPixels(current_row[col], predict));  // residual
+      }
+    }
+    cur_diff = PredictionCostSpatialHistogram(
+        (const int (*)[256])accumulated, (const int (*)[256])histo_argb);
+    if (cur_diff < best_diff) {  // keep this mode: swap the two buffers
+      int (*tmp)[256] = histo_argb;
+      histo_argb = best_histo;
+      best_histo = tmp;
+      best_diff = cur_diff;
+      best_mode = mode;
+    }
+  }
+
+  for (i = 0; i < 4; i++) {  // fold the winning histogram into 'accumulated'
+    for (j = 0; j < 256; j++) {
+      accumulated[i][j] += best_histo[i][j];
+    }
+  }
+
+  return best_mode;
+}
+
+static void CopyImageWithPrediction(int width, int height,
+                                    int bits, uint32_t* const modes,
+                                    uint32_t* const argb_scratch,
+                                    uint32_t* const argb) {  // 'argb' is overwritten with residuals
+  const int tiles_per_row = VP8LSubSampleSize(width, bits);
+  const int mask = (1 << bits) - 1;  // x & mask == 0 at the start of each tile
+  // The row size is one pixel longer to allow the top right pixel to point to
+  // the leftmost pixel of the next row when at the right edge.
+  uint32_t* current_row = argb_scratch;  // two rows of width + 1 pixels each
+  uint32_t* upper_row = argb_scratch + width + 1;
+  int y;
+  VP8LPredictorFunc pred_func = 0;  // assigned at x == 0 of every row
+
+  for (y = 0; y < height; ++y) {
+    int x;
+    uint32_t* tmp = upper_row;  // swap: the previous row becomes upper_row
+    upper_row = current_row;
+    current_row = tmp;
+    memcpy(current_row, argb + y * width, sizeof(*current_row) * width);  // copy of original row y
+    current_row[width] = (y + 1 < height) ? argb[(y + 1) * width] : ARGB_BLACK;
+    for (x = 0; x < width; ++x) {
+      uint32_t predict;
+      if ((x & mask) == 0) {  // new tile: look up its predictor
+        const int mode =
+            (modes[(y >> bits) * tiles_per_row + (x >> bits)] >> 8) & 0xff;  // mode is in bits 8..15
+        pred_func = VP8LPredictors[mode];
+      }
+      if (y == 0) {
+        predict = (x == 0) ? ARGB_BLACK : current_row[x - 1];  // Left.
+      } else if (x == 0) {
+        predict = upper_row[x];  // Top.
+      } else {
+        predict = pred_func(current_row[x - 1], upper_row + x);
+      }
+      argb[y * width + x] = VP8LSubPixels(current_row[x], predict);  // store in place
+    }
+  }
+}
+// Picks a predictor mode per tile (stored in image[]), then runs CopyImageWithPrediction() on argb[].
+void VP8LResidualImage(int width, int height, int bits, int low_effort,
+                       uint32_t* const argb, uint32_t* const argb_scratch,
+                       uint32_t* const image) {
+  const int max_tile_size = 1 << bits;
+  const int tiles_per_row = VP8LSubSampleSize(width, bits);
+  const int tiles_per_col = VP8LSubSampleSize(height, bits);
+  const int kPredLowEffort = 11;  // mode used for every tile when low_effort is set
+  uint32_t* const upper_row = argb_scratch;
+  uint32_t* const current_tile_rows = argb_scratch + width;
+  int tile_y;
+  int histo[4][256];
+  if (!low_effort) memset(histo, 0, sizeof(histo));  // histo is not used in low-effort mode
+  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+    const int tile_y_offset = tile_y * max_tile_size;
+    const int this_tile_height =
+        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
+    int tile_x;
+    if (tile_y > 0) {  // keep the last row of the previous tile row as upper_row
+      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
+             width * sizeof(*upper_row));
+    }
+    memcpy(current_tile_rows, &argb[tile_y_offset * width],
+           this_tile_height * width * sizeof(*current_tile_rows));
+    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+      const int pred =
+          low_effort ? kPredLowEffort :
+                       GetBestPredictorForTile(width, height,
+                                               tile_x, tile_y, bits,
+                                               (int (*)[256])histo,
+                                               argb_scratch);
+      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);  // mode in bits 8..15
+    }
+  }
+
+  CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb);
+}
+// Subtracts green from red and blue (modulo 256) in every pixel; alpha and green are kept.
+void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) {
+  int n;
+  for (n = 0; n < num_pixels; ++n) {
+    const uint32_t pixel = argb_data[n];
+    const uint32_t g = (pixel >> 8) & 0xff;
+    const uint32_t r = ((pixel >> 16) - g) & 0xff;  // low byte depends on low bytes only
+    const uint32_t b = (pixel - g) & 0xff;
+    argb_data[n] = (pixel & 0xff00ff00u) | (r << 16) | b;
+  }
+}
+// Resets all three color-transform multipliers of 'm' to zero.
+static WEBP_INLINE void MultipliersClear(VP8LMultipliers* const m) {
+  m->red_to_blue_ = 0;
+  m->green_to_blue_ = 0;
+  m->green_to_red_ = 0;
+}
+// Returns the product color_pred * color, converted to uint32_t and then shifted right by 5.
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
+                                                int8_t color) {
+  return (uint32_t)((int)(color_pred) * color) >> 5;  // the cast binds before the shift
+}
+// Unpacks the three multipliers from bytes 0..2 of 'color_code' into 'm'.
+static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
+                                               VP8LMultipliers* const m) {
+  m->red_to_blue_   = (color_code >> 16) & 0xff;
+  m->green_to_blue_ = (color_code >>  8) & 0xff;
+  m->green_to_red_  = color_code & 0xff;
+}
+// Packs the multipliers of 'm' into bytes 0..2 of a color code whose top byte is 0xff.
+static WEBP_INLINE uint32_t MultipliersToColorCode(
+    const VP8LMultipliers* const m) {
+  uint32_t code = 0xff000000u | ((uint32_t)(m->red_to_blue_) << 16);
+  code |= (uint32_t)(m->green_to_blue_) << 8;
+  code |= m->green_to_red_;
+  return code;
+}
+// Applies the color transform 'm' in place to the red and blue channels of 'data'.
+void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
+                          int num_pixels) {
+  int n;
+  for (n = 0; n < num_pixels; ++n) {
+    const uint32_t pixel = data[n];
+    const uint32_t g = pixel >> 8;   // narrowed to int8_t by ColorTransformDelta
+    const uint32_t r = pixel >> 16;
+    // Red loses the green contribution; blue loses the green and red ones.
+    const uint32_t red_out =
+        (r - ColorTransformDelta(m->green_to_red_, g)) & 0xff;
+    const uint32_t blue_out =
+        (pixel - ColorTransformDelta(m->green_to_blue_, g)
+               - ColorTransformDelta(m->red_to_blue_, r)) & 0xff;
+    // Alpha and green are copied through unchanged.
+    data[n] = (pixel & 0xff00ff00u) | (red_out << 16) | blue_out;
+  }
+}
+// Returns the transformed red channel of 'argb' for the multiplier 'green_to_red'.
+static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
+                                             uint32_t argb) {
+  const uint32_t g = argb >> 8;
+  const uint32_t r = argb >> 16;
+  const uint32_t delta = ColorTransformDelta(green_to_red, g);
+  return ((r - delta) & 0xff);
+}
+// Returns the transformed blue channel of 'argb' for the two given multipliers.
+static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
+                                              uint8_t red_to_blue,
+                                              uint32_t argb) {
+  const uint32_t g = argb >> 8;
+  const uint32_t r = argb >> 16;
+  const uint32_t from_green = ColorTransformDelta(green_to_blue, g);
+  const uint32_t from_red = ColorTransformDelta(red_to_blue, r);
+  // The arithmetic is modulo 256: only the low byte is returned.
+  return ((argb - from_green - from_red) & 0xff);
+}
+// Cost of histogram 'counts': its entropy combined with 'accumulated' plus a spatial cost term.
+static float PredictionCostCrossColor(const int accumulated[256],
+                                      const int counts[256]) {
+  // Favor low entropy, locally and globally.
+  // Favor small absolute values for PredictionCostSpatial
+  static const double kExpValue = 2.4;
+  return CombinedShannonEntropy(counts, accumulated) +
+         PredictionCostSpatial(counts, 3, kExpValue);
+}
+// Histograms the transformed red value of every pixel of a tile_width x tile_height tile.
+void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
+                                     int tile_width, int tile_height,
+                                     int green_to_red, int histo[]) {
+  int row;
+  for (row = 0; row < tile_height; ++row, argb += stride) {
+    int col;
+    for (col = 0; col < tile_width; ++col) {
+      histo[TransformColorRed(green_to_red, argb[col])] += 1;
+    }
+  }
+}
+// Cost of using 'green_to_red' on this tile; each match listed below lowers it by 3.
+static float GetPredictionCostCrossColorRed(
+    const uint32_t* argb, int stride, int tile_width, int tile_height,
+    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int green_to_red,
+    const int accumulated_red_histo[256]) {
+  const uint8_t candidate = (uint8_t)green_to_red;
+  int red_histo[256] = { 0 };
+  float cost;
+
+  VP8LCollectColorRedTransforms(argb, stride, tile_width, tile_height,
+                                green_to_red, red_histo);
+  cost = PredictionCostCrossColor(accumulated_red_histo, red_histo);
+  // favor keeping the areas locally similar
+  if (candidate == prev_x.green_to_red_) {
+    cost -= 3;
+  }
+  if (candidate == prev_y.green_to_red_) {
+    cost -= 3;
+  }
+  // A zero multiplier gets the same bonus.
+  if (green_to_red == 0) cost -= 3;
+  return cost;
+}
+// Searches for the green_to_red multiplier of lowest cost and stores it in best_tx.
+static void GetBestGreenToRed(
+    const uint32_t* argb, int stride, int tile_width, int tile_height,
+    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
+    const int accumulated_red_histo[256], VP8LMultipliers* const best_tx) {
+  const int kMaxIters = 4 + ((7 * quality) >> 8);  // in range [4..6]
+  int green_to_red_best = 0;
+  int iter, offset;
+  float best_diff = GetPredictionCostCrossColorRed(  // cost of the starting value 0
+      argb, stride, tile_width, tile_height, prev_x, prev_y,
+      green_to_red_best, accumulated_red_histo);
+  for (iter = 0; iter < kMaxIters; ++iter) {
+    // ColorTransformDelta is a 3.5 bit fixed point, so 32 is equal to
+    // one in color computation. Having initial delta here as 1 is sufficient
+    // to explore the range of (-2, 2).
+    const int delta = 32 >> iter;  // the step halves on every iteration
+    // Try a negative and a positive delta from the best known value.
+    for (offset = -delta; offset <= delta; offset += 2 * delta) {
+      const int green_to_red_cur = offset + green_to_red_best;  // best may already include the negative step
+      const float cur_diff = GetPredictionCostCrossColorRed(
+          argb, stride, tile_width, tile_height, prev_x, prev_y,
+          green_to_red_cur, accumulated_red_histo);
+      if (cur_diff < best_diff) {
+        best_diff = cur_diff;
+        green_to_red_best = green_to_red_cur;
+      }
+    }
+  }
+  best_tx->green_to_red_ = green_to_red_best;
+}
+// Histograms the transformed blue value of every pixel of a tile_width x tile_height tile.
+void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
+                                      int tile_width, int tile_height,
+                                      int green_to_blue, int red_to_blue,
+                                      int histo[]) {
+  int row;
+  for (row = 0; row < tile_height; ++row, argb += stride) {
+    int col;
+    for (col = 0; col < tile_width; ++col) {
+      histo[TransformColorBlue(green_to_blue, red_to_blue, argb[col])] += 1;
+    }
+  }
+}
+// Cost of using (green_to_blue, red_to_blue) on this tile; each match below lowers it by 3.
+static float GetPredictionCostCrossColorBlue(
+    const uint32_t* argb, int stride, int tile_width, int tile_height,
+    VP8LMultipliers prev_x, VP8LMultipliers prev_y,
+    int green_to_blue, int red_to_blue, const int accumulated_blue_histo[256]) {
+  const uint8_t g2b = (uint8_t)green_to_blue;
+  const uint8_t r2b = (uint8_t)red_to_blue;
+  int blue_histo[256] = { 0 };
+  float cost;
+
+  VP8LCollectColorBlueTransforms(argb, stride, tile_width, tile_height,
+                                 green_to_blue, red_to_blue, blue_histo);
+  cost = PredictionCostCrossColor(accumulated_blue_histo, blue_histo);
+  // favor keeping the areas locally similar
+  if (g2b == prev_x.green_to_blue_) {
+    cost -= 3;
+  }
+  if (g2b == prev_y.green_to_blue_) {
+    cost -= 3;
+  }
+  if (r2b == prev_x.red_to_blue_) {
+    cost -= 3;
+  }
+  if (r2b == prev_y.red_to_blue_) {
+    cost -= 3;
+  }
+  // Zero multipliers get the same bonus.
+  if (green_to_blue == 0) cost -= 3;
+  if (red_to_blue == 0) cost -= 3;
+
+  return cost;
+}
+// Searches (green_to_blue, red_to_blue) with shrinking steps; writes the best pair to best_tx.
+#define kGreenRedToBlueNumAxis 8
+#define kGreenRedToBlueMaxIters 7
+static void GetBestGreenRedToBlue(
+    const uint32_t* argb, int stride, int tile_width, int tile_height,
+    VP8LMultipliers prev_x, VP8LMultipliers prev_y, int quality,
+    const int accumulated_blue_histo[256],
+    VP8LMultipliers* const best_tx) {
+  const int8_t offset[kGreenRedToBlueNumAxis][2] =  // {green_to_blue, red_to_blue} direction per axis
+      {{0, -1}, {0, 1}, {-1, 0}, {1, 0}, {-1, -1}, {-1, 1}, {1, -1}, {1, 1}};
+  const int8_t delta_lut[kGreenRedToBlueMaxIters] = { 16, 16, 8, 4, 2, 2, 2 };  // step per iteration
+  const int iters =
+      (quality < 25) ? 1 : (quality > 50) ? kGreenRedToBlueMaxIters : 4;
+  int green_to_blue_best = 0;
+  int red_to_blue_best = 0;
+  int iter;
+  // Initial value at origin:
+  float best_diff = GetPredictionCostCrossColorBlue(
+      argb, stride, tile_width, tile_height, prev_x, prev_y,
+      green_to_blue_best, red_to_blue_best, accumulated_blue_histo);
+  for (iter = 0; iter < iters; ++iter) {
+    const int delta = delta_lut[iter];
+    int axis;
+    for (axis = 0; axis < kGreenRedToBlueNumAxis; ++axis) {
+      const int green_to_blue_cur =
+          offset[axis][0] * delta + green_to_blue_best;
+      const int red_to_blue_cur = offset[axis][1] * delta + red_to_blue_best;
+      const float cur_diff = GetPredictionCostCrossColorBlue(
+          argb, stride, tile_width, tile_height, prev_x, prev_y,
+          green_to_blue_cur, red_to_blue_cur, accumulated_blue_histo);
+      if (cur_diff < best_diff) {
+        best_diff = cur_diff;
+        green_to_blue_best = green_to_blue_cur;
+        red_to_blue_best = red_to_blue_cur;
+      }
+      if (quality < 25 && iter == 4) {  // NOTE(review): never true, iters == 1 when quality < 25
+        // Only axis aligned diffs for lower quality.
+        break;  // next iter.
+      }
+    }
+    if (delta == 2 && green_to_blue_best == 0 && red_to_blue_best == 0) {
+      // Further iterations would not help.
+      break;  // out of iter-loop.
+    }
+  }
+  best_tx->green_to_blue_ = green_to_blue_best;
+  best_tx->red_to_blue_ = red_to_blue_best;
+}
+#undef kGreenRedToBlueMaxIters
+#undef kGreenRedToBlueNumAxis
+// Returns the multipliers chosen for tile (tile_x, tile_y): green_to_red first, then the blue pair.
+static VP8LMultipliers GetBestColorTransformForTile(
+    int tile_x, int tile_y, int bits,
+    VP8LMultipliers prev_x,
+    VP8LMultipliers prev_y,
+    int quality, int xsize, int ysize,
+    const int accumulated_red_histo[256],
+    const int accumulated_blue_histo[256],
+    const uint32_t* const argb) {
+  const int tile_size = 1 << bits;
+  const int x0 = tile_x * tile_size;
+  const int y0 = tile_y * tile_size;
+  // Tiles on the right/bottom border are clipped to the image size.
+  const int x_end = GetMin(x0 + tile_size, xsize);
+  const int y_end = GetMin(y0 + tile_size, ysize);
+  const int w = x_end - x0;
+  const int h = y_end - y0;
+  const uint32_t* const tile = argb + y0 * xsize + x0;
+  VP8LMultipliers result;
+
+  MultipliersClear(&result);
+  GetBestGreenToRed(tile, xsize, w, h,
+                    prev_x, prev_y, quality, accumulated_red_histo, &result);
+  GetBestGreenRedToBlue(tile, xsize, w, h,
+                        prev_x, prev_y, quality, accumulated_blue_histo,
+                        &result);
+  return result;
+}
+// Applies 'color_transform' in place to the tile whose top-left pixel is (tile_x, tile_y).
+static void CopyTileWithColorTransform(int xsize, int ysize,
+                                       int tile_x, int tile_y,
+                                       int max_tile_size,
+                                       VP8LMultipliers color_transform,
+                                       uint32_t* argb) {
+  const int row_len = GetMin(max_tile_size, xsize - tile_x);
+  const int num_rows = GetMin(max_tile_size, ysize - tile_y);
+  uint32_t* row = argb + tile_y * xsize + tile_x;
+  int y;
+  for (y = 0; y < num_rows; ++y, row += xsize) {
+    VP8LTransformColor(&color_transform, row, row_len);
+  }
+}
+// Chooses a color transform for every tile (stored in image[]) and applies it to argb[] in place.
+void VP8LColorSpaceTransform(int width, int height, int bits, int quality,
+                             uint32_t* const argb, uint32_t* image) {
+  const int max_tile_size = 1 << bits;
+  const int tile_xsize = VP8LSubSampleSize(width, bits);
+  const int tile_ysize = VP8LSubSampleSize(height, bits);
+  int accumulated_red_histo[256] = { 0 };
+  int accumulated_blue_histo[256] = { 0 };
+  int tile_x, tile_y;
+  VP8LMultipliers prev_x, prev_y;
+  MultipliersClear(&prev_y);
+  MultipliersClear(&prev_x);  // NOTE: prev_x is not cleared again at the start of later tile rows
+  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+      int y;
+      const int tile_x_offset = tile_x * max_tile_size;
+      const int tile_y_offset = tile_y * max_tile_size;
+      const int all_x_max = GetMin(tile_x_offset + max_tile_size, width);
+      const int all_y_max = GetMin(tile_y_offset + max_tile_size, height);
+      const int offset = tile_y * tile_xsize + tile_x;  // index of this tile in image[]
+      if (tile_y != 0) {  // prev_y: multipliers of the tile directly above
+        ColorCodeToMultipliers(image[offset - tile_xsize], &prev_y);
+      }
+      prev_x = GetBestColorTransformForTile(tile_x, tile_y, bits,  // prev_x now holds this tile's choice
+                                            prev_x, prev_y,
+                                            quality, width, height,
+                                            accumulated_red_histo,
+                                            accumulated_blue_histo,
+                                            argb);
+      image[offset] = MultipliersToColorCode(&prev_x);
+      CopyTileWithColorTransform(width, height, tile_x_offset, tile_y_offset,
+                                 max_tile_size, prev_x, argb);
+
+      // Gather accumulated histogram data.
+      for (y = tile_y_offset; y < all_y_max; ++y) {
+        int ix = y * width + tile_x_offset;
+        const int ix_end = ix + all_x_max - tile_x_offset;
+        for (; ix < ix_end; ++ix) {
+          const uint32_t pix = argb[ix];  // value after the transform applied above
+          if (ix >= 2 &&
+              pix == argb[ix - 2] &&
+              pix == argb[ix - 1]) {
+            continue;  // repeated pixels are handled by backward references
+          }
+          if (ix >= width + 2 &&
+              argb[ix - 2] == argb[ix - width - 2] &&
+              argb[ix - 1] == argb[ix - width - 1] &&
+              pix == argb[ix - width]) {
+            continue;  // repeated pixels are handled by backward references
+          }
+          ++accumulated_red_histo[(pix >> 16) & 0xff];
+          ++accumulated_blue_histo[(pix >> 0) & 0xff];
+        }
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
+void VP8LBundleColorMap(const uint8_t* const row, int width,
+                        int xbits, uint32_t* const dst) {
+  int x;
+  if (xbits <= 0) {
+    // No bundling: every index goes to bits 8..15 of its own output pixel.
+    for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
+  } else {
+    const int bits_per_index = 1 << (3 - xbits);
+    const int sub_mask = (1 << xbits) - 1;
+    uint32_t packed = 0xff000000;
+    for (x = 0; x < width; ++x) {
+      const int slot = x & sub_mask;
+      // A new output pixel starts whenever the slot wraps to 0.
+      if (slot == 0) packed = 0xff000000;
+      packed |= row[x] << (8 + bits_per_index * slot);
+      dst[x >> xbits] = packed;  // rewritten until the bundle is complete
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Returns the sum of (i >> 1) * population[i + 2] for i in [2, length - 2), formed in double.
+static double ExtraCost(const uint32_t* population, int length) {
+  int i;
+  double cost = 0.;
+  for (i = 2; i < length - 2; ++i) cost += (double)(i >> 1) * population[i + 2];
+  return cost;
+}
+// Returns the sum of (i >> 1) * (X[i + 2] + Y[i + 2]) for i in [2, length - 2).
+static double ExtraCostCombined(const uint32_t* X, const uint32_t* Y,
+                                int length) {
+  int i;
+  double cost = 0.;
+  for (i = 2; i < length - 2; ++i) {
+    const uint32_t xy = X[i + 2] + Y[i + 2];  // kept unsigned: large counts must not turn negative
+    cost += (double)(i >> 1) * xy;  // multiply in double: no integer overflow
+  }
+  return cost;
+}
+
+// Returns the RLE statistics of 'population': runs split by zero/non-zero value and length > 3.
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
+  int i;
+  int run = 0;
+  VP8LStreaks stats;
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length - 1; ++i) {
+    ++run;
+    if (population[i] != population[i + 1]) {
+      const int nz = (population[i] != 0);
+      stats.counts[nz] += (run > 3);
+      stats.streaks[nz][(run > 3)] += run;
+      run = 0;
+    }
+  }
+  ++run;  // the last symbol always closes the final run
+  stats.counts[population[i] != 0] += (run > 3);
+  stats.streaks[population[i] != 0][(run > 3)] += run;
+  return stats;
+}
+
+//------------------------------------------------------------------------------
+// Sets out = a + b element by element; when 'out' is 'b' this is done as out += a.
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  int k;
+  const int num_literals = VP8LHistogramNumCodes(a->palette_code_bits_);
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  if (b == out) {
+    for (k = 0; k < num_literals; ++k) {
+      out->literal_[k] += a->literal_[k];
+    }
+    for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
+      out->distance_[k] += a->distance_[k];
+    }
+    for (k = 0; k < NUM_LITERAL_CODES; ++k) {
+      out->red_[k] += a->red_[k];
+      out->blue_[k] += a->blue_[k];
+      out->alpha_[k] += a->alpha_[k];
+    }
+  } else {
+    for (k = 0; k < num_literals; ++k) {
+      out->literal_[k] = a->literal_[k] + b->literal_[k];
+    }
+    for (k = 0; k < NUM_DISTANCE_CODES; ++k) {
+      out->distance_[k] = a->distance_[k] + b->distance_[k];
+    }
+    for (k = 0; k < NUM_LITERAL_CODES; ++k) {
+      out->red_[k] = a->red_[k] + b->red_[k];
+      out->blue_[k] = a->blue_[k] + b->blue_[k];
+      out->alpha_[k] = a->alpha_[k] + b->alpha_[k];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Encoder function pointers; VP8LEncDspInit() assigns every one of them.
+VP8LProcessBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
+
+VP8LTransformColorFunc VP8LTransformColor;
+
+VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
+VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
+
+VP8LFastLog2SlowFunc VP8LFastLog2Slow;
+VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
+
+VP8LCostFunc VP8LExtraCost;
+VP8LCostCombinedFunc VP8LExtraCostCombined;
+
+VP8LCostCountFunc VP8LHuffmanCostCount;
+
+VP8LHistogramAddFunc VP8LHistogramAdd;
+// Platform-specific initializers; VP8LEncDspInit() calls those that are compiled in and supported.
+extern void VP8LEncDspInitSSE2(void);
+extern void VP8LEncDspInitSSE41(void);
+extern void VP8LEncDspInitNEON(void);
+extern void VP8LEncDspInitMIPS32(void);
+extern void VP8LEncDspInitMIPSdspR2(void);
+// VP8GetCPUInfo value of the last init; starts as its own address, presumably so the first check fails.
+static volatile VP8CPUInfo lossless_enc_last_cpuinfo_used =
+    (VP8CPUInfo)&lossless_enc_last_cpuinfo_used;
+// Installs the C implementations, then lets the supported CPU-specific initializers override them.
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInit(void) {
+  if (lossless_enc_last_cpuinfo_used == VP8GetCPUInfo) return;  // already set up for this VP8GetCPUInfo
+
+  VP8LDspInit();
+
+  VP8LSubtractGreenFromBlueAndRed = VP8LSubtractGreenFromBlueAndRed_C;
+
+  VP8LTransformColor = VP8LTransformColor_C;
+
+  VP8LCollectColorBlueTransforms = VP8LCollectColorBlueTransforms_C;
+  VP8LCollectColorRedTransforms = VP8LCollectColorRedTransforms_C;
+
+  VP8LFastLog2Slow = FastLog2Slow;
+  VP8LFastSLog2Slow = FastSLog2Slow;
+
+  VP8LExtraCost = ExtraCost;
+  VP8LExtraCostCombined = ExtraCostCombined;
+
+  VP8LHuffmanCostCount = HuffmanCostCount;
+
+  VP8LHistogramAdd = HistogramAdd;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8LEncDspInitSSE2();
+#if defined(WEBP_USE_SSE41)
+      if (VP8GetCPUInfo(kSSE4_1)) {  // only probed when SSE2 is available
+        VP8LEncDspInitSSE41();
+      }
+#endif
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8LEncDspInitNEON();
+    }
+#endif
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      VP8LEncDspInitMIPS32();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      VP8LEncDspInitMIPSdspR2();
+    }
+#endif
+  }
+  lossless_enc_last_cpuinfo_used = VP8GetCPUInfo;  // remembered for the early return above
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/dsp/lossless_enc_mips32.c b/drivers/webp/dsp/lossless_enc_mips32.c
new file mode 100644
index 0000000000..0468a5aac2
--- /dev/null
+++ b/drivers/webp/dsp/lossless_enc_mips32.c
@@ -0,0 +1,417 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of lossless functions
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+#include "./lossless.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define APPROX_LOG_WITH_CORRECTION_MAX  65536
+#define APPROX_LOG_MAX                   4096
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
+// Returns an approximation of v * log2(v); v must be >= LOG_LOOKUP_IDX_MAX.
+static float FastSLog2Slow(uint32_t v) {
+  assert(v >= LOG_LOOKUP_IDX_MAX);
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    uint32_t log_cnt, y, correction;
+    const int c24 = 24;
+    const float v_f = (float)v;
+    uint32_t temp;
+
+    // Xf = 256 = 2^8
+    // log_cnt is index of leading one in upper 24 bits
+    __asm__ volatile(  // log_cnt = 24 - clz(v); y = 1 << log_cnt; temp = v >> log_cnt
+      "clz      %[log_cnt], %[v]                      \n\t"
+      "addiu    %[y],       $zero,        1           \n\t"
+      "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
+      "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
+      "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
+      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
+        [temp]"=r"(temp)
+      : [c24]"r"(c24), [v]"r"(v)
+    );
+
+    // vf = (2^log_cnt) * Xf; where y = 2^log_cnt and Xf < 256
+    // Xf = floor(Xf) * (1 + (v % y) / v)
+    // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v)
+    // The correction factor: log(1 + d) ~ d; for very small d values, so
+    // log2(1 + (v % y) / v) ~ LOG_2_RECIPROCAL * (v % y)/v
+    // LOG_2_RECIPROCAL ~ 23/16
+
+    // (v % y) = (v % 2^log_cnt) = v & (2^log_cnt - 1)
+    correction = (23 * (v & (y - 1))) >> 4;
+    return v_f * (kLog2Table[temp] + log_cnt) + correction;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * v * log((double)v));  // libm path for large v
+  }
+}
+// Returns an approximation of log2(v); v must be >= LOG_LOOKUP_IDX_MAX.
+static float FastLog2Slow(uint32_t v) {
+  assert(v >= LOG_LOOKUP_IDX_MAX);
+  if (v < APPROX_LOG_WITH_CORRECTION_MAX) {
+    uint32_t log_cnt, y;
+    const int c24 = 24;
+    double log_2;
+    uint32_t temp;
+
+    __asm__ volatile(  // log_cnt = 24 - clz(v); y = 1 << log_cnt; temp = v >> log_cnt
+      "clz      %[log_cnt], %[v]                      \n\t"
+      "addiu    %[y],       $zero,        1           \n\t"
+      "subu     %[log_cnt], %[c24],       %[log_cnt]  \n\t"
+      "sllv     %[y],       %[y],         %[log_cnt]  \n\t"
+      "srlv     %[temp],    %[v],         %[log_cnt]  \n\t"
+      : [log_cnt]"=&r"(log_cnt), [y]"=&r"(y),
+        [temp]"=r"(temp)
+      : [c24]"r"(c24), [v]"r"(v)
+    );
+
+    log_2 = kLog2Table[temp] + log_cnt;  // table value for the top bits plus the shift count
+    if (v >= APPROX_LOG_MAX) {
+      // Since the division is still expensive, add this correction factor only
+      // for large values of 'v'.
+
+      const uint32_t correction = (23 * (v & (y - 1))) >> 4;
+      log_2 += (double)correction / v;
+    }
+    return (float)log_2;
+  } else {
+    return (float)(LOG_2_RECIPROCAL * log((double)v));  // libm path for large v
+  }
+}
+// Returns the weighted sum shown below, accumulated in the 64-bit hi/lo register pair.
+// C version of this function:
+//   int i = 0;
+//   int64_t cost = 0;
+//   const uint32_t* pop = &population[4];
+//   const uint32_t* LoopEnd = &population[length];
+//   while (pop != LoopEnd) {
+//     ++i;
+//     cost += i * *pop;
+//     cost += i * *(pop + 1);
+//     pop += 2;
+//   }
+//   return (double)cost;
+static double ExtraCost(const uint32_t* const population, int length) {
+  int i, temp0, temp1;
+  const uint32_t* pop = &population[4];
+  const uint32_t* const LoopEnd = &population[length];
+
+  __asm__ volatile(
+    "mult   $zero,    $zero                  \n\t"
+    "xor    %[i],     %[i],       %[i]       \n\t"
+    "beq    %[pop],   %[LoopEnd], 2f         \n\t"
+  "1:                                        \n\t"
+    "lw     %[temp0], 0(%[pop])              \n\t"
+    "lw     %[temp1], 4(%[pop])              \n\t"
+    "addiu  %[i],     %[i],       1          \n\t"
+    "addiu  %[pop],   %[pop],     8          \n\t"
+    "madd   %[i],     %[temp0]               \n\t"
+    "madd   %[i],     %[temp1]               \n\t"
+    "bne    %[pop],   %[LoopEnd], 1b         \n\t"
+  "2:                                        \n\t"
+    "mfhi   %[temp0]                         \n\t"
+    "mflo   %[temp1]                         \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [i]"=&r"(i), [pop]"+r"(pop)
+    : [LoopEnd]"r"(LoopEnd)
+    : "memory", "hi", "lo"
+  );
+
+  return (double)(((int64_t)temp0 << 32) | (uint32_t)temp1);  // low word zero-extended, not sign-extended
+}
+// Returns the weighted sum shown below for X + Y, accumulated in the 64-bit hi/lo register pair.
+// C version of this function:
+//   int i = 0;
+//   int64_t cost = 0;
+//   const uint32_t* pX = &X[4];
+//   const uint32_t* pY = &Y[4];
+//   const uint32_t* LoopEnd = &X[length];
+//   while (pX != LoopEnd) {
+//     const uint32_t xy0 = *pX + *pY;
+//     const uint32_t xy1 = *(pX + 1) + *(pY + 1);
+//     ++i;
+//     cost += i * xy0;
+//     cost += i * xy1;
+//     pX += 2;
+//     pY += 2;
+//   }
+//   return (double)cost;
+static double ExtraCostCombined(const uint32_t* const X,
+                                const uint32_t* const Y, int length) {
+  int i, temp0, temp1, temp2, temp3;
+  const uint32_t* pX = &X[4];
+  const uint32_t* pY = &Y[4];
+  const uint32_t* const LoopEnd = &X[length];
+
+  __asm__ volatile(
+    "mult   $zero,    $zero                  \n\t"
+    "xor    %[i],     %[i],       %[i]       \n\t"
+    "beq    %[pX],    %[LoopEnd], 2f         \n\t"
+  "1:                                        \n\t"
+    "lw     %[temp0], 0(%[pX])               \n\t"
+    "lw     %[temp1], 0(%[pY])               \n\t"
+    "lw     %[temp2], 4(%[pX])               \n\t"
+    "lw     %[temp3], 4(%[pY])               \n\t"
+    "addiu  %[i],     %[i],       1          \n\t"
+    "addu   %[temp0], %[temp0],   %[temp1]   \n\t"
+    "addu   %[temp2], %[temp2],   %[temp3]   \n\t"
+    "addiu  %[pX],    %[pX],      8          \n\t"
+    "addiu  %[pY],    %[pY],      8          \n\t"
+    "madd   %[i],     %[temp0]               \n\t"
+    "madd   %[i],     %[temp2]               \n\t"
+    "bne    %[pX],    %[LoopEnd], 1b         \n\t"
+  "2:                                        \n\t"
+    "mfhi   %[temp0]                         \n\t"
+    "mflo   %[temp1]                         \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [i]"=&r"(i), [pX]"+r"(pX), [pY]"+r"(pY)
+    : [LoopEnd]"r"(LoopEnd)
+    : "memory", "hi", "lo"
+  );
+
+  return (double)(((int64_t)temp0 << 32) | (uint32_t)temp1);  // low word zero-extended, not sign-extended
+}
+// Adds 'streak' to streaks[temp0][streak > 3] and, if streak > 3, increments counts[temp0]; overwrites temp0..temp3. NOTE(review): offsets assume 4-byte int entries -- confirm against VP8LStreaks.
+#define HUFFMAN_COST_PASS                                 \
+  __asm__ volatile(                                       \
+    "sll   %[temp1],  %[temp0],    3           \n\t"      \
+    "addiu %[temp3],  %[streak],   -3          \n\t"      \
+    "addu  %[temp2],  %[pstreaks], %[temp1]    \n\t"      \
+    "blez  %[temp3],  1f                       \n\t"      \
+    "srl   %[temp1],  %[temp1],    1           \n\t"      \
+    "addu  %[temp3],  %[pcnts],    %[temp1]    \n\t"      \
+    "lw    %[temp0],  4(%[temp2])              \n\t"      \
+    "lw    %[temp1],  0(%[temp3])              \n\t"      \
+    "addu  %[temp0],  %[temp0],    %[streak]   \n\t"      \
+    "addiu %[temp1],  %[temp1],    1           \n\t"      \
+    "sw    %[temp0],  4(%[temp2])              \n\t"      \
+    "sw    %[temp1],  0(%[temp3])              \n\t"      \
+    "b     2f                                  \n\t"      \
+  "1:                                          \n\t"      \
+    "lw    %[temp0],  0(%[temp2])              \n\t"      \
+    "addu  %[temp0],  %[temp0],    %[streak]   \n\t"      \
+    "sw    %[temp0],  0(%[temp2])              \n\t"      \
+  "2:                                          \n\t"      \
+    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),           \
+      [temp3]"=&r"(temp3), [temp0]"+r"(temp0)             \
+    : [pstreaks]"r"(pstreaks), [pcnts]"r"(pcnts),         \
+      [streak]"r"(streak)                                 \
+    : "memory"                                            \
+  );
+
+// Returns the various RLE counts; every finished streak is recorded by HUFFMAN_COST_PASS.
+static VP8LStreaks HuffmanCostCount(const uint32_t* population, int length) {
+  int i;
+  int streak = 0;
+  VP8LStreaks stats;
+  int* const pstreaks = &stats.streaks[0][0];  // base pointers read by HUFFMAN_COST_PASS
+  int* const pcnts = &stats.counts[0];
+  int temp0, temp1, temp2, temp3;  // temp0 is the macro's input, temp1..temp3 its scratch
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length - 1; ++i) {
+    ++streak;
+    if (population[i] == population[i + 1]) {
+      continue;
+    }
+    temp0 = (population[i] != 0);  // 0 for a run of zeros, 1 otherwise
+    HUFFMAN_COST_PASS
+    streak = 0;
+  }
+  ++streak;  // the last symbol closes the final streak
+  temp0 = (population[i] != 0);
+  HUFFMAN_COST_PASS
+
+  return stats;
+}
+// Returns the RLE counts of the element-wise sum X[i] + Y[i].
+static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X,
+                                            const uint32_t* Y, int length) {
+  int i;
+  int streak = 0;
+  VP8LStreaks stats;
+  int* const pstreaks = &stats.streaks[0][0];
+  int* const pcnts = &stats.counts[0];
+  int temp0, temp1, temp2, temp3;
+  memset(&stats, 0, sizeof(stats));
+  for (i = 0; i < length; ++i) {
+    const uint32_t xy = X[i] + Y[i];
+    ++streak;
+    // A run ends at the last symbol or when the next sum differs. It is
+    // filed under the value of the run itself, as HuffmanCostCount() does.
+    if (i + 1 == length || xy != X[i + 1] + Y[i + 1]) {
+      temp0 = (xy != 0);
+      HUFFMAN_COST_PASS
+      streak = 0;
+    }
+  }
+  return stats;
+}
+
+#define ASM_START                                       \
+  __asm__ volatile(                                     \
+    ".set   push                            \n\t"       \
+    ".set   at                              \n\t"       \
+    ".set   macro                           \n\t"       \
+  "1:                                       \n\t"
+
+// Computes P2[k] = P0[k] + P1[k] for four consecutive 32-bit words.
+// A..D - byte offsets of the four words
+// E - flag telling the macro whether
+//     pointer P1 should be incremented (E == 1)
+// literal_ and successive histograms could be unaligned
+// so we must use ulw and usw
+#define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2)           \
+    "ulw    %[temp0], " #A "(%[" #P0 "])    \n\t"       \
+    "ulw    %[temp1], " #B "(%[" #P0 "])    \n\t"       \
+    "ulw    %[temp2], " #C "(%[" #P0 "])    \n\t"       \
+    "ulw    %[temp3], " #D "(%[" #P0 "])    \n\t"       \
+    "ulw    %[temp4], " #A "(%[" #P1 "])    \n\t"       \
+    "ulw    %[temp5], " #B "(%[" #P1 "])    \n\t"       \
+    "ulw    %[temp6], " #C "(%[" #P1 "])    \n\t"       \
+    "ulw    %[temp7], " #D "(%[" #P1 "])    \n\t"       \
+    "addu   %[temp4], %[temp4],   %[temp0]  \n\t"       \
+    "addu   %[temp5], %[temp5],   %[temp1]  \n\t"       \
+    "addu   %[temp6], %[temp6],   %[temp2]  \n\t"       \
+    "addu   %[temp7], %[temp7],   %[temp3]  \n\t"       \
+    "addiu  %[" #P0 "],  %[" #P0 "],  16    \n\t"       \
+  ".if " #E " == 1                          \n\t"       \
+    "addiu  %[" #P1 "],  %[" #P1 "],  16    \n\t"       \
+  ".endif                                   \n\t"       \
+    "usw    %[temp4], " #A "(%[" #P2 "])    \n\t"       \
+    "usw    %[temp5], " #B "(%[" #P2 "])    \n\t"       \
+    "usw    %[temp6], " #C "(%[" #P2 "])    \n\t"       \
+    "usw    %[temp7], " #D "(%[" #P2 "])    \n\t"       \
+    "addiu  %[" #P2 "], %[" #P2 "],   16    \n\t"       \
+    "bne    %[" #P0 "], %[LoopEnd], 1b      \n\t"       \
+    ".set   pop                             \n\t"       \
+
+#define ASM_END_COMMON_0                                \
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),         \
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),         \
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),         \
+      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),         \
+      [pa]"+r"(pa), [pout]"+r"(pout)  // output operand list; pa and pout are read-write
+
+#define ASM_END_COMMON_1                                \
+    : [LoopEnd]"r"(LoopEnd)                             \
+    : "memory", "at"                                    \
+  );  // input operand, clobber list, then the end of the asm statement opened by ASM_START
+
+#define ASM_END_0                                       \
+    ASM_END_COMMON_0                                    \
+      , [pb]"+r"(pb)                                    \
+    ASM_END_COMMON_1  // variant with pb as an extra read-write operand (used by ADD_VECTOR)
+
+#define ASM_END_1                                       \
+    ASM_END_COMMON_0                                    \
+    ASM_END_COMMON_1  // variant without pb (used by ADD_VECTOR_EQ)
+
+#define ADD_VECTOR(A, B, OUT, SIZE, EXTRA_SIZE)  do {   \
+  const uint32_t* pa = (const uint32_t*)(A);            \
+  const uint32_t* pb = (const uint32_t*)(B);            \
+  uint32_t* pout = (uint32_t*)(OUT);                    \
+  const uint32_t* const LoopEnd = pa + (SIZE);          \
+  assert((SIZE) % 4 == 0);                              \
+  ASM_START                                             \
+  ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)              \
+  ASM_END_0                                             \
+  if ((EXTRA_SIZE) > 0) {                               \
+    const int last = (EXTRA_SIZE);                      \
+    int i;                                              \
+    for (i = 0; i < last; ++i) pout[i] = pa[i] + pb[i]; \
+  }                                                     \
+} while (0)  // OUT = A + B: SIZE words in asm (exit is tested after each pass, so SIZE must be a non-zero multiple of 4), then EXTRA_SIZE words in C from the advanced pointers
+
+#define ADD_VECTOR_EQ(A, OUT, SIZE, EXTRA_SIZE)  do {   \
+  const uint32_t* pa = (const uint32_t*)(A);            \
+  uint32_t* pout = (uint32_t*)(OUT);                    \
+  const uint32_t* const LoopEnd = pa + (SIZE);          \
+  assert((SIZE) % 4 == 0);                              \
+  ASM_START                                             \
+  ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)            \
+  ASM_END_1                                             \
+  if ((EXTRA_SIZE) > 0) {                               \
+    const int last = (EXTRA_SIZE);                      \
+    int i;                                              \
+    for (i = 0; i < last; ++i) pout[i] += pa[i];        \
+  }                                                     \
+} while (0)  // OUT += A: SIZE words in asm (pout is both source and destination, hence E == 0), then EXTRA_SIZE words in C
+
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {  // out = a + b for the literal_, distance_, red_, blue_ and alpha_ arrays
+  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;  // scratch named by the asm operand lists in the ADD_VECTOR* macros
+  const int extra_cache_size = VP8LHistogramNumCodes(a->palette_code_bits_)
+                             - (NUM_LITERAL_CODES + NUM_LENGTH_CODES);  // literal_ entries beyond the fixed part, added by the C loop
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+
+  if (b != out) {  // three-operand form: read a and b, write out
+    ADD_VECTOR(a->literal_, b->literal_, out->literal_,
+               NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
+    ADD_VECTOR(a->distance_, b->distance_, out->distance_,
+               NUM_DISTANCE_CODES, 0);
+    ADD_VECTOR(a->red_, b->red_, out->red_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
+  } else {  // b is out: accumulate a into out in place
+    ADD_VECTOR_EQ(a->literal_, out->literal_,
+                  NUM_LITERAL_CODES + NUM_LENGTH_CODES, extra_cache_size);
+    ADD_VECTOR_EQ(a->distance_, out->distance_, NUM_DISTANCE_CODES, 0);
+    ADD_VECTOR_EQ(a->red_, out->red_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR_EQ(a->blue_, out->blue_, NUM_LITERAL_CODES, 0);
+    ADD_VECTOR_EQ(a->alpha_, out->alpha_, NUM_LITERAL_CODES, 0);
+  }
+}
+
+#undef ADD_VECTOR_EQ
+#undef ADD_VECTOR
+#undef ASM_END_1
+#undef ASM_END_0
+#undef ASM_END_COMMON_1
+#undef ASM_END_COMMON_0
+#undef ADD_TO_OUT
+#undef ASM_START
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPS32(void) {  // points the VP8L* hooks below at this file's implementations
+  VP8LFastSLog2Slow = FastSLog2Slow;
+  VP8LFastLog2Slow = FastLog2Slow;
+  VP8LExtraCost = ExtraCost;
+  VP8LExtraCostCombined = ExtraCostCombined;
+  VP8LHuffmanCostCount = HuffmanCostCount;
+// TODO(mips team): rewrite VP8LGetCombinedEntropy (which used to use
+// HuffmanCostCombinedCount) with MIPS optimizations
+#if 0
+  VP8LHuffmanCostCombinedCount = HuffmanCostCombinedCount;
+#else
+ (void)HuffmanCostCombinedCount;  // keeps a reference to the otherwise unused static function (presumably to avoid an unused-function warning)
+#endif
+  VP8LHistogramAdd = HistogramAdd;
+}
+
+#else  // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPS32)
+
+#endif  // WEBP_USE_MIPS32
diff --git a/drivers/webp/dsp/lossless_enc_mips_dsp_r2.c b/drivers/webp/dsp/lossless_enc_mips_dsp_r2.c
new file mode 100644
index 0000000000..0abf3c4f36
--- /dev/null
+++ b/drivers/webp/dsp/lossless_enc_mips_dsp_r2.c
@@ -0,0 +1,275 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transform methods for lossless encoder.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./lossless.h"
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data,
+                                        int num_pixels) {  // in place: subtracts bits 8..15 of each pixel from its bytes 0 and 2
+  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  uint32_t* const p_loop1_end = argb_data + (num_pixels & ~3);  // end of the 4-pixels-per-pass loop (label 0)
+  uint32_t* const p_loop2_end = p_loop1_end + (num_pixels & 3);  // end of the 1-pixel-per-pass remainder loop (label 1)
+  __asm__ volatile (
+    ".set       push                                          \n\t"
+    ".set       noreorder                                     \n\t"  // branch delay slots are filled by hand (the lines indented by one space)
+    "beq        %[argb_data],    %[p_loop1_end],     3f       \n\t"  // fewer than 4 pixels: skip the unrolled loop
+    " nop                                                     \n\t"
+  "0:                                                         \n\t"
+    "lw         %[temp0],        0(%[argb_data])              \n\t"  // load 4 pixels
+    "lw         %[temp1],        4(%[argb_data])              \n\t"
+    "lw         %[temp2],        8(%[argb_data])              \n\t"
+    "lw         %[temp3],        12(%[argb_data])             \n\t"
+    "ext        %[temp4],        %[temp0],           8,    8  \n\t"  // bits 8..15 of each pixel
+    "ext        %[temp5],        %[temp1],           8,    8  \n\t"
+    "ext        %[temp6],        %[temp2],           8,    8  \n\t"
+    "ext        %[temp7],        %[temp3],           8,    8  \n\t"
+    "addiu      %[argb_data],    %[argb_data],       16       \n\t"  // advance early; the stores below use negative offsets
+    "replv.ph   %[temp4],        %[temp4]                     \n\t"  // copy that value into both halfwords
+    "replv.ph   %[temp5],        %[temp5]                     \n\t"
+    "replv.ph   %[temp6],        %[temp6]                     \n\t"
+    "replv.ph   %[temp7],        %[temp7]                     \n\t"
+    "subu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"  // byte-wise subtract; bytes 1 and 3 have zero subtracted
+    "subu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
+    "subu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
+    "subu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
+    "sw         %[temp0],        -16(%[argb_data])            \n\t"  // write the 4 pixels back in place
+    "sw         %[temp1],        -12(%[argb_data])            \n\t"
+    "sw         %[temp2],        -8(%[argb_data])             \n\t"
+    "bne        %[argb_data],    %[p_loop1_end],     0b       \n\t"
+    " sw        %[temp3],        -4(%[argb_data])             \n\t"  // delay slot: fourth store runs on every pass
+  "3:                                                         \n\t"
+    "beq        %[argb_data],    %[p_loop2_end],     2f       \n\t"  // no remainder pixels: done
+    " nop                                                     \n\t"
+  "1:                                                         \n\t"
+    "lw         %[temp0],        0(%[argb_data])              \n\t"  // same steps, one pixel per pass
+    "addiu      %[argb_data],    %[argb_data],       4        \n\t"
+    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
+    "replv.ph   %[temp4],        %[temp4]                     \n\t"
+    "subu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
+    "bne        %[argb_data],    %[p_loop2_end],     1b       \n\t"
+    " sw        %[temp0],        -4(%[argb_data])             \n\t"  // delay slot: store runs on every pass
+  "2:                                                         \n\t"
+    ".set       pop                                           \n\t"
+    : [argb_data]"+&r"(argb_data), [temp0]"=&r"(temp0),
+      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
+      [temp7]"=&r"(temp7)
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"
+  );
+}
+
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
+                                                int8_t color) {  // both arguments are signed 8-bit; callers' wider values are narrowed
+  return (uint32_t)((int)(color_pred) * color) >> 5;  // signed product, converted to unsigned, then shifted right by 5
+}
+
+static void TransformColor(const VP8LMultipliers* const m, uint32_t* data,
+                           int num_pixels) {  // in place; rewrites bytes 0 and 2 of every pixel, two pixels per asm pass
+  int temp0, temp1, temp2, temp3, temp4, temp5;
+  uint32_t argb, argb1, new_red, new_red1;
+  const uint32_t G_to_R = m->green_to_red_;
+  const uint32_t G_to_B = m->green_to_blue_;
+  const uint32_t R_to_B = m->red_to_blue_;
+  uint32_t* const p_loop_end = data + (num_pixels & ~1);  // end of the pixel pairs; an odd last pixel is handled in C below
+  __asm__ volatile (
+    ".set            push                                    \n\t"
+    ".set            noreorder                               \n\t"  // branch delay slots are filled by hand
+    "beq             %[data],      %[p_loop_end],  1f        \n\t"  // no pixel pair: skip the loop
+    " nop                                                    \n\t"
+    "replv.ph        %[temp0],     %[G_to_R]                 \n\t"  // each multiplier copied into both halfwords
+    "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
+    "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
+    "shll.ph         %[temp0],     %[temp0],       8         \n\t"  // shift left then arithmetic right by 8:
+    "shll.ph         %[temp1],     %[temp1],       8         \n\t"  // sign-extends the low 8 bits of each halfword
+    "shll.ph         %[temp2],     %[temp2],       8         \n\t"
+    "shra.ph         %[temp0],     %[temp0],       8         \n\t"
+    "shra.ph         %[temp1],     %[temp1],       8         \n\t"
+    "shra.ph         %[temp2],     %[temp2],       8         \n\t"
+  "0:                                                        \n\t"
+    "lw              %[argb],      0(%[data])                \n\t"  // first pixel of the pair
+    "lw              %[argb1],     4(%[data])                \n\t"  // second pixel of the pair
+    "lhu             %[new_red],   2(%[data])                \n\t"  // halfword at byte offset 2 of the first pixel
+    "lhu             %[new_red1],  6(%[data])                \n\t"  // halfword at byte offset 2 of the second pixel
+    "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
+    "precr.qb.ph     %[temp4],     %[argb],        %[argb1]  \n\t"
+    "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
+    "preceu.ph.qbla  %[temp4],     %[temp4]                  \n\t"
+    "shll.ph         %[temp3],     %[temp3],       8         \n\t"  // sign-extend the extracted bytes, as for the multipliers
+    "shll.ph         %[temp4],     %[temp4],       8         \n\t"
+    "shra.ph         %[temp3],     %[temp3],       8         \n\t"
+    "shra.ph         %[temp4],     %[temp4],       8         \n\t"
+    "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"  // products with G_to_R
+    "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"  // products with G_to_B
+    "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"  // products with R_to_B
+    "addiu           %[data],      %[data],        8         \n\t"  // advance by two pixels; stores below use negative offsets
+    "ins             %[new_red1],  %[new_red],     16,   16  \n\t"  // pack both pixels' halfwords into one register
+    "ins             %[argb1],     %[argb],        16,   16  \n\t"
+    "shra.ph         %[temp5],     %[temp5],       5         \n\t"  // same >> 5 scaling as ColorTransformDelta
+    "shra.ph         %[temp3],     %[temp3],       5         \n\t"
+    "shra.ph         %[temp4],     %[temp4],       5         \n\t"
+    "subu.ph         %[new_red1],  %[new_red1],    %[temp5]  \n\t"
+    "subu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
+    "preceu.ph.qbra  %[temp5],     %[new_red1]               \n\t"
+    "subu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
+    "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
+    "sb              %[temp5],     -2(%[data])               \n\t"  // byte 2 of the second pixel
+    "sb              %[temp3],     -4(%[data])               \n\t"  // byte 0 of the second pixel
+    "sra             %[temp5],     %[temp5],       16        \n\t"  // bring the first pixel's results into the low byte
+    "sra             %[temp3],     %[temp3],       16        \n\t"
+    "sb              %[temp5],     -6(%[data])               \n\t"  // byte 2 of the first pixel
+    "bne             %[data],      %[p_loop_end],  0b        \n\t"
+    " sb             %[temp3],     -8(%[data])               \n\t"  // delay slot: byte 0 of the first pixel
+  "1:                                                        \n\t"
+    ".set            pop                                     \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [new_red1]"=&r"(new_red1), [new_red]"=&r"(new_red),
+      [argb]"=&r"(argb), [argb1]"=&r"(argb1), [data]"+&r"(data)
+    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
+      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
+    : "memory", "hi", "lo"
+  );
+
+  if (num_pixels & 1) {  // odd count: data now points at the last pixel, handled in plain C
+    const uint32_t argb_ = data[0];
+    const uint32_t green = argb_ >> 8;
+    const uint32_t red = argb_ >> 16;
+    uint32_t new_blue = argb_;
+    new_red = red;  // reuses the asm scratch variable
+    new_red -= ColorTransformDelta(m->green_to_red_, green);
+    new_red &= 0xff;
+    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
+    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
+    new_blue &= 0xff;
+    data[0] = (argb_ & 0xff00ff00u) | (new_red << 16) | (new_blue);  // bytes 1 and 3 kept; new_red to bits 16..23, new_blue to bits 0..7
+  }
+}
+
+static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
+                                              uint8_t red_to_blue,
+                                              uint32_t argb) {  // returns the low byte of argb minus two scaled deltas
+  const uint32_t green = argb >> 8;  // narrowed to int8_t when passed to ColorTransformDelta
+  const uint32_t red = argb >> 16;  // narrowed the same way
+  uint8_t new_blue = argb;  // keeps only the low 8 bits
+  new_blue -= ColorTransformDelta(green_to_blue, green);
+  new_blue -= ColorTransformDelta(red_to_blue, red);
+  return (new_blue & 0xff);
+}
+
+static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
+                                       int tile_width, int tile_height,
+                                       int green_to_blue, int red_to_blue,
+                                       int histo[]) {  // increments histo[] once per pixel of the tile, indexed by an 8-bit result
+  const int rtb = (red_to_blue << 16) | (red_to_blue & 0xffff);  // multiplier in both halfwords; NOTE(review): << on a negative int is undefined in ISO C - confirm the input range
+  const int gtb = (green_to_blue << 16) | (green_to_blue & 0xffff);  // same packing and same caveat
+  const uint32_t mask = 0xff00ffu;  // keeps the low byte of each halfword
+  while (tile_height-- > 0) {
+    int x;
+    const uint32_t* p_argb = argb;  // cursor within the current row
+    argb += stride;  // next row starts stride pixels further on
+    for (x = 0; x < (tile_width >> 1); ++x) {  // two pixels per pass
+      int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
+      __asm__ volatile (
+        "lw           %[temp0],  0(%[p_argb])             \n\t"  // first pixel of the pair
+        "lw           %[temp1],  4(%[p_argb])             \n\t"  // second pixel of the pair
+        "precr.qb.ph  %[temp2],  %[temp0],  %[temp1]      \n\t"
+        "ins          %[temp1],  %[temp0],  16,    16     \n\t"  // low halfwords of both pixels in one register
+        "shra.ph      %[temp2],  %[temp2],  8             \n\t"
+        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
+        "mul.ph       %[temp5],  %[temp2],  %[rtb]        \n\t"  // products with red_to_blue
+        "mul.ph       %[temp6],  %[temp3],  %[gtb]        \n\t"  // products with green_to_blue
+        "and          %[temp4],  %[temp1],  %[mask]       \n\t"  // byte 0 of each pixel
+        "addiu        %[p_argb], %[p_argb], 8             \n\t"
+        "shra.ph      %[temp5],  %[temp5],  5             \n\t"  // same >> 5 scaling as ColorTransformDelta
+        "shra.ph      %[temp6],  %[temp6],  5             \n\t"
+        "subu.qb      %[temp2],  %[temp4],  %[temp5]      \n\t"
+        "subu.qb      %[temp2],  %[temp2],  %[temp6]      \n\t"  // temp2: one 8-bit result in the low byte of each halfword
+        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+          [temp5]"=&r"(temp5), [temp6]"=&r"(temp6)
+        : [rtb]"r"(rtb), [gtb]"r"(gtb), [mask]"r"(mask)
+        : "memory", "hi", "lo"
+      );
+      ++histo[(uint8_t)(temp2 >> 16)];  // result for the first pixel of the pair
+      ++histo[(uint8_t)temp2];  // result for the second pixel of the pair
+    }
+    if (tile_width & 1) {  // odd width: last pixel of the row goes through the C helper
+      ++histo[TransformColorBlue(green_to_blue, red_to_blue, *p_argb)];
+    }
+  }
+}
+
+static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
+                                             uint32_t argb) {  // returns bits 16..23 of argb minus one scaled delta
+  const uint32_t green = argb >> 8;  // narrowed to int8_t when passed to ColorTransformDelta
+  uint32_t new_red = argb >> 16;  // bits above 7 are discarded by the final mask
+  new_red -= ColorTransformDelta(green_to_red, green);
+  return (new_red & 0xff);
+}
+
+static void CollectColorRedTransforms(const uint32_t* argb, int stride,
+                                      int tile_width, int tile_height,
+                                      int green_to_red, int histo[]) {  // increments histo[] once per pixel of the tile
+  const int gtr = (green_to_red << 16) | (green_to_red & 0xffff);  // multiplier in both halfwords; NOTE(review): << on a negative int is undefined in ISO C - confirm the input range
+  while (tile_height-- > 0) {
+    int x;
+    const uint32_t* p_argb = argb;  // cursor within the current row
+    argb += stride;  // next row starts stride pixels further on
+    for (x = 0; x < (tile_width >> 1); ++x) {  // two pixels per pass
+      int temp0, temp1, temp2, temp3, temp4;
+      __asm__ volatile (
+        "lw           %[temp0],  0(%[p_argb])             \n\t"  // first pixel of the pair
+        "lw           %[temp1],  4(%[p_argb])             \n\t"  // second pixel of the pair
+        "precrq.ph.w  %[temp4],  %[temp0],  %[temp1]      \n\t"  // upper halfwords of both pixels
+        "ins          %[temp1],  %[temp0],  16,    16     \n\t"  // lower halfwords of both pixels
+        "shra.ph      %[temp3],  %[temp1],  8             \n\t"
+        "mul.ph       %[temp2],  %[temp3],  %[gtr]        \n\t"  // products with green_to_red
+        "addiu        %[p_argb], %[p_argb], 8             \n\t"
+        "shra.ph      %[temp2],  %[temp2],  5             \n\t"  // same >> 5 scaling as ColorTransformDelta
+        "subu.qb      %[temp2],  %[temp4],  %[temp2]      \n\t"  // temp2: one 8-bit result in the low byte of each halfword
+        : [p_argb]"+&r"(p_argb), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+          [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
+        : [gtr]"r"(gtr)
+        : "memory", "hi", "lo"
+      );
+      ++histo[(uint8_t)(temp2 >> 16)];  // result for the first pixel of the pair
+      ++histo[(uint8_t)temp2];  // result for the second pixel of the pair
+    }
+    if (tile_width & 1) {  // odd width: last pixel of the row goes through the C helper
+      ++histo[TransformColorRed(green_to_red, *p_argb)];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMIPSdspR2(void) {  // points the four VP8L* hooks below at this file's implementations
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+  VP8LTransformColor = TransformColor;
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
+  VP8LCollectColorRedTransforms = CollectColorRedTransforms;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/lossless_enc_neon.c b/drivers/webp/dsp/lossless_enc_neon.c
new file mode 100644
index 0000000000..4c56f2594b
--- /dev/null
+++ b/drivers/webp/dsp/lossless_enc_neon.c
@@ -0,0 +1,143 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON variant of methods for lossless encoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+
+#include "./lossless.h"
+#include "./neon.h"
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
+// non-standard versions there.
+#if defined(__APPLE__) && defined(__aarch64__) && \
+    defined(__apple_build_version__) && (__apple_build_version__< 6020037)
+#define USE_VTBLQ
+#endif
+
+#ifdef USE_VTBLQ
+// 255 = byte will be zeroed
+static const uint8_t kGreenShuffle[16] = {  // per 4-byte pixel: source byte 1 goes to output bytes 0 and 2
+  1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
+};
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
+                                             const uint8x16_t shuffle) {  // 16-byte table lookup done as two 8-byte index halves
+  return vcombine_u8(vtbl1q_u8(argb, vget_low_u8(shuffle)),
+                     vtbl1q_u8(argb, vget_high_u8(shuffle)));
+}
+#else  // !USE_VTBLQ
+// 255 = byte will be zeroed
+static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255  };  // indices are relative to each 8-byte half
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
+                                             const uint8x8_t shuffle) {  // same 8-byte index pattern applied to each half of argb
+  return vcombine_u8(vtbl1_u8(vget_low_u8(argb), shuffle),
+                     vtbl1_u8(vget_high_u8(argb), shuffle));
+}
+#endif  // USE_VTBLQ
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {  // in place, 4 pixels per vector pass
+  const uint32_t* const end = argb_data + (num_pixels & ~3);  // end of the whole groups of 4 pixels
+#ifdef USE_VTBLQ
+  const uint8x16_t shuffle = vld1q_u8(kGreenShuffle);
+#else
+  const uint8x8_t shuffle = vld1_u8(kGreenShuffle);
+#endif
+  for (; argb_data < end; argb_data += 4) {
+    const uint8x16_t argb = vld1q_u8((uint8_t*)argb_data);  // 4 pixels as 16 bytes
+    const uint8x16_t greens = DoGreenShuffle(argb, shuffle);  // byte 1 of each pixel placed at bytes 0 and 2, zero elsewhere
+    vst1q_u8((uint8_t*)argb_data, vsubq_u8(argb, greens));  // byte-wise subtract, stored back in place
+  }
+  // fallthrough and finish off with plain-C
+  VP8LSubtractGreenFromBlueAndRed_C(argb_data, num_pixels & 3);  // argb_data was advanced by the loop, so it points at the remainder
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColor(const VP8LMultipliers* const m,
+                           uint32_t* argb_data, int num_pixels) {  // in place; 4 pixels per vector pass, remainder in C
+  // sign-extended multiplying constants, pre-shifted by 6.
+#define CST(X)  (((int16_t)(m->X << 8)) >> 6)
+  const int16_t rb[8] = {  // alternating blue/red multipliers applied to green, one pair per pixel
+    CST(green_to_blue_), CST(green_to_red_),
+    CST(green_to_blue_), CST(green_to_red_),
+    CST(green_to_blue_), CST(green_to_red_),
+    CST(green_to_blue_), CST(green_to_red_)
+  };
+  const int16x8_t mults_rb = vld1q_s16(rb);
+  const int16_t b2[8] = {  // red_to_blue in the odd 16-bit lanes, zero in the even ones
+    0, CST(red_to_blue_), 0, CST(red_to_blue_),
+    0, CST(red_to_blue_), 0, CST(red_to_blue_),
+  };
+  const int16x8_t mults_b2 = vld1q_s16(b2);
+#undef CST
+#ifdef USE_VTBLQ
+  static const uint8_t kg0g0[16] = {  // source byte 1 of each pixel goes to output bytes 1 and 3, zero elsewhere
+    255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13
+  };
+  const uint8x16_t shuffle = vld1q_u8(kg0g0);
+#else
+  static const uint8_t k0g0g[8] = { 255, 1, 255, 1, 255, 5, 255, 5 };  // same pattern, indices relative to each 8-byte half
+  const uint8x8_t shuffle = vld1_u8(k0g0g);
+#endif
+  const uint32x4_t mask_rb = vdupq_n_u32(0x00ff00ffu);  // red-blue masks
+  int i;
+  for (i = 0; i + 4 <= num_pixels; i += 4) {
+    const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));  // 4 pixels as 16 bytes
+    // 0 g 0 g
+    const uint8x16_t greens = DoGreenShuffle(in, shuffle);
+    // x dr  x db1
+    const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);
+    // r 0   b   0
+    const int16x8_t B = vshlq_n_s16(vreinterpretq_s16_u8(in), 8);
+    // x db2 0   0
+    const int16x8_t C = vqdmulhq_s16(B, mults_b2);
+    // 0 0   x db2
+    const uint32x4_t D = vshrq_n_u32(vreinterpretq_u32_s16(C), 16);
+    // x dr  x  db
+    const int8x16_t E = vaddq_s8(vreinterpretq_s8_u32(D),
+                                 vreinterpretq_s8_s16(A));
+    // 0 dr  0  db
+    const uint32x4_t F = vandq_u32(vreinterpretq_u32_s8(E), mask_rb);
+    const int8x16_t out = vsubq_s8(vreinterpretq_s8_u8(in),
+                                   vreinterpretq_s8_u32(F));  // byte-wise in - F; bytes 1 and 3 have zero subtracted
+    vst1q_s8((int8_t*)(argb_data + i), out);
+  }
+  // fallthrough and finish off with plain-C
+  VP8LTransformColor_C(m, argb_data + i, num_pixels - i);  // i counts the pixels already processed
+}
+
+#undef USE_VTBLQ
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitNEON(void) {  // points the two VP8L* hooks below at this file's implementations
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+  VP8LTransformColor = TransformColor;
+}
+
+#else  // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitNEON)
+
+#endif  // WEBP_USE_NEON
diff --git a/drivers/webp/dsp/lossless_enc_sse2.c b/drivers/webp/dsp/lossless_enc_sse2.c
new file mode 100644
index 0000000000..1374b3ef64
--- /dev/null
+++ b/drivers/webp/dsp/lossless_enc_sse2.c
@@ -0,0 +1,270 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of methods for lossless encoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <assert.h>
+#include <emmintrin.h>
+#include "./lossless.h"
+
+// Sign-extends the low 8 bits of X and pre-shifts by 5; X may be an expression.
+#define CST_5b(X)  (((int16_t)((uint16_t)(X) << 8)) >> 5)
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  int done;  // pixels processed so far; the SSE2 loop handles 4 per pass
+  for (done = 0; done + 4 <= num_pixels; done += 4) {
+    __m128i* const quad = (__m128i*)&argb_data[done];
+    const __m128i argb = _mm_loadu_si128(quad);
+    const __m128i ag = _mm_srli_epi16(argb, 8);   // 0 a 0 g
+    const __m128i g_lo = _mm_shufflelo_epi16(ag, _MM_SHUFFLE(2, 2, 0, 0));
+    const __m128i g = _mm_shufflehi_epi16(g_lo, _MM_SHUFFLE(2, 2, 0, 0)); // 0g0g
+    _mm_storeu_si128(quad, _mm_sub_epi8(argb, g));   // r -= g, b -= g
+  }
+  // Hand the remaining (< 4) pixels to the plain-C implementation.
+  VP8LSubtractGreenFromBlueAndRed_C(argb_data + done, num_pixels - done);
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColor(const VP8LMultipliers* const m,
+                           uint32_t* argb_data, int num_pixels) {
+  // 16-bit multiplier constants (see CST_5b): green->red sits in the high
+  // half of each 32-bit pixel, green->blue in the low half.
+  const int g2r = CST_5b(m->green_to_red_);
+  const int g2b = CST_5b(m->green_to_blue_);
+  const int r2b = CST_5b(m->red_to_blue_);
+  const __m128i mults_rb =
+      _mm_set_epi16(g2r, g2b, g2r, g2b, g2r, g2b, g2r, g2b);
+  const __m128i mults_b2 = _mm_set_epi16(r2b, 0, r2b, 0, r2b, 0, r2b, 0);
+  const __m128i mask_ag = _mm_set1_epi32(0xff00ff00);  // alpha-green masks
+  const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff);  // red-blue masks
+  int n;
+  for (n = 0; n + 4 <= num_pixels; n += 4) {
+    __m128i* const quad = (__m128i*)&argb_data[n];
+    const __m128i argb = _mm_loadu_si128(quad);
+    const __m128i a0g0 = _mm_and_si128(argb, mask_ag);      // a   0   g   0
+    const __m128i g_lo = _mm_shufflelo_epi16(a0g0, _MM_SHUFFLE(2, 2, 0, 0));
+    const __m128i g0g0 = _mm_shufflehi_epi16(g_lo, _MM_SHUFFLE(2, 2, 0, 0));
+    const __m128i d_rb1 = _mm_mulhi_epi16(g0g0, mults_rb);  // x dr  x db1
+    const __m128i r0b0 = _mm_slli_epi16(argb, 8);           // r 0   b   0
+    const __m128i d_b2 = _mm_mulhi_epi16(r0b0, mults_b2);   // x db2 0   0
+    const __m128i d_b2_lo = _mm_srli_epi32(d_b2, 16);       // 0 0   x db2
+    const __m128i d_sum = _mm_add_epi8(d_b2_lo, d_rb1);     // x dr  x  db
+    const __m128i delta = _mm_and_si128(d_sum, mask_rb);    // 0 dr  0  db
+    _mm_storeu_si128(quad, _mm_sub_epi8(argb, delta));
+  }
+  // Hand the remaining (< 4) pixels to the plain-C implementation.
+  VP8LTransformColor_C(m, argb_data + n, num_pixels - n);
+}
+
+//------------------------------------------------------------------------------
+#define SPAN 8
+static void CollectColorBlueTransforms(const uint32_t* argb, int stride,
+                                       int tile_width, int tile_height,
+                                       int green_to_blue, int red_to_blue,
+                                       int histo[]) {
+  const __m128i mults_r = _mm_set_epi16(
+      CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0,
+      CST_5b(red_to_blue), 0, CST_5b(red_to_blue), 0);
+  const __m128i mults_g = _mm_set_epi16(
+      0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue),
+      0, CST_5b(green_to_blue), 0, CST_5b(green_to_blue));
+  const __m128i mask_g = _mm_set1_epi32(0x00ff00);  // green mask
+  const __m128i mask_b = _mm_set1_epi32(0x0000ff);  // blue mask
+  int y;  // each pixel adds one count to histo[b'], b' being an 8-bit value
+  for (y = 0; y < tile_height; ++y) {
+    const uint32_t* const src = argb + y * stride;  // first pixel of row y
+    int i, x;
+    for (x = 0; x + SPAN <= tile_width; x += SPAN) {  // SPAN pixels per pass
+      uint16_t values[SPAN];  // the SPAN histogram indices of this pass
+      const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x +        0]);
+      const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
+      const __m128i A0 = _mm_slli_epi16(in0, 8);        // r 0  | b 0
+      const __m128i A1 = _mm_slli_epi16(in1, 8);
+      const __m128i B0 = _mm_and_si128(in0, mask_g);    // 0 0  | g 0
+      const __m128i B1 = _mm_and_si128(in1, mask_g);
+      const __m128i C0 = _mm_mulhi_epi16(A0, mults_r);  // x db | 0 0
+      const __m128i C1 = _mm_mulhi_epi16(A1, mults_r);
+      const __m128i D0 = _mm_mulhi_epi16(B0, mults_g);  // 0 0  | x db
+      const __m128i D1 = _mm_mulhi_epi16(B1, mults_g);
+      const __m128i E0 = _mm_sub_epi8(in0, D0);         // x x  | x b'
+      const __m128i E1 = _mm_sub_epi8(in1, D1);
+      const __m128i F0 = _mm_srli_epi32(C0, 16);        // 0 0  | x db
+      const __m128i F1 = _mm_srli_epi32(C1, 16);
+      const __m128i G0 = _mm_sub_epi8(E0, F0);          // x x  | x b'
+      const __m128i G1 = _mm_sub_epi8(E1, F1);
+      const __m128i H0 = _mm_and_si128(G0, mask_b);     // 0 0  | 0 b
+      const __m128i H1 = _mm_and_si128(G1, mask_b);
+      const __m128i I = _mm_packs_epi32(H0, H1);        // 0 b' | 0 b'
+      _mm_storeu_si128((__m128i*)values, I);
+      for (i = 0; i < SPAN; ++i) ++histo[values[i]];
+    }
+  }
+  {  // the last (tile_width % SPAN) columns of every row use the C version
+    const int left_over = tile_width & (SPAN - 1);
+    if (left_over > 0) {
+      VP8LCollectColorBlueTransforms_C(argb + tile_width - left_over, stride,
+                                       left_over, tile_height,
+                                       green_to_blue, red_to_blue, histo);
+    }
+  }
+}
+
+static void CollectColorRedTransforms(const uint32_t* argb, int stride,
+                                      int tile_width, int tile_height,
+                                      int green_to_red, int histo[]) {
+  const __m128i mults_g = _mm_set_epi16(
+      0, CST_5b(green_to_red), 0, CST_5b(green_to_red),
+      0, CST_5b(green_to_red), 0, CST_5b(green_to_red));
+  const __m128i mask_g = _mm_set1_epi32(0x00ff00);  // green mask
+  const __m128i mask = _mm_set1_epi32(0xff);  // keeps the low byte only
+
+  int y;  // each pixel adds one count to histo[r'], r' being an 8-bit value
+  for (y = 0; y < tile_height; ++y) {
+    const uint32_t* const src = argb + y * stride;  // first pixel of row y
+    int i, x;
+    for (x = 0; x + SPAN <= tile_width; x += SPAN) {  // SPAN pixels per pass
+      uint16_t values[SPAN];  // the SPAN histogram indices of this pass
+      const __m128i in0 = _mm_loadu_si128((__m128i*)&src[x +        0]);
+      const __m128i in1 = _mm_loadu_si128((__m128i*)&src[x + SPAN / 2]);
+      const __m128i A0 = _mm_and_si128(in0, mask_g);    // 0 0  | g 0
+      const __m128i A1 = _mm_and_si128(in1, mask_g);
+      const __m128i B0 = _mm_srli_epi32(in0, 16);       // 0 0  | x r
+      const __m128i B1 = _mm_srli_epi32(in1, 16);
+      const __m128i C0 = _mm_mulhi_epi16(A0, mults_g);  // 0 0  | x dr
+      const __m128i C1 = _mm_mulhi_epi16(A1, mults_g);
+      const __m128i E0 = _mm_sub_epi8(B0, C0);          // x x  | x r'
+      const __m128i E1 = _mm_sub_epi8(B1, C1);
+      const __m128i F0 = _mm_and_si128(E0, mask);       // 0 0  | 0 r'
+      const __m128i F1 = _mm_and_si128(E1, mask);
+      const __m128i I = _mm_packs_epi32(F0, F1);        // 0 r' | 0 r'
+      _mm_storeu_si128((__m128i*)values, I);
+      for (i = 0; i < SPAN; ++i) ++histo[values[i]];
+    }
+  }
+  {  // the last (tile_width % SPAN) columns of every row use the C version
+    const int left_over = tile_width & (SPAN - 1);
+    if (left_over > 0) {
+      VP8LCollectColorRedTransforms_C(argb + tile_width - left_over, stride,
+                                      left_over, tile_height,
+                                      green_to_red, histo);
+    }
+  }
+}
+#undef SPAN
+
+//------------------------------------------------------------------------------
+
+#define LINE_SIZE 16    // 8 or 16
+static void AddVector(const uint32_t* a, const uint32_t* b, uint32_t* out,
+                      int size) {  // out[i] = a[i] + b[i] for i in [0, size)
+  int i;
+  assert(size % LINE_SIZE == 0);  // only whole LINE_SIZE-word chunks
+  for (i = 0; i < size; i += LINE_SIZE) {  // all loads happen before stores
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i +  0]);
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i +  4]);
+#if (LINE_SIZE == 16)  // words 8..15 of a 16-word chunk
+    const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i +  8]);
+    const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
+#endif
+    const __m128i b0 = _mm_loadu_si128((const __m128i*)&b[i +  0]);
+    const __m128i b1 = _mm_loadu_si128((const __m128i*)&b[i +  4]);
+#if (LINE_SIZE == 16)
+    const __m128i b2 = _mm_loadu_si128((const __m128i*)&b[i +  8]);
+    const __m128i b3 = _mm_loadu_si128((const __m128i*)&b[i + 12]);
+#endif
+    _mm_storeu_si128((__m128i*)&out[i +  0], _mm_add_epi32(a0, b0));
+    _mm_storeu_si128((__m128i*)&out[i +  4], _mm_add_epi32(a1, b1));
+#if (LINE_SIZE == 16)
+    _mm_storeu_si128((__m128i*)&out[i +  8], _mm_add_epi32(a2, b2));
+    _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
+#endif
+  }
+}
+
+static void AddVectorEq(const uint32_t* a, uint32_t* out, int size) {
+  int i;  // in-place variant: out[i] += a[i] for i in [0, size)
+  assert(size % LINE_SIZE == 0);  // only whole LINE_SIZE-word chunks
+  for (i = 0; i < size; i += LINE_SIZE) {  // all loads happen before stores
+    const __m128i a0 = _mm_loadu_si128((const __m128i*)&a[i +  0]);
+    const __m128i a1 = _mm_loadu_si128((const __m128i*)&a[i +  4]);
+#if (LINE_SIZE == 16)  // words 8..15 of a 16-word chunk
+    const __m128i a2 = _mm_loadu_si128((const __m128i*)&a[i +  8]);
+    const __m128i a3 = _mm_loadu_si128((const __m128i*)&a[i + 12]);
+#endif
+    const __m128i b0 = _mm_loadu_si128((const __m128i*)&out[i +  0]);
+    const __m128i b1 = _mm_loadu_si128((const __m128i*)&out[i +  4]);
+#if (LINE_SIZE == 16)
+    const __m128i b2 = _mm_loadu_si128((const __m128i*)&out[i +  8]);
+    const __m128i b3 = _mm_loadu_si128((const __m128i*)&out[i + 12]);
+#endif
+    _mm_storeu_si128((__m128i*)&out[i +  0], _mm_add_epi32(a0, b0));
+    _mm_storeu_si128((__m128i*)&out[i +  4], _mm_add_epi32(a1, b1));
+#if (LINE_SIZE == 16)
+    _mm_storeu_si128((__m128i*)&out[i +  8], _mm_add_epi32(a2, b2));
+    _mm_storeu_si128((__m128i*)&out[i + 12], _mm_add_epi32(a3, b3));
+#endif
+  }
+}
+#undef LINE_SIZE
+
+// The uint32_t counters are summed with the *signed* _mm_add_epi32; that is
+// ok since the histogram values are less than 1<<28 (max picture size).
+static void HistogramAdd(const VP8LHistogram* const a,
+                         const VP8LHistogram* const b,
+                         VP8LHistogram* const out) {
+  const int num_literals = VP8LHistogramNumCodes(a->palette_code_bits_);
+  int k;
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  if (b == out) {   // accumulate 'a' into 'out' in place
+    AddVectorEq(a->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVectorEq(a->red_, out->red_, NUM_LITERAL_CODES);
+    AddVectorEq(a->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVectorEq(a->alpha_, out->alpha_, NUM_LITERAL_CODES);
+  } else {          // three-operand form: out = a + b
+    AddVector(a->literal_, b->literal_, out->literal_, NUM_LITERAL_CODES);
+    AddVector(a->red_, b->red_, out->red_, NUM_LITERAL_CODES);
+    AddVector(a->blue_, b->blue_, out->blue_, NUM_LITERAL_CODES);
+    AddVector(a->alpha_, b->alpha_, out->alpha_, NUM_LITERAL_CODES);
+  }
+  for (k = NUM_LITERAL_CODES; k < num_literals; ++k) {  // rest of literal_[]
+    out->literal_[k] = a->literal_[k] + b->literal_[k];
+  }
+  for (k = 0; k < NUM_DISTANCE_CODES; ++k) {  // distance_[] is added in C
+    out->distance_[k] = a->distance_[k] + b->distance_[k];
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE2(void) {
+  VP8LHistogramAdd = HistogramAdd;  // SSE2 versions from this file
+  VP8LCollectColorRedTransforms = CollectColorRedTransforms;
+  VP8LCollectColorBlueTransforms = CollectColorBlueTransforms;
+  VP8LTransformColor = TransformColor;
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/lossless_enc_sse41.c b/drivers/webp/dsp/lossless_enc_sse41.c
new file mode 100644
index 0000000000..3e493198db
--- /dev/null
+++ b/drivers/webp/dsp/lossless_enc_sse41.c
@@ -0,0 +1,51 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE4.1 variant of methods for lossless encoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE41)
+#include <assert.h>
+#include <smmintrin.h>
+#include "./lossless.h"
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+static void SubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  const __m128i green_to_rb = _mm_set_epi8(-1, 13, -1, 13, -1, 9, -1, 9,
+                                           -1,  5, -1,  5, -1, 1, -1, 1);
+  int done;  // pixels processed so far; the SIMD loop handles 4 per pass
+  for (done = 0; done + 4 <= num_pixels; done += 4) {
+    __m128i* const quad = (__m128i*)&argb_data[done];
+    const __m128i argb = _mm_loadu_si128(quad);
+    const __m128i greens = _mm_shuffle_epi8(argb, green_to_rb);  // 0 g 0 g
+    _mm_storeu_si128(quad, _mm_sub_epi8(argb, greens));   // r -= g, b -= g
+  }
+  // Hand the remaining (< 4) pixels to the plain-C implementation.
+  VP8LSubtractGreenFromBlueAndRed_C(argb_data + done, num_pixels - done);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LEncDspInitSSE41(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitSSE41(void) {
+  VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed;  // this file's
+}
+
+#else  // !WEBP_USE_SSE41
+
+WEBP_DSP_INIT_STUB(VP8LEncDspInitSSE41)
+
+#endif  // WEBP_USE_SSE41
diff --git a/drivers/webp/dsp/lossless_mips_dsp_r2.c b/drivers/webp/dsp/lossless_mips_dsp_r2.c
new file mode 100644
index 0000000000..90aed7f151
--- /dev/null
+++ b/drivers/webp/dsp/lossless_mips_dsp_r2.c
@@ -0,0 +1,680 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./lossless.h"
+
+#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
+static void FUNC_NAME(const TYPE* src,  /* color_map lookup: src -> dst */     \
+                      const uint32_t* const color_map,                         \
+                      TYPE* dst, int y_start, int y_end,                       \
+                      int width) {                                             \
+  int y;                                                                       \
+  for (y = y_start; y < y_end; ++y) {                                          \
+    int x;                                                                     \
+    for (x = 0; x < (width >> 2); ++x) {  /* four entries per pass */          \
+      int tmp1, tmp2, tmp3, tmp4;                                              \
+      __asm__ volatile (                                                       \
+      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
+        "lbu       %[tmp1],  0(%[src])                  \n\t"                  \
+        "lbu       %[tmp2],  1(%[src])                  \n\t"                  \
+        "lbu       %[tmp3],  2(%[src])                  \n\t"                  \
+        "lbu       %[tmp4],  3(%[src])                  \n\t"                  \
+        "addiu     %[src],   %[src],      4             \n\t"                  \
+      ".endif                                           \n\t"                  \
+      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
+        "lw        %[tmp1],  0(%[src])                  \n\t"                  \
+        "lw        %[tmp2],  4(%[src])                  \n\t"                  \
+        "lw        %[tmp3],  8(%[src])                  \n\t"                  \
+        "lw        %[tmp4],  12(%[src])                 \n\t"                  \
+        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t" /* bits 8-15 */  \
+        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
+        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
+        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
+        "addiu     %[src],   %[src],      16            \n\t"                  \
+      ".endif                                           \n\t"                  \
+        "sll       %[tmp1],  %[tmp1],     2             \n\t" /* index * 4 */  \
+        "sll       %[tmp2],  %[tmp2],     2             \n\t"                  \
+        "sll       %[tmp3],  %[tmp3],     2             \n\t"                  \
+        "sll       %[tmp4],  %[tmp4],     2             \n\t"                  \
+        "lwx       %[tmp1],  %[tmp1](%[color_map])      \n\t" /* map lookup */ \
+        "lwx       %[tmp2],  %[tmp2](%[color_map])      \n\t"                  \
+        "lwx       %[tmp3],  %[tmp3](%[color_map])      \n\t"                  \
+        "lwx       %[tmp4],  %[tmp4](%[color_map])      \n\t"                  \
+      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
+        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t" /* bits 8-15 */  \
+        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
+        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
+        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
+        "sb        %[tmp1],  0(%[dst])                  \n\t"                  \
+        "sb        %[tmp2],  1(%[dst])                  \n\t"                  \
+        "sb        %[tmp3],  2(%[dst])                  \n\t"                  \
+        "sb        %[tmp4],  3(%[dst])                  \n\t"                  \
+        "addiu     %[dst],   %[dst],      4             \n\t"                  \
+      ".endif                                           \n\t"                  \
+      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
+        "sw        %[tmp1],  0(%[dst])                  \n\t"                  \
+        "sw        %[tmp2],  4(%[dst])                  \n\t"                  \
+        "sw        %[tmp3],  8(%[dst])                  \n\t"                  \
+        "sw        %[tmp4],  12(%[dst])                 \n\t"                  \
+        "addiu     %[dst],   %[dst],      16            \n\t"                  \
+      ".endif                                           \n\t"                  \
+        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
+          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
+        : [color_map]"r"(color_map)                                            \
+        : "memory"                                                             \
+      );                                                                       \
+    }                                                                          \
+    for (x = 0; x < (width & 3); ++x) {  /* left-over (< 4) entries */         \
+      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
+    }                                                                          \
+  }                                                                            \
+}
+
+MAP_COLOR_FUNCS(MapARGB, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)  // 32-bit
+MAP_COLOR_FUNCS(MapAlpha, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)  // 8-bit
+
+#undef MAP_COLOR_FUNCS
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;  // halfword-pair scratch
+  __asm__ volatile (  // per channel: c0 + c1 - c2, packed back to bytes
+    "preceu.ph.qbr   %[temp1],   %[c0]                 \n\t"  // c0 low bytes
+    "preceu.ph.qbl   %[temp2],   %[c0]                 \n\t"  // c0 high bytes
+    "preceu.ph.qbr   %[temp3],   %[c1]                 \n\t"
+    "preceu.ph.qbl   %[temp4],   %[c1]                 \n\t"
+    "preceu.ph.qbr   %[temp5],   %[c2]                 \n\t"
+    "preceu.ph.qbl   %[temp0],   %[c2]                 \n\t"
+    "subq.ph         %[temp3],   %[temp3],   %[temp5]  \n\t"  // c1 - c2
+    "subq.ph         %[temp4],   %[temp4],   %[temp0]  \n\t"
+    "addq.ph         %[temp1],   %[temp1],   %[temp3]  \n\t"  // c0 + c1 - c2
+    "addq.ph         %[temp2],   %[temp2],   %[temp4]  \n\t"
+    "shll_s.ph       %[temp1],   %[temp1],   7         \n\t"  // saturating <<
+    "shll_s.ph       %[temp2],   %[temp2],   7         \n\t"
+    "precrqu_s.qb.ph %[temp2],   %[temp2],   %[temp1]  \n\t"  // pack to bytes
+    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
+    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
+    : "memory"  // NOTE(review): shift + pack presumably clamp to 0..255
+  );
+  return temp2;  // the four packed result bytes
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;  // halfword-pair scratch
+  __asm__ volatile (  // per channel: avg + (avg - c2) / 2, packed to bytes
+    "adduh.qb         %[temp5],   %[c0],      %[c1]       \n\t"  // avg(c0, c1)
+    "preceu.ph.qbr    %[temp3],   %[c2]                   \n\t"
+    "preceu.ph.qbr    %[temp1],   %[temp5]                \n\t"  // avg low bytes
+    "preceu.ph.qbl    %[temp2],   %[temp5]                \n\t"  // avg high bytes
+    "preceu.ph.qbl    %[temp4],   %[c2]                   \n\t"
+    "subq.ph          %[temp3],   %[temp1],   %[temp3]    \n\t"  // avg - c2
+    "subq.ph          %[temp4],   %[temp2],   %[temp4]    \n\t"
+    "shrl.ph          %[temp5],   %[temp3],   15          \n\t"  // sign bit
+    "shrl.ph          %[temp0],   %[temp4],   15          \n\t"
+    "addq.ph          %[temp3],   %[temp3],   %[temp5]    \n\t"  // +1 if negative
+    "addq.ph          %[temp4],   %[temp0],   %[temp4]    \n\t"
+    "shra.ph          %[temp3],   %[temp3],   1           \n\t"  // halve the diff
+    "shra.ph          %[temp4],   %[temp4],   1           \n\t"
+    "addq.ph          %[temp1],   %[temp1],   %[temp3]    \n\t"  // avg + diff/2
+    "addq.ph          %[temp2],   %[temp2],   %[temp4]    \n\t"
+    "shll_s.ph        %[temp1],   %[temp1],   7           \n\t"  // saturating <<
+    "shll_s.ph        %[temp2],   %[temp2],   7           \n\t"
+    "precrqu_s.qb.ph  %[temp1],   %[temp2],   %[temp1]    \n\t"  // pack to bytes
+    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
+    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
+    : "memory"  // NOTE(review): shift + pack presumably clamp to 0..255
+  );
+  return temp1;  // the four packed result bytes
+}
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;  // returns either a or b
+  __asm__ volatile (
+    "cmpgdu.lt.qb %[temp1], %[c],     %[b]             \n\t"  // per byte: c < b
+    "pick.qb      %[temp1], %[b],     %[c]             \n\t"  // max(b, c)
+    "pick.qb      %[temp2], %[c],     %[b]             \n\t"  // min(b, c)
+    "cmpgdu.lt.qb %[temp4], %[c],     %[a]             \n\t"  // per byte: c < a
+    "pick.qb      %[temp4], %[a],     %[c]             \n\t"  // max(a, c)
+    "pick.qb      %[temp5], %[c],     %[a]             \n\t"  // min(a, c)
+    "subu.qb      %[temp3], %[temp1], %[temp2]         \n\t"  // |b - c|
+    "subu.qb      %[temp0], %[temp4], %[temp5]         \n\t"  // |a - c|
+    "raddu.w.qb   %[temp3], %[temp3]                   \n\t"  // sum of 4 bytes
+    "raddu.w.qb   %[temp0], %[temp0]                   \n\t"
+    "subu         %[temp3], %[temp3], %[temp0]         \n\t"  // sum|b-c|-sum|a-c|
+    "slti         %[temp0], %[temp3], 0x1              \n\t"  // 1 if that is <= 0
+    "movz         %[a],     %[b],     %[temp0]         \n\t"  // otherwise a = b
+    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
+      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
+      [a]"+&r"(a)
+    : [b]"r"(b), [c]"r"(c)
+  );
+  return a;  // a when sum|b-c| <= sum|a-c|, else b
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+  __asm__ volatile (  // per MIPS DSP ASE: per-byte unsigned (a0 + a1) >> 1
+    "adduh.qb    %[a0], %[a0], %[a1]       \n\t"
+    : [a0]"+r"(a0)
+    : [a1]"r"(a1)
+  );
+  return a0;  // a0 was overwritten in place by the asm
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+  return Average2(Average2(a0, a2), a1);  // a0 and a2 first, then with a1
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+                                     uint32_t a2, uint32_t a3) {
+  return Average2(Average2(a0, a1), Average2(a2, a3));  // average of 2 pairs
+}
+
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+  return Average3(left, top[0], top[1]);  // avg(avg(left, top[1]), top[0])
+}
+
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+  return Average2(left, top[-1]);  // per-byte average of left and top[-1]
+}
+
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+  return Average2(left, top[0]);  // per-byte average of left and top[0]
+}
+
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+  (void)left;  // unused; kept so the signature matches the other predictors
+  return Average2(top[-1], top[0]);  // per-byte average of top[-1] and top[0]
+}
+
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+  (void)left;  // unused; kept so the signature matches the other predictors
+  return Average2(top[0], top[1]);  // per-byte average of top[0] and top[1]
+}
+
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+  return Average4(left, top[-1], top[0], top[1]);  // avg of two pair-averages
+}
+
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+  return Select(top[0], left, top[-1]);  // yields top[0] or left, see Select()
+}
+
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractFull(left, top[0], top[-1]);  // left+top[0]-top[-1]
+}
+
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractHalf(left, top[0], top[-1]);  // avg + (avg-top[-1])/2
+}
+
+// Add green to blue and red channels (i.e. perform the inverse transform of
+// 'subtract green'). Works in place: 4 pixels per pass, then one at a time.
+static void AddGreenToBlueAndRed(uint32_t* data, int num_pixels) {
+  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
+  uint32_t* const p_loop1_end = data + (num_pixels & ~3);  // end of 4-px loop
+  uint32_t* const p_loop2_end = data + num_pixels;         // end of all pixels
+  __asm__ volatile (
+    ".set       push                                          \n\t"
+    ".set       noreorder                                     \n\t"
+    "beq        %[data],         %[p_loop1_end],     3f       \n\t"  // < 4 pixels
+    " nop                                                     \n\t"  // delay slot
+  "0:                                                         \n\t"  // 4-px loop
+    "lw         %[temp0],        0(%[data])                   \n\t"
+    "lw         %[temp1],        4(%[data])                   \n\t"
+    "lw         %[temp2],        8(%[data])                   \n\t"
+    "lw         %[temp3],        12(%[data])                  \n\t"
+    "ext        %[temp4],        %[temp0],           8,    8  \n\t"  // bits 8..15
+    "ext        %[temp5],        %[temp1],           8,    8  \n\t"
+    "ext        %[temp6],        %[temp2],           8,    8  \n\t"
+    "ext        %[temp7],        %[temp3],           8,    8  \n\t"
+    "addiu      %[data],         %[data],            16       \n\t"
+    "replv.ph   %[temp4],        %[temp4]                     \n\t"  // 0x00gg00gg
+    "replv.ph   %[temp5],        %[temp5]                     \n\t"
+    "replv.ph   %[temp6],        %[temp6]                     \n\t"
+    "replv.ph   %[temp7],        %[temp7]                     \n\t"
+    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"  // bytewise add
+    "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
+    "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
+    "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
+    "sw         %[temp0],        -16(%[data])                 \n\t"
+    "sw         %[temp1],        -12(%[data])                 \n\t"
+    "sw         %[temp2],        -8(%[data])                  \n\t"
+    "bne        %[data],         %[p_loop1_end],     0b       \n\t"
+    " sw        %[temp3],        -4(%[data])                  \n\t"  // delay slot
+  "3:                                                         \n\t"
+    "beq        %[data],         %[p_loop2_end],     2f       \n\t"  // none left
+    " nop                                                     \n\t"  // delay slot
+  "1:                                                         \n\t"  // 1-px loop
+    "lw         %[temp0],        0(%[data])                   \n\t"
+    "addiu      %[data],         %[data],            4        \n\t"
+    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
+    "replv.ph   %[temp4],        %[temp4]                     \n\t"
+    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
+    "bne        %[data],         %[p_loop2_end],     1b       \n\t"
+    " sw        %[temp0],        -4(%[data])                  \n\t"  // delay slot
+  "2:                                                         \n\t"
+    ".set       pop                                           \n\t"
+    : [data]"+&r"(data), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
+      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+      [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"
+  );
+}
+
+static void TransformColorInverse(const VP8LMultipliers* const m,
+                                  uint32_t* data, int num_pixels) {
+  int temp0, temp1, temp2, temp3, temp4, temp5;
+  uint32_t argb, argb1, new_red;
+  const uint32_t G_to_R = m->green_to_red_;
+  const uint32_t G_to_B = m->green_to_blue_;
+  const uint32_t R_to_B = m->red_to_blue_;
+  uint32_t* const p_loop_end = data + (num_pixels & ~1);  // asm does 2 px/pass
+  __asm__ volatile (
+    ".set            push                                    \n\t"
+    ".set            noreorder                               \n\t"
+    "beq             %[data],      %[p_loop_end],  1f        \n\t"  // < 2 pixels
+    " nop                                                    \n\t"  // delay slot
+    "replv.ph        %[temp0],     %[G_to_R]                 \n\t"  // both halves
+    "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
+    "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
+    "shll.ph         %[temp0],     %[temp0],       8         \n\t"  // << 8, >> 8:
+    "shll.ph         %[temp1],     %[temp1],       8         \n\t"  // sign-extend
+    "shll.ph         %[temp2],     %[temp2],       8         \n\t"  // low 8 bits
+    "shra.ph         %[temp0],     %[temp0],       8         \n\t"
+    "shra.ph         %[temp1],     %[temp1],       8         \n\t"
+    "shra.ph         %[temp2],     %[temp2],       8         \n\t"
+  "0:                                                        \n\t"  // 2-px loop
+    "lw              %[argb],      0(%[data])                \n\t"  // 1st pixel
+    "lw              %[argb1],     4(%[data])                \n\t"  // 2nd pixel
+    "addiu           %[data],      %[data],        8         \n\t"
+    "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"  // bytes 3,1 x2
+    "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"  // bits 8..15 x2
+    "shll.ph         %[temp3],     %[temp3],       8         \n\t"
+    "shra.ph         %[temp3],     %[temp3],       8         \n\t"  // sign-extended
+    "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"  // * G_to_R
+    "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"  // * G_to_B
+    "precrq.ph.w     %[new_red],   %[argb],        %[argb1]  \n\t"  // high halves
+    "ins             %[argb1],     %[argb],        16,   16  \n\t"  // low halves
+    "shra.ph         %[temp5],     %[temp5],       5         \n\t"
+    "shra.ph         %[temp3],     %[temp3],       5         \n\t"
+    "addu.ph         %[new_red],   %[new_red],     %[temp5]  \n\t"
+    "addu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
+    "preceu.ph.qbra  %[temp5],     %[new_red]                \n\t"  // low bytes x2
+    "shll.ph         %[temp4],     %[temp5],       8         \n\t"
+    "shra.ph         %[temp4],     %[temp4],       8         \n\t"  // sign-extended
+    "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"  // * R_to_B
+    "sb              %[temp5],     -2(%[data])               \n\t"  // 2nd px byte 2
+    "sra             %[temp5],     %[temp5],       16        \n\t"
+    "shra.ph         %[temp4],     %[temp4],       5         \n\t"
+    "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
+    "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"  // low bytes x2
+    "sb              %[temp5],     -6(%[data])               \n\t"  // 1st px byte 2
+    "sb              %[temp3],     -4(%[data])               \n\t"  // 2nd px byte 0
+    "sra             %[temp3],     %[temp3],       16        \n\t"
+    "bne             %[data],      %[p_loop_end],  0b        \n\t"
+    " sb             %[temp3],     -8(%[data])               \n\t"  // 1st px byte 0
+  "1:                                                        \n\t"
+    ".set            pop                                     \n\t"
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
+      [argb1]"=&r"(argb1), [data]"+&r"(data)
+    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
+      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
+    : "memory", "hi", "lo"  // NOTE(review): byte 2 / byte 0 presumably red / blue
+  );
+
+  // Fall-back to C-version for left-overs ('data' was advanced by the asm).
+  if (num_pixels & 1) VP8LTransformColorInverse_C(m, data, 1);
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src,  // MIPS DSP-R2 inline-asm version
+                             int num_pixels, uint8_t* dst) {  // 4 bytes in -> 3 bytes out per pixel
+  int temp0, temp1, temp2, temp3;  // scratch registers for the asm block
+  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the 4-pixel loop
+  const uint32_t* const p_loop2_end = src + num_pixels;  // end of the input
+  __asm__ volatile (
+    ".set       push                                       \n\t"  // save assembler settings
+    ".set       noreorder                                  \n\t"  // branch delay slots are filled by hand
+    "beq        %[src],      %[p_loop1_end],    3f         \n\t"  // no full group of 4 pixels: go to tail
+    " nop                                                  \n\t"  // (delay slot)
+  "0:                                                      \n\t"  // main loop: 4 pixels (16 bytes) -> 12 bytes
+    "lw         %[temp3],    12(%[src])                    \n\t"  // load pixels 3..0
+    "lw         %[temp2],    8(%[src])                     \n\t"
+    "lw         %[temp1],    4(%[src])                     \n\t"
+    "lw         %[temp0],    0(%[src])                     \n\t"
+    "ins        %[temp3],    %[temp2],          24,   8    \n\t"  // from here: merge four words into three
+    "sll        %[temp2],    %[temp2],          8          \n\t"
+    "rotr       %[temp3],    %[temp3],          16         \n\t"
+    "ins        %[temp2],    %[temp1],          0,    16   \n\t"
+    "sll        %[temp1],    %[temp1],          8          \n\t"
+    "wsbh       %[temp3],    %[temp3]                      \n\t"  // wsbh: swap the bytes inside each halfword
+    "balign     %[temp0],    %[temp1],          1          \n\t"
+    "wsbh       %[temp2],    %[temp2]                      \n\t"
+    "wsbh       %[temp0],    %[temp0]                      \n\t"
+    "usw        %[temp3],    8(%[dst])                     \n\t"  // usw: unaligned word store
+    "rotr       %[temp0],    %[temp0],          16         \n\t"
+    "usw        %[temp2],    4(%[dst])                     \n\t"
+    "addiu      %[src],      %[src],            16         \n\t"  // src += 4 pixels
+    "usw        %[temp0],    0(%[dst])                     \n\t"
+    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
+    " addiu     %[dst],      %[dst],            12         \n\t"  // (delay slot) dst += 12 bytes
+  "3:                                                      \n\t"  // tail: remaining pixels, one at a time
+    "beq        %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left: done
+    " nop                                                  \n\t"  // (delay slot)
+  "1:                                                      \n\t"
+    "lw         %[temp0],    0(%[src])                     \n\t"
+    "addiu      %[src],      %[src],            4          \n\t"
+    "wsbh       %[temp1],    %[temp0]                      \n\t"
+    "addiu      %[dst],      %[dst],            3          \n\t"  // advance first; stores use negative offsets
+    "ush        %[temp1],    -2(%[dst])                    \n\t"  // output bytes 1 and 2 of this pixel
+    "sra        %[temp0],    %[temp0],          16         \n\t"  // bring bits 23..16 down to the low byte
+    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
+    " sb        %[temp0],    -3(%[dst])                    \n\t"  // (delay slot) output byte 0 of this pixel
+  "2:                                                      \n\t"
+    ".set       pop                                        \n\t"  // restore assembler settings
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)  // src/dst are advanced in place
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"  // the asm reads src[] and writes dst[] behind the compiler's back
+  );
+}
+
+static void ConvertBGRAToRGBA(const uint32_t* src,  // MIPS DSP-R2 inline-asm version
+                              int num_pixels, uint8_t* dst) {  // 4 bytes in -> 4 bytes out per pixel
+  int temp0, temp1, temp2, temp3;  // one scratch register per pixel of the main loop
+  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the 4-pixel loop
+  const uint32_t* const p_loop2_end = src + num_pixels;  // end of the input
+  __asm__ volatile (
+    ".set       push                                       \n\t"  // save assembler settings
+    ".set       noreorder                                  \n\t"  // branch delay slots are filled by hand
+    "beq        %[src],      %[p_loop1_end],    3f         \n\t"  // no full group of 4 pixels: go to tail
+    " nop                                                  \n\t"  // (delay slot)
+  "0:                                                      \n\t"  // main loop: 4 pixels per iteration
+    "lw         %[temp0],    0(%[src])                     \n\t"
+    "lw         %[temp1],    4(%[src])                     \n\t"
+    "lw         %[temp2],    8(%[src])                     \n\t"
+    "lw         %[temp3],    12(%[src])                    \n\t"
+    "wsbh       %[temp0],    %[temp0]                      \n\t"  // swap the bytes inside each halfword
+    "wsbh       %[temp1],    %[temp1]                      \n\t"
+    "wsbh       %[temp2],    %[temp2]                      \n\t"
+    "wsbh       %[temp3],    %[temp3]                      \n\t"
+    "addiu      %[src],      %[src],            16         \n\t"  // src += 4 pixels
+    "balign     %[temp0],    %[temp0],          1          \n\t"  // byte-rotate; with wsbh this exchanges
+    "balign     %[temp1],    %[temp1],          1          \n\t"  // bytes 0 and 2 of the original word
+    "balign     %[temp2],    %[temp2],          1          \n\t"
+    "balign     %[temp3],    %[temp3],          1          \n\t"
+    "usw        %[temp0],    0(%[dst])                     \n\t"  // usw: unaligned word store
+    "usw        %[temp1],    4(%[dst])                     \n\t"
+    "usw        %[temp2],    8(%[dst])                     \n\t"
+    "usw        %[temp3],    12(%[dst])                    \n\t"
+    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
+    " addiu     %[dst],      %[dst],            16         \n\t"  // (delay slot) dst += 16 bytes
+  "3:                                                      \n\t"  // tail: remaining pixels, one at a time
+    "beq        %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left: done
+    " nop                                                  \n\t"  // (delay slot)
+  "1:                                                      \n\t"
+    "lw         %[temp0],    0(%[src])                     \n\t"
+    "wsbh       %[temp0],    %[temp0]                      \n\t"  // same wsbh + balign pair as above
+    "addiu      %[src],      %[src],            4          \n\t"
+    "balign     %[temp0],    %[temp0],          1          \n\t"
+    "usw        %[temp0],    0(%[dst])                     \n\t"
+    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
+    " addiu     %[dst],      %[dst],            4          \n\t"  // (delay slot) dst += 4 bytes
+  "2:                                                      \n\t"
+    ".set       pop                                        \n\t"  // restore assembler settings
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)  // src/dst are advanced in place
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"  // the asm reads src[] and writes dst[] behind the compiler's back
+  );
+}
+
+static void ConvertBGRAToRGBA4444(const uint32_t* src,  // MIPS DSP-R2 inline-asm version
+                                  int num_pixels, uint8_t* dst) {  // 4 bytes in -> 2 bytes out per pixel
+  int temp0, temp1, temp2, temp3, temp4, temp5;  // scratch registers for the asm block
+  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the 4-pixel loop
+  const uint32_t* const p_loop2_end = src + num_pixels;  // end of the input
+  __asm__ volatile (
+    ".set           push                                       \n\t"  // save assembler settings
+    ".set           noreorder                                  \n\t"  // branch delay slots are filled by hand
+    "beq            %[src],      %[p_loop1_end],    3f         \n\t"  // no full group of 4 pixels: go to tail
+    " nop                                                      \n\t"  // (delay slot)
+  "0:                                                          \n\t"  // main loop: 4 pixels (16 bytes) -> 8 bytes
+    "lw             %[temp0],    0(%[src])                     \n\t"
+    "lw             %[temp1],    4(%[src])                     \n\t"
+    "lw             %[temp2],    8(%[src])                     \n\t"
+    "lw             %[temp3],    12(%[src])                    \n\t"
+    "ext            %[temp4],    %[temp0],          28,   4    \n\t"  // high nibble of byte 3 (bits 31..28)
+    "ext            %[temp5],    %[temp0],          12,   4    \n\t"  // high nibble of byte 1 (bits 15..12)
+    "ins            %[temp0],    %[temp4],          0,    4    \n\t"  // -> low nibble of byte 0
+    "ext            %[temp4],    %[temp1],          28,   4    \n\t"  // same steps, interleaved, for pixels 1..3
+    "ins            %[temp0],    %[temp5],          16,   4    \n\t"  // -> low nibble of byte 2
+    "ext            %[temp5],    %[temp1],          12,   4    \n\t"
+    "ins            %[temp1],    %[temp4],          0,    4    \n\t"
+    "ext            %[temp4],    %[temp2],          28,   4    \n\t"
+    "ins            %[temp1],    %[temp5],          16,   4    \n\t"
+    "ext            %[temp5],    %[temp2],          12,   4    \n\t"
+    "ins            %[temp2],    %[temp4],          0,    4    \n\t"
+    "ext            %[temp4],    %[temp3],          28,   4    \n\t"
+    "ins            %[temp2],    %[temp5],          16,   4    \n\t"
+    "ext            %[temp5],    %[temp3],          12,   4    \n\t"
+    "ins            %[temp3],    %[temp4],          0,    4    \n\t"
+    "precr.qb.ph    %[temp1],    %[temp1],          %[temp0]   \n\t"  // keep bytes 2 and 0 of pixels 1 and 0
+    "ins            %[temp3],    %[temp5],          16,   4    \n\t"
+    "addiu          %[src],      %[src],            16         \n\t"  // src += 4 pixels
+    "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"  // keep bytes 2 and 0 of pixels 3 and 2
+#ifdef WEBP_SWAP_16BIT_CSP
+    "usw            %[temp1],    0(%[dst])                     \n\t"  // store the two 16-bit results as-is
+    "usw            %[temp3],    4(%[dst])                     \n\t"
+#else
+    "wsbh           %[temp1],    %[temp1]                      \n\t"  // swap the bytes of each 16-bit result
+    "wsbh           %[temp3],    %[temp3]                      \n\t"
+    "usw            %[temp1],    0(%[dst])                     \n\t"
+    "usw            %[temp3],    4(%[dst])                     \n\t"
+#endif
+    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
+    " addiu         %[dst],      %[dst],            8          \n\t"  // (delay slot) dst += 8 bytes
+  "3:                                                          \n\t"  // tail: remaining pixels, one at a time
+    "beq            %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left: done
+    " nop                                                      \n\t"  // (delay slot)
+  "1:                                                          \n\t"
+    "lw             %[temp0],    0(%[src])                     \n\t"
+    "ext            %[temp4],    %[temp0],          28,   4    \n\t"  // same nibble packing as the main loop
+    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
+    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
+    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
+    "addiu          %[src],      %[src],            4          \n\t"
+    "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"  // low halfword = byte 2, byte 0
+#ifdef WEBP_SWAP_16BIT_CSP
+    "ush            %[temp0],    0(%[dst])                     \n\t"  // ush: unaligned halfword store
+#else
+    "wsbh           %[temp0],    %[temp0]                      \n\t"  // swap the two output bytes first
+    "ush            %[temp0],    0(%[dst])                     \n\t"
+#endif
+    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
+    " addiu         %[dst],      %[dst],            2          \n\t"  // (delay slot) dst += 2 bytes
+  "2:                                                          \n\t"
+    ".set           pop                                        \n\t"  // restore assembler settings
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [dst]"+&r"(dst), [src]"+&r"(src)  // src/dst are advanced in place
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"  // the asm reads src[] and writes dst[] behind the compiler's back
+  );
+}
+
+static void ConvertBGRAToRGB565(const uint32_t* src,  // MIPS DSP-R2 inline-asm version
+                                int num_pixels, uint8_t* dst) {  // 4 bytes in -> 2 bytes out per pixel
+  int temp0, temp1, temp2, temp3, temp4, temp5;  // scratch registers for the asm block
+  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the 4-pixel loop
+  const uint32_t* const p_loop2_end = src + num_pixels;  // end of the input
+  __asm__ volatile (
+    ".set           push                                       \n\t"  // save assembler settings
+    ".set           noreorder                                  \n\t"  // branch delay slots are filled by hand
+    "beq            %[src],      %[p_loop1_end],    3f         \n\t"  // no full group of 4 pixels: go to tail
+    " nop                                                      \n\t"  // (delay slot)
+  "0:                                                          \n\t"  // main loop: 4 pixels (16 bytes) -> 8 bytes
+    "lw             %[temp0],    0(%[src])                     \n\t"
+    "lw             %[temp1],    4(%[src])                     \n\t"
+    "lw             %[temp2],    8(%[src])                     \n\t"
+    "lw             %[temp3],    12(%[src])                    \n\t"
+    "ext            %[temp4],    %[temp0],          8,    16   \n\t"  // bits 23..8: bytes 2 and 1
+    "ext            %[temp5],    %[temp0],          5,    11   \n\t"  // bits 15..5
+    "ext            %[temp0],    %[temp0],          3,    5    \n\t"  // bits 7..3: top 5 bits of byte 0
+    "ins            %[temp4],    %[temp5],          0,    11   \n\t"  // keeps top 5 bits of byte 2, then byte 1
+    "ext            %[temp5],    %[temp1],          5,    11   \n\t"  // same steps, interleaved, for pixels 1..3
+    "ins            %[temp4],    %[temp0],          0,    5    \n\t"  // low 5 bits <- top 5 bits of byte 0
+    "ext            %[temp0],    %[temp1],          8,    16   \n\t"
+    "ext            %[temp1],    %[temp1],          3,    5    \n\t"
+    "ins            %[temp0],    %[temp5],          0,    11   \n\t"
+    "ext            %[temp5],    %[temp2],          5,    11   \n\t"
+    "ins            %[temp0],    %[temp1],          0,    5    \n\t"
+    "ext            %[temp1],    %[temp2],          8,    16   \n\t"
+    "ext            %[temp2],    %[temp2],          3,    5    \n\t"
+    "ins            %[temp1],    %[temp5],          0,    11   \n\t"
+    "ext            %[temp5],    %[temp3],          5,    11   \n\t"
+    "ins            %[temp1],    %[temp2],          0,    5    \n\t"
+    "ext            %[temp2],    %[temp3],          8,    16   \n\t"
+    "ext            %[temp3],    %[temp3],          3,    5    \n\t"
+    "ins            %[temp2],    %[temp5],          0,    11   \n\t"
+    "append         %[temp0],    %[temp4],          16         \n\t"  // pack results of pixels 1 and 0 in one word
+    "ins            %[temp2],    %[temp3],          0,    5    \n\t"
+    "addiu          %[src],      %[src],            16         \n\t"  // src += 4 pixels
+    "append         %[temp2],    %[temp1],          16         \n\t"  // pack results of pixels 3 and 2 in one word
+#ifdef WEBP_SWAP_16BIT_CSP
+    "usw            %[temp0],    0(%[dst])                     \n\t"  // store the two 16-bit results as-is
+    "usw            %[temp2],    4(%[dst])                     \n\t"
+#else
+    "wsbh           %[temp0],    %[temp0]                      \n\t"  // swap the bytes of each 16-bit result
+    "wsbh           %[temp2],    %[temp2]                      \n\t"
+    "usw            %[temp0],    0(%[dst])                     \n\t"
+    "usw            %[temp2],    4(%[dst])                     \n\t"
+#endif
+    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
+    " addiu         %[dst],      %[dst],            8          \n\t"  // (delay slot) dst += 8 bytes
+  "3:                                                          \n\t"  // tail: remaining pixels, one at a time
+    "beq            %[src],      %[p_loop2_end],    2f         \n\t"  // nothing left: done
+    " nop                                                      \n\t"  // (delay slot)
+  "1:                                                          \n\t"
+    "lw             %[temp0],    0(%[src])                     \n\t"
+    "ext            %[temp4],    %[temp0],          8,    16   \n\t"  // same 5:6:5 packing as the main loop
+    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
+    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
+    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
+    "addiu          %[src],      %[src],            4          \n\t"
+    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
+#ifdef WEBP_SWAP_16BIT_CSP
+    "ush            %[temp4],    0(%[dst])                     \n\t"  // ush: unaligned halfword store
+#else
+    "wsbh           %[temp4],    %[temp4]                      \n\t"  // swap the two output bytes first
+    "ush            %[temp4],    0(%[dst])                     \n\t"
+#endif
+    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
+    " addiu         %[dst],      %[dst],            2          \n\t"  // (delay slot) dst += 2 bytes
+  "2:                                                          \n\t"
+    ".set           pop                                        \n\t"  // restore assembler settings
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
+      [dst]"+&r"(dst), [src]"+&r"(src)  // src/dst are advanced in place
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"  // the asm reads src[] and writes dst[] behind the compiler's back
+  );
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src,  // MIPS DSP-R2 inline-asm version
+                             int num_pixels, uint8_t* dst) {  // 4 bytes in -> 3 bytes out per pixel
+  int temp0, temp1, temp2, temp3;  // one scratch register per pixel of the main loop
+  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);  // end of the 4-pixel loop
+  const uint32_t* const p_loop2_end = src + num_pixels;  // end of the input
+  __asm__ volatile (
+    ".set       push                                         \n\t"  // save assembler settings
+    ".set       noreorder                                    \n\t"  // branch delay slots are filled by hand
+    "beq        %[src],      %[p_loop1_end],    3f           \n\t"  // no full group of 4 pixels: go to tail
+    " nop                                                    \n\t"  // (delay slot)
+  "0:                                                        \n\t"  // main loop: 4 pixels (16 bytes) -> 12 bytes
+    "lw         %[temp0],    0(%[src])                       \n\t"
+    "lw         %[temp1],    4(%[src])                       \n\t"
+    "lw         %[temp2],    8(%[src])                       \n\t"
+    "lw         %[temp3],    12(%[src])                      \n\t"
+    "ins        %[temp0],    %[temp1],          24,    8     \n\t"  // top byte of pixel 0 <- low byte of pixel 1
+    "sra        %[temp1],    %[temp1],          8            \n\t"  // drop the byte just consumed
+    "ins        %[temp1],    %[temp2],          16,    16    \n\t"  // upper half <- low halfword of pixel 2
+    "sll        %[temp2],    %[temp2],          8            \n\t"
+    "balign     %[temp3],    %[temp2],          1            \n\t"  // third word from pixels 3 and 2
+    "addiu      %[src],      %[src],            16           \n\t"  // src += 4 pixels
+    "usw        %[temp0],    0(%[dst])                       \n\t"  // usw: unaligned word store
+    "usw        %[temp1],    4(%[dst])                       \n\t"
+    "usw        %[temp3],    8(%[dst])                       \n\t"
+    "bne        %[src],      %[p_loop1_end],    0b           \n\t"
+    " addiu     %[dst],      %[dst],            12           \n\t"  // (delay slot) dst += 12 bytes
+  "3:                                                        \n\t"  // tail: remaining pixels, one at a time
+    "beq        %[src],      %[p_loop2_end],    2f           \n\t"  // nothing left: done
+    " nop                                                    \n\t"  // (delay slot)
+  "1:                                                        \n\t"
+    "lw         %[temp0],    0(%[src])                       \n\t"
+    "addiu      %[src],      %[src],            4            \n\t"
+    "addiu      %[dst],      %[dst],            3            \n\t"  // advance first; stores use negative offsets
+    "ush        %[temp0],    -3(%[dst])                      \n\t"  // low halfword -> output bytes 0 and 1
+    "sra        %[temp0],    %[temp0],          16           \n\t"  // bring bits 23..16 down to the low byte
+    "bne        %[src],      %[p_loop2_end],    1b           \n\t"
+    " sb        %[temp0],    -1(%[dst])                      \n\t"  // (delay slot) output byte 2
+  "2:                                                        \n\t"
+    ".set       pop                                          \n\t"  // restore assembler settings
+    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
+      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)  // src/dst are advanced in place
+    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
+    : "memory"  // the asm reads src[] and writes dst[] behind the compiler's back
+  );
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;            // colorspace conversions
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;
+  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
+  VP8LTransformColorInverse = TransformColorInverse;  // inverse transforms
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+  VP8LMapColor8b = MapAlpha;                          // MapColor hooks
+  VP8LMapColor32b = MapARGB;
+  VP8LPredictors[13] = Predictor13;                   // only slots 5..13 are set
+  VP8LPredictors[12] = Predictor12;
+  VP8LPredictors[11] = Predictor11;
+  VP8LPredictors[10] = Predictor10;
+  VP8LPredictors[9] = Predictor9;
+  VP8LPredictors[8] = Predictor8;
+  VP8LPredictors[7] = Predictor7;
+  VP8LPredictors[6] = Predictor6;
+  VP8LPredictors[5] = Predictor5;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/lossless_neon.c b/drivers/webp/dsp/lossless_neon.c
new file mode 100644
index 0000000000..6faccb8f97
--- /dev/null
+++ b/drivers/webp/dsp/lossless_neon.c
@@ -0,0 +1,269 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON variant of methods for lossless decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+
+#include "./lossless.h"
+#include "./neon.h"
+
+//------------------------------------------------------------------------------
+// Colorspace conversion functions
+
+#if !defined(WORK_AROUND_GCC)
+// gcc 4.6.0 (NDK-r9) had some trouble with this code, so it is only used
+// with gcc-4.8.x or later.
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  // Vector part: 16 pixels per pass. vld4q_u8 de-interleaves the 64 input
+  // bytes into four 16-byte planes and vst4q_u8 re-interleaves them on store.
+  const uint32_t* const simd_end = src + (num_pixels & ~15);
+  for (; src < simd_end; src += 16, dst += 64) {
+    const uint8x16x4_t in = vld4q_u8((uint8_t*)src);
+    // Same planes with 0 and 2 (B and R) exchanged; planes 1 and 3 untouched.
+    const uint8x16x4_t out = { { in.val[2], in.val[1], in.val[0], in.val[3] } };
+    vst4q_u8(dst, out);
+  }
+  // The plain-C version converts the remaining 0..15 pixels.
+  VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst);
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const simd_end = src + (num_pixels & ~15);
+  // 16 pixels per pass: load four byte planes, store the first three (48 bytes).
+  for (; src < simd_end; src += 16, dst += 48) {
+    const uint8x16x4_t bgra = vld4q_u8((uint8_t*)src);
+    const uint8x16x3_t bgr = { { bgra.val[0], bgra.val[1], bgra.val[2] } };
+    vst3q_u8(dst, bgr);
+  }
+  VP8LConvertBGRAToBGR_C(src, num_pixels & 15, dst);  // remaining 0..15 pixels
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const simd_end = src + (num_pixels & ~15);
+  // 16 pixels per pass: load four byte planes, store planes 2, 1, 0 (48 bytes).
+  for (; src < simd_end; src += 16, dst += 48) {
+    const uint8x16x4_t bgra = vld4q_u8((uint8_t*)src);
+    const uint8x16x3_t rgb = { { bgra.val[2], bgra.val[1], bgra.val[0] } };
+    vst3q_u8(dst, rgb);
+  }
+  VP8LConvertBGRAToRGB_C(src, num_pixels & 15, dst);  // remaining 0..15 pixels
+}
+
+#else  // WORK_AROUND_GCC
+
+// gcc-4.6.0 fallback
+
+static const uint8_t kRGBAShuffle[8] = { 2, 1, 0, 3, 6, 5, 4, 7 };
+
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  const uint8x8_t bgra_to_rgba = vld1_u8(kRGBAShuffle);
+  const uint32_t* const pair_end = src + (num_pixels & ~1);
+  // Two pixels (8 bytes) per pass; the table swaps bytes 0 and 2 of each pixel.
+  for (; src < pair_end; src += 2, dst += 8) {
+    const uint8x8_t in = vld1_u8((uint8_t*)src);
+    vst1_u8(dst, vtbl1_u8(in, bgra_to_rgba));
+  }
+  VP8LConvertBGRAToRGBA_C(src, num_pixels & 1, dst);  // left-overs
+}
+
+static const uint8_t kBGRShuffle[3][8] = {
+  {  0,  1,  2,  4,  5,  6,  8,  9 },
+  { 10, 12, 13, 14, 16, 17, 18, 20 },
+  { 21, 22, 24, 25, 26, 28, 29, 30 }
+};
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint8x8_t idx0 = vld1_u8(kBGRShuffle[0]);
+  const uint8x8_t idx1 = vld1_u8(kBGRShuffle[1]);
+  const uint8x8_t idx2 = vld1_u8(kBGRShuffle[2]);
+  const uint32_t* const simd_end = src + (num_pixels & ~7);
+  // Eight pixels per pass: 32 input bytes are gathered into 24 output bytes.
+  for (; src < simd_end; src += 8, dst += 8 * 3) {
+    const uint8_t* const in = (const uint8_t*)src;
+    uint8x8x4_t bgra;
+    INIT_VECTOR4(bgra,
+                 vld1_u8(in +  0), vld1_u8(in +  8),
+                 vld1_u8(in + 16), vld1_u8(in + 24));
+    // Each table lookup skips every fourth byte and emits 8 output bytes.
+    vst1_u8(dst +  0, vtbl4_u8(bgra, idx0));
+    vst1_u8(dst +  8, vtbl4_u8(bgra, idx1));
+    vst1_u8(dst + 16, vtbl4_u8(bgra, idx2));
+  }
+  VP8LConvertBGRAToBGR_C(src, num_pixels & 7, dst);  // left-overs
+}
+
+static const uint8_t kRGBShuffle[3][8] = {
+  {  2,  1,  0,  6,  5,  4, 10,  9 },
+  {  8, 14, 13, 12, 18, 17, 16, 22 },
+  { 21, 20, 26, 25, 24, 30, 29, 28 }
+};
+
+static void ConvertBGRAToRGB(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint8x8_t idx0 = vld1_u8(kRGBShuffle[0]);
+  const uint8x8_t idx1 = vld1_u8(kRGBShuffle[1]);
+  const uint8x8_t idx2 = vld1_u8(kRGBShuffle[2]);
+  const uint32_t* const simd_end = src + (num_pixels & ~7);
+  // Eight pixels per pass: 32 input bytes are gathered into 24 output bytes.
+  for (; src < simd_end; src += 8, dst += 8 * 3) {
+    const uint8_t* const in = (const uint8_t*)src;
+    uint8x8x4_t bgra;
+    INIT_VECTOR4(bgra,
+                 vld1_u8(in +  0), vld1_u8(in +  8),
+                 vld1_u8(in + 16), vld1_u8(in + 24));
+    // Each lookup reverses bytes 0..2 of a pixel and skips byte 3.
+    vst1_u8(dst +  0, vtbl4_u8(bgra, idx0));
+    vst1_u8(dst +  8, vtbl4_u8(bgra, idx1));
+    vst1_u8(dst + 16, vtbl4_u8(bgra, idx2));
+  }
+  VP8LConvertBGRAToRGB_C(src, num_pixels & 7, dst);  // left-overs
+}
+
+#endif   // !WORK_AROUND_GCC
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+// vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use
+// non-standard versions there.
+#if defined(__APPLE__) && defined(__aarch64__) && \
+    defined(__apple_build_version__) && (__apple_build_version__< 6020037)
+#define USE_VTBLQ
+#endif
+
+#ifdef USE_VTBLQ
+// Byte 1 of each pixel goes to bytes 0 and 2; index 255 yields a zero byte.
+static const uint8_t kGreenShuffle[16] = {
+  1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13, 255
+};
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
+                                             const uint8x16_t shuffle) {
+  const uint8x8_t lo = vtbl1q_u8(argb, vget_low_u8(shuffle));
+  return vcombine_u8(lo, vtbl1q_u8(argb, vget_high_u8(shuffle)));
+}
+#else  // !USE_VTBLQ
+// Byte 1 of each pixel goes to bytes 0 and 2; index 255 yields a zero byte.
+static const uint8_t kGreenShuffle[8] = { 1, 255, 1, 255, 5, 255, 5, 255 };
+
+static WEBP_INLINE uint8x16_t DoGreenShuffle(const uint8x16_t argb,
+                                             const uint8x8_t shuffle) {
+  const uint8x8_t lo = vtbl1_u8(vget_low_u8(argb), shuffle);
+  return vcombine_u8(lo, vtbl1_u8(vget_high_u8(argb), shuffle));
+}
+#endif  // USE_VTBLQ
+
+static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+#ifdef USE_VTBLQ
+  const uint8x16_t green_idx = vld1q_u8(kGreenShuffle);
+#else
+  const uint8x8_t green_idx = vld1_u8(kGreenShuffle);
+#endif
+  uint32_t* const simd_end = argb_data + (num_pixels & ~3);
+  for (; argb_data < simd_end; argb_data += 4) {  // four pixels per pass
+    uint8_t* const p = (uint8_t*)argb_data;
+    const uint8x16_t in = vld1q_u8(p);
+    vst1q_u8(p, vaddq_u8(in, DoGreenShuffle(in, green_idx)));  // in place
+  }
+  // The plain-C version handles the 0..3 pixels that remain.
+  VP8LAddGreenToBlueAndRed_C(argb_data, num_pixels & 3);
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+
+static void TransformColorInverse(const VP8LMultipliers* const m,  // NEON version, in place
+                                  uint32_t* argb_data, int num_pixels) {
+  // Multipliers sign-extended through int16_t: (m << 8) >> 6, i.e. scaled by 4 (assumes 8-bit fields -- TODO confirm).
+#define CST(X)  (((int16_t)(m->X << 8)) >> 6)
+  const int16_t rb[8] = {  // per pixel: even lane = green_to_blue_, odd lane = green_to_red_
+    CST(green_to_blue_), CST(green_to_red_),
+    CST(green_to_blue_), CST(green_to_red_),
+    CST(green_to_blue_), CST(green_to_red_),
+    CST(green_to_blue_), CST(green_to_red_)
+  };
+  const int16x8_t mults_rb = vld1q_s16(rb);
+  const int16_t b2[8] = {  // per pixel: even lane = 0, odd lane = red_to_blue_
+    0, CST(red_to_blue_), 0, CST(red_to_blue_),
+    0, CST(red_to_blue_), 0, CST(red_to_blue_),
+  };
+  const int16x8_t mults_b2 = vld1q_s16(b2);
+#undef CST
+#ifdef USE_VTBLQ
+  static const uint8_t kg0g0[16] = {  // 255 = zero byte; byte 1 of each pixel goes to bytes 1 and 3
+    255, 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255, 13, 255, 13
+  };
+  const uint8x16_t shuffle = vld1q_u8(kg0g0);
+#else
+  static const uint8_t k0g0g[8] = { 255, 1, 255, 1, 255, 5, 255, 5 };  // same table, applied per 8-byte half
+  const uint8x8_t shuffle = vld1_u8(k0g0g);
+#endif
+  const uint32x4_t mask_ag = vdupq_n_u32(0xff00ff00u);  // selects bytes 3 and 1 of each pixel
+  int i;
+  for (i = 0; i + 4 <= num_pixels; i += 4) {  // four pixels per iteration
+    const uint8x16_t in = vld1q_u8((uint8_t*)(argb_data + i));
+    const uint32x4_t a0g0 = vandq_u32(vreinterpretq_u32_u8(in), mask_ag);  // kept as-is, OR-ed back at the end
+    // green in the high byte of each 16-bit lane, low byte zero (memory order: 0 g 0 g)
+    const uint8x16_t greens = DoGreenShuffle(in, shuffle);
+    // x dr  x db1   (diagrams from here on list a pixel's bytes most-significant first)
+    const int16x8_t A = vqdmulhq_s16(vreinterpretq_s16_u8(greens), mults_rb);  // (2*a*b)>>16 per lane
+    // x r'  x   b'
+    const int8x16_t B = vaddq_s8(vreinterpretq_s8_u8(in),
+                                 vreinterpretq_s8_s16(A));
+    // r' 0   b' 0
+    const int16x8_t C = vshlq_n_s16(vreinterpretq_s16_s8(B), 8);
+    // x db2  0  0
+    const int16x8_t D = vqdmulhq_s16(C, mults_b2);
+    // 0  x db2  0
+    const uint32x4_t E = vshrq_n_u32(vreinterpretq_u32_s16(D), 8);
+    // r' x  b'' 0
+    const int8x16_t F = vaddq_s8(vreinterpretq_s8_u32(E),
+                                 vreinterpretq_s8_s16(C));
+    // 0  r'  0  b''
+    const uint16x8_t G = vshrq_n_u16(vreinterpretq_u16_s8(F), 8);
+    const uint32x4_t out = vorrq_u32(vreinterpretq_u32_u16(G), a0g0);  // merge with the untouched bytes
+    vst1q_u32(argb_data + i, out);
+  }
+  // Fall-back to C-version for left-overs.
+  VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+}
+
+#undef USE_VTBLQ
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitNEON(void) {
+  VP8LTransformColorInverse = TransformColorInverse;  // inverse transforms
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+
+  VP8LConvertBGRAToRGB = ConvertBGRAToRGB;            // colorspace conversions
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+}
+
+#else  // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(VP8LDspInitNEON)
+
+#endif  // WEBP_USE_NEON
diff --git a/drivers/webp/dsp/lossless_sse2.c b/drivers/webp/dsp/lossless_sse2.c
new file mode 100644
index 0000000000..2d016c2911
--- /dev/null
+++ b/drivers/webp/dsp/lossless_sse2.c
@@ -0,0 +1,372 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 variant of methods for lossless decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <assert.h>
+#include <emmintrin.h>
+#include "./lossless.h"
+
+//------------------------------------------------------------------------------
+// Predictor Transform
+
+// Per 8b channel: clamp(c0 + c1 - c2) to [0, 255], using 16b intermediates.
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  const __m128i kZero = _mm_setzero_si128();
+  // Widen the four 8b channels of each input to 16b lanes.
+  const __m128i wide0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), kZero);
+  const __m128i wide1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), kZero);
+  const __m128i wide2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), kZero);
+  const __m128i sum = _mm_sub_epi16(_mm_add_epi16(wide0, wide1), wide2);
+  // Saturating pack brings the result back to 8b per channel.
+  return (uint32_t)_mm_cvtsi128_si32(_mm_packus_epi16(sum, sum));
+}
+
+// Per 8b channel: clamp(avg + (avg - c2) / 2), with avg = (c0 + c1) >> 1.
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {
+  const __m128i kZero = _mm_setzero_si128();
+  const __m128i wide0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), kZero);
+  const __m128i wide1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), kZero);
+  const __m128i wide2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), kZero);
+  const __m128i avg = _mm_srli_epi16(_mm_add_epi16(wide1, wide0), 1);
+  const __m128i delta = _mm_sub_epi16(avg, wide2);
+  // The compare yields -1 where c2 > avg; subtracting it adds 1 to negative
+  // deltas so that the arithmetic shift below rounds toward zero.
+  const __m128i is_neg = _mm_cmpgt_epi16(wide2, avg);
+  const __m128i half = _mm_srai_epi16(_mm_sub_epi16(delta, is_neg), 1);
+  const __m128i sum = _mm_add_epi16(avg, half);
+  const __m128i packed = _mm_packus_epi16(sum, sum);
+  return (uint32_t)_mm_cvtsi128_si32(packed);
+}
+
+// Picks between 'a' and 'b' by comparing their distance to 'c': returns 'a'
+// when sum(|b - c|) <= sum(|a - c|) over the four 8b channels, 'b' otherwise.
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
+  const __m128i kZero = _mm_setzero_si128();
+  const __m128i va = _mm_cvtsi32_si128(a);
+  const __m128i vb = _mm_cvtsi32_si128(b);
+  const __m128i vc = _mm_cvtsi32_si128(c);
+  // |x - y| on unsigned bytes: one of the two saturating differences is 0.
+  const __m128i abs_ac =
+      _mm_or_si128(_mm_subs_epu8(va, vc), _mm_subs_epu8(vc, va));
+  const __m128i abs_bc =
+      _mm_or_si128(_mm_subs_epu8(vb, vc), _mm_subs_epu8(vc, vb));
+  // Widen to 16b lanes and take |b - c| - |a - c| for every channel.
+  const __m128i delta = _mm_sub_epi16(_mm_unpacklo_epi8(abs_bc, kZero),
+                                      _mm_unpacklo_epi8(abs_ac, kZero));
+  int16_t lanes[8];
+  int total;
+  // Lanes 0..3 hold the four channels; add them up on the scalar side.
+  _mm_storeu_si128((__m128i*)lanes, delta);
+  total = lanes[0] + lanes[1] + lanes[2] + lanes[3];
+  return (total <= 0) ? a : b;
+}
+
+// Returns (a0 + a1) >> 1 per 8b channel, left widened in 16b lanes.
+static WEBP_INLINE __m128i Average2_128i(uint32_t a0, uint32_t a1) {
+  const __m128i kZero = _mm_setzero_si128();
+  const __m128i wide0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), kZero);
+  const __m128i wide1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), kZero);
+  // Sums fit in 16b, so the logical shift is an exact truncating halving.
+  return _mm_srli_epi16(_mm_add_epi16(wide1, wide0), 1);
+}
+
+// Returns the truncating average (a0 + a1) >> 1 of every 8b channel.
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
+  const __m128i avg16 = Average2_128i(a0, a1);          // 16b lanes
+  const __m128i avg8 = _mm_packus_epi16(avg16, avg16);  // back to 8b lanes
+  return (uint32_t)_mm_cvtsi128_si32(avg8);
+}
+
+// Returns (((a0 + a2) >> 1) + a1) >> 1, computed per 8b channel.
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
+  const __m128i kZero = _mm_setzero_si128();
+  const __m128i outer = Average2_128i(a0, a2);  // (a0 + a2) >> 1, 16b lanes
+  const __m128i mid = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), kZero);
+  const __m128i avg = _mm_srli_epi16(_mm_add_epi16(outer, mid), 1);
+  // Values are <= 255 again, so the saturating pack is lossless here.
+  const __m128i packed = _mm_packus_epi16(avg, avg);
+  return (uint32_t)_mm_cvtsi128_si32(packed);
+}
+
+// Returns (((a0 + a1) >> 1) + ((a2 + a3) >> 1)) >> 1, per 8b channel.
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+                                     uint32_t a2, uint32_t a3) {
+  const __m128i pair01 = Average2_128i(a0, a1);
+  const __m128i pair23 = Average2_128i(a2, a3);
+  const __m128i avg = _mm_srli_epi16(_mm_add_epi16(pair23, pair01), 1);
+  // Pack the 16b lanes back down to one 8b value per channel.
+  const __m128i packed = _mm_packus_epi16(avg, avg);
+  return (uint32_t)_mm_cvtsi128_si32(packed);
+}
+
+// Predictor #5: Average3() of left, top[0] and top[1].
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
+  return Average3(left, top[0], top[1]);
+}
+// Predictor #6: Average2() of left and top[-1].
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
+  return Average2(left, top[-1]);
+}
+// Predictor #7: Average2() of left and top[0].
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
+  return Average2(left, top[0]);
+}
+// Predictor #8: Average2() of top[-1] and top[0]; 'left' is not used.
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
+  (void)left;  // parameter intentionally unused
+  return Average2(top[-1], top[0]);
+}
+// Predictor #9: Average2() of top[0] and top[1]; 'left' is not used.
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
+  (void)left;  // parameter intentionally unused
+  return Average2(top[0], top[1]);
+}
+// Predictor #10: Average4() of left, top[-1], top[0] and top[1].
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
+  return Average4(left, top[-1], top[0], top[1]);
+}
+// Predictor #11: Select() between top[0] and left, both measured vs top[-1].
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
+  return Select(top[0], left, top[-1]);
+}
+// Predictor #12: ClampedAddSubtractFull(left, top[0], top[-1]).
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractFull(left, top[0], top[-1]);
+}
+// Predictor #13: ClampedAddSubtractHalf(left, top[0], top[-1]).
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
+  return ClampedAddSubtractHalf(left, top[0], top[-1]);
+}
+
+//------------------------------------------------------------------------------
+// Subtract-Green Transform
+
+// Adds each pixel's green byte to its red and blue bytes, in place (mod 256).
+static void AddGreenToBlueAndRed(uint32_t* argb_data, int num_pixels) {
+  int i = 0;
+  for (; i + 4 <= num_pixels; i += 4) {   // four pixels per iteration
+    __m128i* const px = (__m128i*)&argb_data[i];
+    const __m128i argb = _mm_loadu_si128(px);
+    const __m128i ag = _mm_srli_epi16(argb, 8);        // 0 a 0 g
+    const __m128i g_lo = _mm_shufflelo_epi16(ag, _MM_SHUFFLE(2, 2, 0, 0));
+    const __m128i gg = _mm_shufflehi_epi16(g_lo, _MM_SHUFFLE(2, 2, 0, 0));
+    _mm_storeu_si128(px, _mm_add_epi8(argb, gg));      // alpha/green get +0
+  }
+  VP8LAddGreenToBlueAndRed_C(argb_data + i, num_pixels - i);  // left-overs
+}
+
+//------------------------------------------------------------------------------
+// Color Transform
+// In-place inverse color transform: r' = r + dr(g); b'' = b + db1(g) + db2(r')
+static void TransformColorInverse(const VP8LMultipliers* const m,
+                                  uint32_t* argb_data, int num_pixels) {
+  // sign-extended multiplying constants, pre-shifted by 5.
+#define CST(X)  (((int16_t)(m->X << 8)) >> 5)   // sign-extend
+  const __m128i mults_rb = _mm_set_epi16(
+      CST(green_to_red_), CST(green_to_blue_),
+      CST(green_to_red_), CST(green_to_blue_),
+      CST(green_to_red_), CST(green_to_blue_),
+      CST(green_to_red_), CST(green_to_blue_));
+  const __m128i mults_b2 = _mm_set_epi16(
+      CST(red_to_blue_), 0, CST(red_to_blue_), 0,
+      CST(red_to_blue_), 0, CST(red_to_blue_), 0);
+#undef CST
+  const __m128i mask_ag = _mm_set1_epi32(0xff00ff00);  // alpha-green masks
+  int i;
+  for (i = 0; i + 4 <= num_pixels; i += 4) {  // 4 pixels per iteration
+    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb
+    const __m128i A = _mm_and_si128(in, mask_ag);     // a   0   g   0
+    const __m128i B = _mm_shufflelo_epi16(A, _MM_SHUFFLE(2, 2, 0, 0));
+    const __m128i C = _mm_shufflehi_epi16(B, _MM_SHUFFLE(2, 2, 0, 0));  // g0g0
+    const __m128i D = _mm_mulhi_epi16(C, mults_rb);    // x dr  x db1
+    const __m128i E = _mm_add_epi8(in, D);             // x r'  x   b'
+    const __m128i F = _mm_slli_epi16(E, 8);            // r' 0   b' 0
+    const __m128i G = _mm_mulhi_epi16(F, mults_b2);    // x db2  0  0
+    const __m128i H = _mm_srli_epi32(G, 8);            // 0  x db2  0
+    const __m128i I = _mm_add_epi8(H, F);              // r' x  b'' 0
+    const __m128i J = _mm_srli_epi16(I, 8);            // 0  r'  0  b''
+    const __m128i out = _mm_or_si128(J, A);            // re-insert a and g
+    _mm_storeu_si128((__m128i*)&argb_data[i], out);
+  }
+  // Fall-back to C-version for left-overs.
+  VP8LTransformColorInverse_C(m, argb_data + i, num_pixels - i);
+}
+
+//------------------------------------------------------------------------------
+// Color-space conversion functions
+// Reorders num_pixels pixels from B,G,R,A bytes (src) to R,G,B,A bytes (dst).
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  const __m128i* in = (const __m128i*)src;
+  __m128i* out = (__m128i*)dst;
+  while (num_pixels >= 8) {   // two 128b loads = 8 pixels per iteration
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
+    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
+    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);   // b0b2b4b6g0g2g4g6...
+    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);   // b1b3b5b7g1g3g5g7...
+    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);   // b0...b7 | g0...g7
+    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);   // r0...r7 | a0...a7
+    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);  // g0...g7 | a0...a7
+    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);  // r0...r7 | b0...b7
+    const __m128i rg0 = _mm_unpacklo_epi8(rb0, ga0);   // r0g0r1g1 ... r6g6r7g7
+    const __m128i ba0 = _mm_unpackhi_epi8(rb0, ga0);   // b0a0b1a1 ... b6a6b7a7
+    const __m128i rgba0 = _mm_unpacklo_epi16(rg0, ba0);  // rgba0|rgba1...
+    const __m128i rgba4 = _mm_unpackhi_epi16(rg0, ba0);  // rgba4|rgba5...
+    _mm_storeu_si128(out++, rgba0);
+    _mm_storeu_si128(out++, rgba4);
+    num_pixels -= 8;
+  }
+  // left-overs (fewer than 8 pixels) are handed to the plain-C version
+  VP8LConvertBGRAToRGBA_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+// Packs num_pixels BGRA pixels into 16b RGBA4444 words (4 bits per channel).
+static void ConvertBGRAToRGBA4444(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
+  const __m128i mask_0x0f = _mm_set1_epi8(0x0f);
+  const __m128i mask_0xf0 = _mm_set1_epi8(0xf0);
+  const __m128i* in = (const __m128i*)src;
+  __m128i* out = (__m128i*)dst;
+  while (num_pixels >= 8) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
+    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
+    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);    // b0b2b4b6g0g2g4g6...
+    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);    // b1b3b5b7g1g3g5g7...
+    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);    // b0...b7 | g0...g7
+    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);    // r0...r7 | a0...a7
+    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);   // g0...g7 | a0...a7
+    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);   // r0...r7 | b0...b7
+    const __m128i ga1 = _mm_srli_epi16(ga0, 4);         // g0-|g1-|...|a6-|a7-
+    const __m128i rb1 = _mm_and_si128(rb0, mask_0xf0);  // -r0|-r1|...|-b6|-b7
+    const __m128i ga2 = _mm_and_si128(ga1, mask_0x0f);  // g0-|g1-|...|a6-|a7-
+    const __m128i rgba0 = _mm_or_si128(ga2, rb1);       // rg0..rg7 | ba0..ba7
+    const __m128i rgba1 = _mm_srli_si128(rgba0, 8);     // ba0..ba7 | 0
+#ifdef WEBP_SWAP_16BIT_CSP
+    const __m128i rgba = _mm_unpacklo_epi8(rgba1, rgba0);  // barg0...barg7
+#else
+    const __m128i rgba = _mm_unpacklo_epi8(rgba0, rgba1);  // rgba0...rgba7
+#endif
+    _mm_storeu_si128(out++, rgba);   // 8 pixels x 2 bytes
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToRGBA4444_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+// Packs num_pixels BGRA pixels into 16b RGB565 (5b red, 6b green, 5b blue).
+static void ConvertBGRAToRGB565(const uint32_t* src,
+                                int num_pixels, uint8_t* dst) {
+  const __m128i mask_0xe0 = _mm_set1_epi8(0xe0);
+  const __m128i mask_0xf8 = _mm_set1_epi8(0xf8);
+  const __m128i mask_0x07 = _mm_set1_epi8(0x07);
+  const __m128i* in = (const __m128i*)src;
+  __m128i* out = (__m128i*)dst;
+  while (num_pixels >= 8) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i v0l = _mm_unpacklo_epi8(bgra0, bgra4);  // b0b4g0g4r0r4a0a4...
+    const __m128i v0h = _mm_unpackhi_epi8(bgra0, bgra4);  // b2b6g2g6r2r6a2a6...
+    const __m128i v1l = _mm_unpacklo_epi8(v0l, v0h);      // b0b2b4b6g0g2g4g6...
+    const __m128i v1h = _mm_unpackhi_epi8(v0l, v0h);      // b1b3b5b7g1g3g5g7...
+    const __m128i v2l = _mm_unpacklo_epi8(v1l, v1h);      // b0...b7 | g0...g7
+    const __m128i v2h = _mm_unpackhi_epi8(v1l, v1h);      // r0...r7 | a0...a7
+    const __m128i ga0 = _mm_unpackhi_epi64(v2l, v2h);     // g0...g7 | a0...a7
+    const __m128i rb0 = _mm_unpacklo_epi64(v2h, v2l);     // r0...r7 | b0...b7
+    const __m128i rb1 = _mm_and_si128(rb0, mask_0xf8);    // -r0..-r7|-b0..-b7
+    const __m128i g_lo1 = _mm_srli_epi16(ga0, 5);
+    const __m128i g_lo2 = _mm_and_si128(g_lo1, mask_0x07);  // g0-...g7-|xx (3b)
+    const __m128i g_hi1 = _mm_slli_epi16(ga0, 3);
+    const __m128i g_hi2 = _mm_and_si128(g_hi1, mask_0xe0);  // -g0...-g7|xx (3b)
+    const __m128i b0 = _mm_srli_si128(rb1, 8);              // -b0...-b7|0
+    const __m128i rg1 = _mm_or_si128(rb1, g_lo2);           // gr0...gr7|xx
+    const __m128i b1 = _mm_srli_epi16(b0, 3);
+    const __m128i gb1 = _mm_or_si128(b1, g_hi2);            // bg0...bg7|xx
+#ifdef WEBP_SWAP_16BIT_CSP
+    const __m128i rgba = _mm_unpacklo_epi8(gb1, rg1);     // rggb0...rggb7
+#else
+    const __m128i rgba = _mm_unpacklo_epi8(rg1, gb1);     // bgrb0...bgrb7
+#endif
+    _mm_storeu_si128(out++, rgba);   // 8 pixels x 2 bytes
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToRGB565_C((const uint32_t*)in, num_pixels, (uint8_t*)out);
+}
+// Strips alpha: converts num_pixels BGRA pixels to packed 3-byte BGR in 'dst'.
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const __m128i mask_l = _mm_set_epi32(0, 0x00ffffff, 0, 0x00ffffff);
+  const __m128i mask_h = _mm_set_epi32(0x00ffffff, 0, 0x00ffffff, 0);
+  const __m128i* in = (const __m128i*)src;
+  const uint8_t* const end = dst + num_pixels * 3;
+  // the last storel_epi64 below writes 8 bytes starting at offset 18
+  while (dst + 26 <= end) {
+    const __m128i bgra0 = _mm_loadu_si128(in++);     // bgra0|bgra1|bgra2|bgra3
+    const __m128i bgra4 = _mm_loadu_si128(in++);     // bgra4|bgra5|bgra6|bgra7
+    const __m128i a0l = _mm_and_si128(bgra0, mask_l);   // bgr0|0|bgr0|0
+    const __m128i a4l = _mm_and_si128(bgra4, mask_l);   // bgr0|0|bgr0|0
+    const __m128i a0h = _mm_and_si128(bgra0, mask_h);   // 0|bgr0|0|bgr0
+    const __m128i a4h = _mm_and_si128(bgra4, mask_h);   // 0|bgr0|0|bgr0
+    const __m128i b0h = _mm_srli_epi64(a0h, 8);         // 000b|gr00|000b|gr00
+    const __m128i b4h = _mm_srli_epi64(a4h, 8);         // 000b|gr00|000b|gr00
+    const __m128i c0 = _mm_or_si128(a0l, b0h);          // rgbrgb00|rgbrgb00
+    const __m128i c4 = _mm_or_si128(a4l, b4h);          // rgbrgb00|rgbrgb00
+    const __m128i c2 = _mm_srli_si128(c0, 8);
+    const __m128i c6 = _mm_srli_si128(c4, 8);
+    _mm_storel_epi64((__m128i*)(dst +   0), c0);
+    _mm_storel_epi64((__m128i*)(dst +   6), c2);
+    _mm_storel_epi64((__m128i*)(dst +  12), c4);
+    _mm_storel_epi64((__m128i*)(dst +  18), c6);
+    dst += 24;   // 8 pixels x 3 bytes; each 8-byte store overlaps the next
+    num_pixels -= 8;
+  }
+  // left-overs
+  VP8LConvertBGRAToBGR_C((const uint32_t*)in, num_pixels, dst);
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void VP8LDspInitSSE2(void);
+// Points the VP8L* function pointers at the SSE2 implementations above.
+WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitSSE2(void) {
+  VP8LPredictors[5] = Predictor5;  // slots 0..4 are not overridden here
+  VP8LPredictors[6] = Predictor6;
+  VP8LPredictors[7] = Predictor7;
+  VP8LPredictors[8] = Predictor8;
+  VP8LPredictors[9] = Predictor9;
+  VP8LPredictors[10] = Predictor10;
+  VP8LPredictors[11] = Predictor11;
+  VP8LPredictors[12] = Predictor12;
+  VP8LPredictors[13] = Predictor13;
+  // Inverse transforms.
+  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed;
+  VP8LTransformColorInverse = TransformColorInverse;
+  // Color-space conversions.
+  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA;
+  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444;
+  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565;
+  VP8LConvertBGRAToBGR = ConvertBGRAToBGR;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(VP8LDspInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/mips_macro.h b/drivers/webp/dsp/mips_macro.h
new file mode 100644
index 0000000000..44aba9b71d
--- /dev/null
+++ b/drivers/webp/dsp/mips_macro.h
@@ -0,0 +1,200 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS common macros
+
+#ifndef WEBP_DSP_MIPS_MACRO_H_
+#define WEBP_DSP_MIPS_MACRO_H_
+// WORK_AROUND_GCC is set only for Android GCC with LOCAL_GCC_VERSION == 0x409.
+#if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409
+#define WORK_AROUND_GCC
+#endif
+// Two-step stringification: XSTR(s) macro-expands 's' before STR() quotes it.
+#define STR(s) #s
+#define XSTR(s) STR(s)
+
+// O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
+// O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
+// O - output (O0 must not alias I0/I1, which subq.ph reads after O0 is set)
+// I - input (macro doesn't change it)
+#define ADD_SUB_HALVES(O0, O1,                                                 \
+                       I0, I1)                                                 \
+  "addq.ph          %[" #O0 "],   %[" #I0 "],  %[" #I1 "]           \n\t"      \
+  "subq.ph          %[" #O1 "],   %[" #I0 "],  %[" #I1 "]           \n\t"
+
+// O - output: halfword loaded with 'lh' relative to the %[in] asm operand
+// I - input (macro doesn't change it)
+// I[0/1] - offset in bytes, pasted as the immediate displacement
+#define LOAD_IN_X2(O0, O1,                                                     \
+                   I0, I1)                                                     \
+  "lh               %[" #O0 "],   " #I0 "(%[in])                  \n\t"        \
+  "lh               %[" #O1 "],   " #I1 "(%[in])                  \n\t"
+
+// I0 - location (base address operand); O0..O3 - words loaded with 'ulw'
+// I1..I4 - byte offsets; I5..I8 - multipliers of I9 (O0 reads I1 + I9 * I5)
+#define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3,                                    \
+                            I0, I1, I2, I3, I4, I5, I6, I7, I8, I9)            \
+  "ulw    %[" #O0 "],    " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "])       \n\t"  \
+  "ulw    %[" #O1 "],    " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "])       \n\t"  \
+  "ulw    %[" #O2 "],    " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "])       \n\t"  \
+  "ulw    %[" #O3 "],    " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "])       \n\t"
+
+// O - output: O(2k) = (Ik * kC2) >> 16, O(2k+1) = (Ik * kC1) >> 16, k = 0..3
+// IO - input/output: IO0 += I4, IO1 += I5, IO2 -= I6, IO3 -= I7
+// I - input (macro doesn't change it); kC1, kC2 - operands of the asm block
+#define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7,                          \
+                      IO0, IO1, IO2, IO3,                                      \
+                      I0, I1, I2, I3, I4, I5, I6, I7)                          \
+  "mul              %[" #O0 "],   %[" #I0 "],   %[kC2]        \n\t"            \
+  "mul              %[" #O1 "],   %[" #I0 "],   %[kC1]        \n\t"            \
+  "mul              %[" #O2 "],   %[" #I1 "],   %[kC2]        \n\t"            \
+  "mul              %[" #O3 "],   %[" #I1 "],   %[kC1]        \n\t"            \
+  "mul              %[" #O4 "],   %[" #I2 "],   %[kC2]        \n\t"            \
+  "mul              %[" #O5 "],   %[" #I2 "],   %[kC1]        \n\t"            \
+  "mul              %[" #O6 "],   %[" #I3 "],   %[kC2]        \n\t"            \
+  "mul              %[" #O7 "],   %[" #I3 "],   %[kC1]        \n\t"            \
+  "sra              %[" #O0 "],   %[" #O0 "],   16            \n\t"            \
+  "sra              %[" #O1 "],   %[" #O1 "],   16            \n\t"            \
+  "sra              %[" #O2 "],   %[" #O2 "],   16            \n\t"            \
+  "sra              %[" #O3 "],   %[" #O3 "],   16            \n\t"            \
+  "sra              %[" #O4 "],   %[" #O4 "],   16            \n\t"            \
+  "sra              %[" #O5 "],   %[" #O5 "],   16            \n\t"            \
+  "sra              %[" #O6 "],   %[" #O6 "],   16            \n\t"            \
+  "sra              %[" #O7 "],   %[" #O7 "],   16            \n\t"            \
+  "addu             %[" #IO0 "],  %[" #IO0 "],  %[" #I4 "]    \n\t"            \
+  "addu             %[" #IO1 "],  %[" #IO1 "],  %[" #I5 "]    \n\t"            \
+  "subu             %[" #IO2 "],  %[" #IO2 "],  %[" #I6 "]    \n\t"            \
+  "subu             %[" #IO3 "],  %[" #IO3 "],  %[" #I7 "]    \n\t"
+
+// O - input/output: O[31..16] = I[15..0], O[15..0] is preserved
+// I - input (macro doesn't change it)
+#define INSERT_HALF_X2(O0, O1,                                                 \
+                       I0, I1)                                                 \
+  "ins              %[" #O0 "],   %[" #I0 "], 16,    16           \n\t"        \
+  "ins              %[" #O1 "],   %[" #I1 "], 16,    16           \n\t"
+
+// O - output: O_k = I_k >> 16 (arithmetic shift), k = 0..3
+// I - input (macro doesn't change it)
+#define SRA_16(O0, O1, O2, O3,                                                 \
+               I0, I1, I2, I3)                                                 \
+  "sra              %[" #O0 "],  %[" #I0 "],  16                  \n\t"        \
+  "sra              %[" #O1 "],  %[" #I1 "],  16                  \n\t"        \
+  "sra              %[" #O2 "],  %[" #I2 "],  16                  \n\t"        \
+  "sra              %[" #O3 "],  %[" #I3 "],  16                  \n\t"
+
+// temp0[31..16 | 15..0] = temp8[31..16 | 15..0] + temp12[31..16 | 15..0]
+// temp1[31..16 | 15..0] = temp8[31..16 | 15..0] - temp12[31..16 | 15..0]
+// temp0[31..16 | 15..0] = temp0[31..16 >> 3 | 15..0 >> 3]
+// temp1[31..16 | 15..0] = temp1[31..16 >> 3 | 15..0 >> 3]
+// O - output: O(2k) = (Ik + I(k+4)) >> 3, O(2k+1) = (Ik - I(k+4)) >> 3
+// I - input (macro doesn't change it)
+#define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7,                         \
+                       I0, I1, I2, I3, I4, I5, I6, I7)                         \
+  "addq.ph          %[" #O0 "],   %[" #I0 "],   %[" #I4 "]    \n\t"            \
+  "subq.ph          %[" #O1 "],   %[" #I0 "],   %[" #I4 "]    \n\t"            \
+  "addq.ph          %[" #O2 "],   %[" #I1 "],   %[" #I5 "]    \n\t"            \
+  "subq.ph          %[" #O3 "],   %[" #I1 "],   %[" #I5 "]    \n\t"            \
+  "addq.ph          %[" #O4 "],   %[" #I2 "],   %[" #I6 "]    \n\t"            \
+  "subq.ph          %[" #O5 "],   %[" #I2 "],   %[" #I6 "]    \n\t"            \
+  "addq.ph          %[" #O6 "],   %[" #I3 "],   %[" #I7 "]    \n\t"            \
+  "subq.ph          %[" #O7 "],   %[" #I3 "],   %[" #I7 "]    \n\t"            \
+  "shra.ph          %[" #O0 "],   %[" #O0 "],   3             \n\t"            \
+  "shra.ph          %[" #O1 "],   %[" #O1 "],   3             \n\t"            \
+  "shra.ph          %[" #O2 "],   %[" #O2 "],   3             \n\t"            \
+  "shra.ph          %[" #O3 "],   %[" #O3 "],   3             \n\t"            \
+  "shra.ph          %[" #O4 "],   %[" #O4 "],   3             \n\t"            \
+  "shra.ph          %[" #O5 "],   %[" #O5 "],   3             \n\t"            \
+  "shra.ph          %[" #O6 "],   %[" #O6 "],   3             \n\t"            \
+  "shra.ph          %[" #O7 "],   %[" #O7 "],   3             \n\t"
+
+// precrq.ph.w temp0, temp8, temp2
+//   temp0 = temp8[31..16] | temp2[31..16]
+// ins temp2, temp8, 16, 16
+//   temp2 = temp8[15..0] | temp2[15..0]
+// O - output
+// IO - input/output
+// I - input (macro doesn't change it)
+#define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3,                                  \
+                              IO0, IO1, IO2, IO3,                              \
+                              I0, I1, I2, I3)                                  \
+  "precrq.ph.w      %[" #O0 "],    %[" #I0 "],  %[" #IO0 "]       \n\t"        \
+  "precrq.ph.w      %[" #O1 "],    %[" #I1 "],  %[" #IO1 "]       \n\t"        \
+  "ins              %[" #IO0 "],   %[" #I0 "],  16,    16         \n\t"        \
+  "ins              %[" #IO1 "],   %[" #I1 "],  16,    16         \n\t"        \
+  "precrq.ph.w      %[" #O2 "],    %[" #I2 "],  %[" #IO2 "]       \n\t"        \
+  "precrq.ph.w      %[" #O3 "],    %[" #I3 "],  %[" #IO3 "]       \n\t"        \
+  "ins              %[" #IO2 "],   %[" #I2 "],  16,    16         \n\t"        \
+  "ins              %[" #IO3 "],   %[" #I3 "],  16,    16         \n\t"
+
+// preceu.ph.qbr temp0, temp8
+//   temp0 = 0 | temp8[15..8] | 0 | temp8[7..0]
+// preceu.ph.qbl temp1, temp8
+//   temp1 = 0 | temp8[31..24] | 0 | temp8[23..16]
+// O - output
+// I - input (macro doesn't change it)
+#define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7,                \
+                                I0, I1, I2, I3)                                \
+  "preceu.ph.qbr    %[" #O0 "],   %[" #I0 "]                      \n\t"        \
+  "preceu.ph.qbl    %[" #O1 "],   %[" #I0 "]                      \n\t"        \
+  "preceu.ph.qbr    %[" #O2 "],   %[" #I1 "]                      \n\t"        \
+  "preceu.ph.qbl    %[" #O3 "],   %[" #I1 "]                      \n\t"        \
+  "preceu.ph.qbr    %[" #O4 "],   %[" #I2 "]                      \n\t"        \
+  "preceu.ph.qbl    %[" #O5 "],   %[" #I2 "]                      \n\t"        \
+  "preceu.ph.qbr    %[" #O6 "],   %[" #I3 "]                      \n\t"        \
+  "preceu.ph.qbl    %[" #O7 "],   %[" #I3 "]                      \n\t"
+
+// temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
+// temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
+// temp1..temp7 same as temp0
+// precrqu_s.qb.ph temp0, temp1, temp0:
+//   temp0 = temp1[31..24] | temp1[15..8] | temp0[31..24] | temp0[15..8]
+// store temp0 to dst
+// IO - input/output; I0..I7 - inputs (macro doesn't change them)
+// I8 - base address operand; I9..I12 - multipliers of I13 for the 4 stores
+#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7,               \
+                         I0, I1, I2, I3, I4, I5, I6, I7,                       \
+                         I8, I9, I10, I11, I12, I13)                           \
+  "addq.ph          %[" #IO0 "],  %[" #IO0 "],  %[" #I0 "]          \n\t"      \
+  "addq.ph          %[" #IO1 "],  %[" #IO1 "],  %[" #I1 "]          \n\t"      \
+  "addq.ph          %[" #IO2 "],  %[" #IO2 "],  %[" #I2 "]          \n\t"      \
+  "addq.ph          %[" #IO3 "],  %[" #IO3 "],  %[" #I3 "]          \n\t"      \
+  "addq.ph          %[" #IO4 "],  %[" #IO4 "],  %[" #I4 "]          \n\t"      \
+  "addq.ph          %[" #IO5 "],  %[" #IO5 "],  %[" #I5 "]          \n\t"      \
+  "addq.ph          %[" #IO6 "],  %[" #IO6 "],  %[" #I6 "]          \n\t"      \
+  "addq.ph          %[" #IO7 "],  %[" #IO7 "],  %[" #I7 "]          \n\t"      \
+  "shll_s.ph        %[" #IO0 "],  %[" #IO0 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO1 "],  %[" #IO1 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO2 "],  %[" #IO2 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO3 "],  %[" #IO3 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO4 "],  %[" #IO4 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO5 "],  %[" #IO5 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO6 "],  %[" #IO6 "],  7                   \n\t"      \
+  "shll_s.ph        %[" #IO7 "],  %[" #IO7 "],  7                   \n\t"      \
+  "precrqu_s.qb.ph  %[" #IO0 "],  %[" #IO1 "],  %[" #IO0 "]         \n\t"      \
+  "precrqu_s.qb.ph  %[" #IO2 "],  %[" #IO3 "],  %[" #IO2 "]         \n\t"      \
+  "precrqu_s.qb.ph  %[" #IO4 "],  %[" #IO5 "],  %[" #IO4 "]         \n\t"      \
+  "precrqu_s.qb.ph  %[" #IO6 "],  %[" #IO7 "],  %[" #IO6 "]         \n\t"      \
+  "usw              %[" #IO0 "],  " XSTR(I13) "*" #I9 "(%[" #I8 "])   \n\t"    \
+  "usw              %[" #IO2 "],  " XSTR(I13) "*" #I10 "(%[" #I8 "])  \n\t"    \
+  "usw              %[" #IO4 "],  " XSTR(I13) "*" #I11 "(%[" #I8 "])  \n\t"    \
+  "usw              %[" #IO6 "],  " XSTR(I13) "*" #I12 "(%[" #I8 "])  \n\t"
+// asm output list: temp1..temp10 as early-clobber ("=&r") register outputs.
+#define OUTPUT_EARLY_CLOBBER_REGS_10()                                         \
+  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),             \
+    [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),             \
+    [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9),             \
+    [temp10]"=&r"(temp10)
+// Same list as OUTPUT_EARLY_CLOBBER_REGS_10(), extended with temp11..temp18.
+#define OUTPUT_EARLY_CLOBBER_REGS_18()                                         \
+  OUTPUT_EARLY_CLOBBER_REGS_10(),                                              \
+  [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13),         \
+  [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16),         \
+  [temp17]"=&r"(temp17), [temp18]"=&r"(temp18)
+
+#endif  // WEBP_DSP_MIPS_MACRO_H_
diff --git a/drivers/webp/dsp/neon.h b/drivers/webp/dsp/neon.h
new file mode 100644
index 0000000000..0a06266848
--- /dev/null
+++ b/drivers/webp/dsp/neon.h
@@ -0,0 +1,82 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  NEON common code.
+
+#ifndef WEBP_DSP_NEON_H_
+#define WEBP_DSP_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./dsp.h"
+
+// Right now, some intrinsics functions seem slower, so we disable them
+// everywhere except aarch64 where the inline assembly is incompatible.
+#if defined(__aarch64__)
+#define WEBP_USE_INTRINSICS   // use intrinsics when possible
+#endif
+// INIT_VECTOR2(v, a, b): stores a, b into v.val[0], v.val[1] (one statement).
+#define INIT_VECTOR2(v, a, b) do {  \
+  v.val[0] = a;                     \
+  v.val[1] = b;                     \
+} while (0)
+// INIT_VECTOR3(v, a, b, c): stores a, b, c into v.val[0..2] (one statement).
+#define INIT_VECTOR3(v, a, b, c) do {  \
+  v.val[0] = a;                        \
+  v.val[1] = b;                        \
+  v.val[2] = c;                        \
+} while (0)
+// INIT_VECTOR4(v, a, b, c, d): stores a..d into v.val[0..3] (one statement).
+#define INIT_VECTOR4(v, a, b, c, d) do {  \
+  v.val[0] = a;                           \
+  v.val[1] = b;                           \
+  v.val[2] = c;                           \
+  v.val[3] = d;                           \
+} while (0)
+
+// if using intrinsics, this flag avoids some functions that make gcc-4.6.3
+// crash ("internal compiler error: in immed_double_const, at emit-rtl.").
+// (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183)
+#if !(LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
+#define WORK_AROUND_GCC
+#endif
+// Transposes the 4x4 matrix of 32b values stored in rows.val[0..3].
+static WEBP_INLINE int32x4x4_t Transpose4x4(const int32x4x4_t rows) {
+  uint64x2x2_t row01, row23;
+  // Reinterpret each row as two 64b halves so the halves can be exchanged.
+  row01.val[0] = vreinterpretq_u64_s32(rows.val[0]);
+  row01.val[1] = vreinterpretq_u64_s32(rows.val[1]);
+  row23.val[0] = vreinterpretq_u64_s32(rows.val[2]);
+  row23.val[1] = vreinterpretq_u64_s32(rows.val[3]);
+  // Transpose 64-bit values (there's no vswp equivalent)
+  {
+    const uint64x1_t row0h = vget_high_u64(row01.val[0]);
+    const uint64x1_t row2l = vget_low_u64(row23.val[0]);
+    const uint64x1_t row1h = vget_high_u64(row01.val[1]);
+    const uint64x1_t row3l = vget_low_u64(row23.val[1]);
+    row01.val[0] = vcombine_u64(vget_low_u64(row01.val[0]), row2l);
+    row23.val[0] = vcombine_u64(row0h, vget_high_u64(row23.val[0]));
+    row01.val[1] = vcombine_u64(vget_low_u64(row01.val[1]), row3l);
+    row23.val[1] = vcombine_u64(row1h, vget_high_u64(row23.val[1]));
+  }
+  {  // interleave the 32b lanes of each row pair to finish the transpose
+    const int32x4x2_t out01 = vtrnq_s32(vreinterpretq_s32_u64(row01.val[0]),
+                                        vreinterpretq_s32_u64(row01.val[1]));
+    const int32x4x2_t out23 = vtrnq_s32(vreinterpretq_s32_u64(row23.val[0]),
+                                        vreinterpretq_s32_u64(row23.val[1]));
+    int32x4x4_t out;
+    out.val[0] = out01.val[0];
+    out.val[1] = out01.val[1];
+    out.val[2] = out23.val[0];
+    out.val[3] = out23.val[1];
+    return out;
+  }
+}
+
+#endif  // WEBP_DSP_NEON_H_
diff --git a/drivers/webp/dsp/rescaler.c b/drivers/webp/dsp/rescaler.c
new file mode 100644
index 0000000000..bc743d5dc5
--- /dev/null
+++ b/drivers/webp/dsp/rescaler.c
@@ -0,0 +1,238 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./dsp.h"
+#include "../utils/rescaler.h"
+
+//------------------------------------------------------------------------------
+// Implementations of critical functions ImportRow / ExportRow
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)  // half of WEBP_RESCALER_ONE; added before the shift to round
+#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)  // 64-bit x * y, plus ROUNDER, shifted right by RFIX bits
+
+//------------------------------------------------------------------------------
+// Row import
+
+void WebPRescalerImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) {
+  // Horizontal upscale of one row into wrk->frow, one interleaved channel at a
+  // time: each output is a linear blend of two neighbours, scaled by x_add.
+  const int x_stride = wrk->num_channels;
+  const int frow_end = wrk->dst_width * wrk->num_channels;
+  int c;
+  assert(!WebPRescalerInputDone(wrk));
+  assert(wrk->x_expand);
+  for (c = 0; c < x_stride; ++c) {
+    int x_in = c + x_stride;   // position of the right-hand sample
+    int pos = c;               // write position in wrk->frow
+    int accum = wrk->x_add;    // weight of the left-hand sample
+    int lo = src[c];
+    int hi = (wrk->src_width > 1) ? src[x_in] : lo;
+    for (;;) {
+      wrk->frow[pos] = hi * wrk->x_add + (lo - hi) * accum;
+      pos += x_stride;
+      if (pos >= frow_end) break;
+      accum -= wrk->x_sub;
+      if (accum >= 0) continue;
+      // Moved past 'hi': slide the sample pair one pixel to the right.
+      lo = hi;
+      x_in += x_stride;
+      assert(x_in < wrk->src_width * x_stride);
+      hi = src[x_in];
+      accum += wrk->x_add;
+    }
+    assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
+  }
+}
+
+void WebPRescalerImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) {
+  // Horizontal downscale of one row into wrk->frow, one interleaved channel at
+  // a time: source samples are summed per output pixel, and the part of the
+  // last sample that overshoots is carried over to the next output pixel.
+  const int x_stride = wrk->num_channels;
+  const int frow_end = wrk->dst_width * wrk->num_channels;
+  int c;
+  assert(!WebPRescalerInputDone(wrk));
+  assert(!wrk->x_expand);
+  for (c = 0; c < x_stride; ++c) {
+    int x_in = c;
+    int accum = 0;
+    uint32_t sum = 0;
+    int pos;
+    for (pos = c; pos < frow_end; pos += x_stride) {
+      uint32_t last = 0;
+      rescaler_t frac;
+      // Consume source samples until the accumulator is used up.
+      for (accum += wrk->x_add; accum > 0; accum -= wrk->x_sub) {
+        assert(x_in < wrk->src_width * x_stride);
+        last = src[x_in];
+        sum += last;
+        x_in += x_stride;
+      }
+      // Emit next horizontal pixel, then restart 'sum' from the carried part.
+      frac = last * (-accum);
+      wrk->frow[pos] = sum * wrk->x_sub - frac;
+      sum = (int)MULT_FIX(frac, wrk->fx_scale);
+    }
+    assert(accum == 0);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Row export
+
+void WebPRescalerExportRowExpandC(WebPRescaler* const wrk) {
+  // Vertical upscale: writes one output row, blending wrk->frow with wrk->irow.
+  const int count = wrk->dst_width * wrk->num_channels;
+  const rescaler_t* const frow = wrk->frow;
+  rescaler_t* const irow = wrk->irow;
+  uint8_t* const out = wrk->dst;
+  int i;
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(wrk->y_expand);
+  assert(wrk->y_sub != 0);
+  if (wrk->y_accum != 0) {
+    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+    for (i = 0; i < count; ++i) {
+      const uint64_t I = (uint64_t)A * frow[i] + (uint64_t)B * irow[i];
+      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+      const int v = (int)MULT_FIX(J, wrk->fy_scale);
+      assert(v >= 0 && v <= 255);
+      out[i] = v;
+    }
+    return;
+  }
+  for (i = 0; i < count; ++i) {  // y_accum == 0: only frow contributes
+    const uint32_t J = frow[i];
+    const int v = (int)MULT_FIX(J, wrk->fy_scale);
+    assert(v >= 0 && v <= 255);
+    out[i] = v;
+  }
+}
+
+void WebPRescalerExportRowShrinkC(WebPRescaler* const wrk) {  // irow -> dst
+  const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);  // scales the share of frow held back
+  const int count = wrk->dst_width * wrk->num_channels;
+  const rescaler_t* const frow = wrk->frow;
+  rescaler_t* const irow = wrk->irow;
+  uint8_t* const out = wrk->dst;
+  int i;
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(!wrk->y_expand);
+  if (yscale == 0) {  // nothing is carried over: emit irow and clear it
+    for (i = 0; i < count; ++i) {
+      const int v = (int)MULT_FIX(irow[i], wrk->fxy_scale);
+      assert(v >= 0 && v <= 255);
+      out[i] = v;
+      irow[i] = 0;
+    }
+    return;
+  }
+  for (i = 0; i < count; ++i) {  // emit irow minus the held-back share of frow
+    const uint32_t frac = (uint32_t)MULT_FIX(frow[i], yscale);
+    const int v = (int)MULT_FIX(irow[i] - frac, wrk->fxy_scale);
+    assert(v >= 0 && v <= 255);
+    out[i] = v;
+    irow[i] = frac;   // new fractional start
+  }
+}
+
+#undef MULT_FIX
+#undef ROUNDER
+
+//------------------------------------------------------------------------------
+// Main entry calls
+
+void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) {
+  // Hands the source row to the horizontal kernel matching wrk->x_expand.
+  WebPRescalerImportRowFunc import_row;
+  assert(!WebPRescalerInputDone(wrk));
+  import_row = wrk->x_expand ? WebPRescalerImportRowExpand
+                             : WebPRescalerImportRowShrink;
+  import_row(wrk, src);
+}
+
+void WebPRescalerExportRow(WebPRescaler* const wrk) {
+  // Emits one row if wrk->y_accum <= 0 and advances the vertical state.
+  int i;
+  if (wrk->y_accum > 0) return;
+  assert(!WebPRescalerOutputDone(wrk));
+  if (wrk->y_expand) {
+    WebPRescalerExportRowExpand(wrk);
+  } else if (wrk->fxy_scale) {
+    WebPRescalerExportRowShrink(wrk);
+  } else {  // very special case for src = dst = 1x1
+    assert(wrk->src_width == 1 && wrk->dst_width <= 2);
+    assert(wrk->src_height == 1 && wrk->dst_height == 1);
+    for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) {
+      wrk->dst[i] = wrk->irow[i];
+      wrk->irow[i] = 0;
+    }
+  }
+  ++wrk->dst_y;
+  wrk->dst += wrk->dst_stride;
+  wrk->y_accum += wrk->y_add;
+}
+
+//------------------------------------------------------------------------------
+
+WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
+WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
+// Row-import kernels above, row-export kernels below; WebPRescalerDspInit() assigns all four.
+WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
+WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
+// Per-ISA initializers, defined in other files; called by WebPRescalerDspInit() when enabled at build time.
+extern void WebPRescalerDspInitSSE2(void);
+extern void WebPRescalerDspInitMIPS32(void);
+extern void WebPRescalerDspInitMIPSdspR2(void);
+extern void WebPRescalerDspInitNEON(void);
+// VP8GetCPUInfo value seen by the last init; starts as this variable's own address (presumably so the first check never matches).
+static volatile VP8CPUInfo rescaler_last_cpuinfo_used =
+    (VP8CPUInfo)&rescaler_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInit(void) {
+  // Nothing to do if the kernels were already selected for this VP8GetCPUInfo.
+  if (rescaler_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+  // Portable C kernels first; the CPU-specific initializers may replace them.
+  WebPRescalerExportRowShrink = WebPRescalerExportRowShrinkC;
+  WebPRescalerExportRowExpand = WebPRescalerExportRowExpandC;
+  WebPRescalerImportRowShrink = WebPRescalerImportRowShrinkC;
+  WebPRescalerImportRowExpand = WebPRescalerImportRowExpandC;
+
+#if defined(WEBP_USE_SSE2)
+  if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSSE2)) {
+    WebPRescalerDspInitSSE2();
+  }
+#endif
+#if defined(WEBP_USE_NEON)
+  if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON)) {
+    WebPRescalerDspInitNEON();
+  }
+#endif
+#if defined(WEBP_USE_MIPS32)
+  if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kMIPS32)) {
+    WebPRescalerDspInitMIPS32();
+  }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+  if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kMIPSdspR2)) {
+    WebPRescalerDspInitMIPSdspR2();
+  }
+#endif
+  rescaler_last_cpuinfo_used = VP8GetCPUInfo;
+}
diff --git a/drivers/webp/dsp/rescaler_mips32.c b/drivers/webp/dsp/rescaler_mips32.c
new file mode 100644
index 0000000000..ddaa391336
--- /dev/null
+++ b/drivers/webp/dsp/rescaler_mips32.c
@@ -0,0 +1,291 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of rescaling functions
+//
+// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include <assert.h>
+#include "../utils/rescaler.h"
+
+//------------------------------------------------------------------------------
+// Row import
+
+static void ImportRowShrink(WebPRescaler* const wrk, const uint8_t* src) {
+  const int x_stride = wrk->num_channels;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  const int fx_scale = wrk->fx_scale;
+  const int x_add = wrk->x_add;
+  const int x_sub = wrk->x_sub;
+  const int x_stride1 = x_stride << 2;  // frow step in bytes (4 per 'sw' store)
+  int channel;
+  assert(!wrk->x_expand);
+  assert(!WebPRescalerInputDone(wrk));
+  // MIPS32 asm horizontal shrink: per channel, sums source bytes into wrk->frow.
+  for (channel = 0; channel < x_stride; ++channel) {
+    const uint8_t* src1 = src + channel;
+    rescaler_t* frow = wrk->frow + channel;
+    int temp1, temp2, temp3;
+    int base, frac, sum;
+    int accum, accum1;
+    int loop_c = x_out_max - channel;  // remaining outputs; decremented in asm
+    // loop_c is written by the asm, hence a "+r" operand (not input-only).
+    __asm__ volatile (
+      "li     %[temp1],   0x8000                    \n\t"  // temp1 * temp2 = 2^31:
+      "li     %[temp2],   0x10000                   \n\t"  // the rounding term
+      "li     %[sum],     0                         \n\t"
+      "li     %[accum],   0                         \n\t"
+    "1:                                             \n\t"  // one output sample
+      "addu   %[accum],   %[accum],   %[x_add]      \n\t"
+      "li     %[base],    0                         \n\t"
+      "blez   %[accum],   3f                        \n\t"
+    "2:                                             \n\t"  // consume source bytes
+      "lbu    %[base],    0(%[src1])                \n\t"
+      "subu   %[accum],   %[accum],   %[x_sub]      \n\t"
+      "addu   %[src1],    %[src1],    %[x_stride]   \n\t"
+      "addu   %[sum],     %[sum],     %[base]       \n\t"
+      "bgtz   %[accum],   2b                        \n\t"
+    "3:                                             \n\t"
+      "negu   %[accum1],  %[accum]                  \n\t"
+      "mul    %[frac],    %[base],    %[accum1]     \n\t"  // frac = base * -accum
+      "mul    %[temp3],   %[sum],     %[x_sub]      \n\t"
+      "subu   %[loop_c],  %[loop_c],  %[x_stride]   \n\t"
+      "mult   %[temp1],   %[temp2]                  \n\t"  // HI/LO = 2^31
+      "maddu  %[frac],    %[fx_scale]               \n\t"  // += frac * fx_scale
+      "mfhi   %[sum]                                \n\t"  // next sum = upper word
+      "subu   %[temp3],   %[temp3],   %[frac]       \n\t"
+      "sw     %[temp3],   0(%[frow])                \n\t"  // sum * x_sub - frac
+      "addu   %[frow],    %[frow],    %[x_stride1]  \n\t"
+      "bgtz   %[loop_c],  1b                        \n\t"
+      : [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3),
+        [sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac),
+        [frow]"+r"(frow), [accum1]"=&r"(accum1), [loop_c]"+r"(loop_c),
+        [temp2]"=&r"(temp2), [temp1]"=&r"(temp1)
+      : [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale),
+        [x_sub]"r"(x_sub), [x_add]"r"(x_add),
+        [x_stride1]"r"(x_stride1)
+      : "memory", "hi", "lo"
+    );
+    assert(accum == 0);
+  }
+}
+
+static void ImportRowExpand(WebPRescaler* const wrk, const uint8_t* src) {
+  const int x_stride = wrk->num_channels;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  const int x_add = wrk->x_add;
+  const int x_sub = wrk->x_sub;
+  const int src_width = wrk->src_width;
+  const int x_stride1 = x_stride << 2;  // frow step in bytes (4 per 'sw' store)
+  int channel;
+  assert(wrk->x_expand);
+  assert(!WebPRescalerInputDone(wrk));
+  // MIPS32 asm horizontal expand: per channel, blends neighbouring source bytes into wrk->frow.
+  for (channel = 0; channel < x_stride; ++channel) {
+    const uint8_t* src1 = src + channel;
+    rescaler_t* frow = wrk->frow + channel;
+    int temp1, temp2, temp3, temp4;  // temp2 = left sample, temp1 = right sample
+    int frac;  // bound as an output operand below but never referenced by the asm text
+    int accum;
+    int x_out = channel;
+    // NOTE(review): no explicit branch delay slots; presumably relies on the assembler's reorder mode.
+    __asm__ volatile (
+      "addiu  %[temp3],   %[src_width], -1            \n\t"
+      "lbu    %[temp2],   0(%[src1])                  \n\t"  // left = first sample
+      "addu   %[src1],    %[src1],      %[x_stride]   \n\t"
+      "bgtz   %[temp3],   0f                          \n\t"  // src_width > 1 ?
+      "addiu  %[temp1],   %[temp2],     0             \n\t"  // no: right = left
+      "b      3f                                      \n\t"
+    "0:                                               \n\t"
+      "lbu    %[temp1],   0(%[src1])                  \n\t"  // yes: right = next sample
+    "3:                                               \n\t"
+      "addiu  %[accum],   %[x_add],     0             \n\t"  // accum = x_add
+    "1:                                               \n\t"  // one output sample
+      "subu   %[temp3],   %[temp2],     %[temp1]      \n\t"
+      "mul    %[temp3],   %[temp3],     %[accum]      \n\t"  // (left - right) * accum
+      "mul    %[temp4],   %[temp1],     %[x_add]      \n\t"  // right * x_add
+      "addu   %[temp3],   %[temp4],     %[temp3]      \n\t"
+      "sw     %[temp3],   0(%[frow])                  \n\t"
+      "addu   %[frow],    %[frow],      %[x_stride1]  \n\t"
+      "addu   %[x_out],   %[x_out],     %[x_stride]   \n\t"
+      "subu   %[temp3],   %[x_out],     %[x_out_max]  \n\t"
+      "bgez   %[temp3],   2f                          \n\t"  // x_out >= x_out_max: done
+      "subu   %[accum],   %[accum],     %[x_sub]      \n\t"
+      "bgez   %[accum],   4f                          \n\t"  // keep the current sample pair
+      "addiu  %[temp2],   %[temp1],     0             \n\t"  // left = right
+      "addu   %[src1],    %[src1],      %[x_stride]   \n\t"
+      "lbu    %[temp1],   0(%[src1])                  \n\t"  // right = next sample
+      "addu   %[accum],   %[accum],     %[x_add]      \n\t"
+    "4:                                               \n\t"
+      "b      1b                                      \n\t"
+    "2:                                               \n\t"
+      : [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1),
+        [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
+        [x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow)
+      : [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub),
+        [x_stride1]"r"(x_stride1), [src_width]"r"(src_width),
+        [x_out_max]"r"(x_out_max)
+      : "memory", "hi", "lo"
+    );
+    assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Row export
+
+static void ExportRowExpand(WebPRescaler* const wrk) {  // MIPS32 asm vertical expand: frow (and irow) -> dst
+  uint8_t* dst = wrk->dst;
+  rescaler_t* irow = wrk->irow;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  const rescaler_t* frow = wrk->frow;
+  int temp0, temp1, temp3, temp4, temp5, loop_end;  // loop_end holds an address in an int
+  const int temp2 = (int)wrk->fy_scale;
+  const int temp6 = x_out_max << 2;  // row length in bytes (4 per 'lw' load)
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(wrk->y_expand);
+  assert(wrk->y_sub != 0);
+  if (wrk->y_accum == 0) {  // only frow contributes; temp1 is bound but unused in this asm
+    __asm__ volatile (
+      "li       %[temp3],    0x10000                    \n\t"  // temp3 * temp4 = 2^31:
+      "li       %[temp4],    0x8000                     \n\t"  // the rounding term
+      "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+    "1:                                                 \n\t"  // body runs before the test: assumes x_out_max > 0
+      "lw       %[temp0],    0(%[frow])                 \n\t"
+      "addiu    %[dst],      %[dst],      1             \n\t"
+      "addiu    %[frow],     %[frow],     4             \n\t"
+      "mult     %[temp3],    %[temp4]                   \n\t"  // HI/LO = 2^31
+      "maddu    %[temp0],    %[temp2]                   \n\t"  // += frow * fy_scale
+      "mfhi     %[temp5]                                \n\t"  // upper 32 bits
+      "sb       %[temp5],    -1(%[dst])                 \n\t"
+      "bne      %[frow],     %[loop_end], 1b            \n\t"
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+        [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
+        [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
+      : [temp2]"r"(temp2), [temp6]"r"(temp6)
+      : "memory", "hi", "lo"
+    );
+  } else {  // blend: A weights frow, B weights irow
+    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+    __asm__ volatile (
+      "li       %[temp3],    0x10000                    \n\t"
+      "li       %[temp4],    0x8000                     \n\t"
+      "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+    "1:                                                 \n\t"
+      "lw       %[temp0],    0(%[frow])                 \n\t"
+      "lw       %[temp1],    0(%[irow])                 \n\t"
+      "addiu    %[dst],      %[dst],      1             \n\t"
+      "mult     %[temp3],    %[temp4]                   \n\t"  // HI/LO = 2^31
+      "maddu    %[A],        %[temp0]                   \n\t"  // += A * frow
+      "maddu    %[B],        %[temp1]                   \n\t"  // += B * irow
+      "addiu    %[frow],     %[frow],     4             \n\t"
+      "addiu    %[irow],     %[irow],     4             \n\t"
+      "mfhi     %[temp5]                                \n\t"  // blended value (upper 32 bits)
+      "mult     %[temp3],    %[temp4]                   \n\t"  // HI/LO = 2^31 again
+      "maddu    %[temp5],    %[temp2]                   \n\t"  // += blended * fy_scale
+      "mfhi     %[temp5]                                \n\t"
+      "sb       %[temp5],    -1(%[dst])                 \n\t"
+      "bne      %[frow],     %[loop_end], 1b            \n\t"
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+        [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
+        [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
+      : [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
+      : "memory", "hi", "lo"
+    );
+  }
+}
+
+static void ExportRowShrink(WebPRescaler* const wrk) {
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  uint8_t* dst = wrk->dst;
+  rescaler_t* irow = wrk->irow;
+  const rescaler_t* frow = wrk->frow;
+  const int yscale = wrk->fy_scale * (-wrk->y_accum);
+  int temp0, temp1, temp3, temp4, temp5, loop_end;  // loop_end holds an address in an int
+  const int temp2 = (int)wrk->fxy_scale;
+  const int temp6 = x_out_max << 2;  // row length in bytes (4 per 'lw' load)
+  // MIPS32 asm vertical shrink: turns the sums in irow into dst bytes and re-seeds irow.
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(!wrk->y_expand);
+  assert(wrk->fxy_scale != 0);
+  if (yscale) {  // hold back a share of frow for the next output row
+    __asm__ volatile (
+      "li       %[temp3],    0x10000                    \n\t"  // temp3 * temp4 = 2^31:
+      "li       %[temp4],    0x8000                     \n\t"  // the rounding term
+      "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+    "1:                                                 \n\t"  // body runs before the test: assumes x_out_max > 0
+      "lw       %[temp0],    0(%[frow])                 \n\t"
+      "mult     %[temp3],    %[temp4]                   \n\t"  // HI/LO = 2^31
+      "addiu    %[frow],     %[frow],     4             \n\t"
+      "maddu    %[temp0],    %[yscale]                  \n\t"  // += frow * yscale
+      "mfhi     %[temp1]                                \n\t"  // temp1 = held-back part (frac)
+      "lw       %[temp0],    0(%[irow])                 \n\t"
+      "addiu    %[dst],      %[dst],      1             \n\t"
+      "addiu    %[irow],     %[irow],     4             \n\t"
+      "subu     %[temp0],    %[temp0],    %[temp1]      \n\t"  // irow - frac
+      "mult     %[temp3],    %[temp4]                   \n\t"
+      "maddu    %[temp0],    %[temp2]                   \n\t"  // += (irow - frac) * fxy_scale
+      "mfhi     %[temp5]                                \n\t"
+      "sw       %[temp1],    -4(%[irow])                \n\t"  // irow = frac: new fractional start
+      "sb       %[temp5],    -1(%[dst])                 \n\t"
+      "bne      %[frow],     %[loop_end], 1b            \n\t"
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+        [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
+        [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
+      : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6)
+      : "memory", "hi", "lo"
+    );
+  } else {  // yscale == 0: emit irow scaled by fxy_scale, then clear it; temp1 unused here
+    __asm__ volatile (
+      "li       %[temp3],    0x10000                    \n\t"
+      "li       %[temp4],    0x8000                     \n\t"
+      "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
+    "1:                                                 \n\t"
+      "lw       %[temp0],    0(%[irow])                 \n\t"
+      "addiu    %[dst],      %[dst],      1             \n\t"
+      "addiu    %[irow],     %[irow],     4             \n\t"
+      "mult     %[temp3],    %[temp4]                   \n\t"  // HI/LO = 2^31
+      "maddu    %[temp0],    %[temp2]                   \n\t"  // += irow * fxy_scale
+      "mfhi     %[temp5]                                \n\t"
+      "sw       $zero,       -4(%[irow])                \n\t"  // irow = 0
+      "sb       %[temp5],    -1(%[dst])                 \n\t"
+      "bne      %[irow],     %[loop_end], 1b            \n\t"
+      : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+        [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
+        [dst]"+r"(dst), [loop_end]"=&r"(loop_end)
+      : [temp2]"r"(temp2), [temp6]"r"(temp6)
+      : "memory", "hi", "lo"
+    );
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPRescalerDspInitMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPS32(void) {
+  WebPRescalerExportRowShrink = ExportRowShrink;  // install the MIPS32 kernels
+  WebPRescalerExportRowExpand = ExportRowExpand;
+  WebPRescalerImportRowShrink = ImportRowShrink;
+  WebPRescalerImportRowExpand = ImportRowExpand;
+}
+
+#else  // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPS32)
+
+#endif  // WEBP_USE_MIPS32
diff --git a/drivers/webp/dsp/rescaler_mips_dsp_r2.c b/drivers/webp/dsp/rescaler_mips_dsp_r2.c
new file mode 100644
index 0000000000..b457d0a30a
--- /dev/null
+++ b/drivers/webp/dsp/rescaler_mips_dsp_r2.c
@@ -0,0 +1,314 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of rescaling functions
+//
+// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include <assert.h>
+#include "../utils/rescaler.h"
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)  // half of WEBP_RESCALER_ONE; added before the shift to round
+#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)  // 64-bit x * y, plus ROUNDER, shifted right by RFIX bits
+
+//------------------------------------------------------------------------------
+// Row export
+
+static void ExportRowShrink(WebPRescaler* const wrk) {  // MIPS DSP R2 vertical shrink: irow -> dst
+  int i;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  uint8_t* dst = wrk->dst;
+  rescaler_t* irow = wrk->irow;
+  const rescaler_t* frow = wrk->frow;
+  const int yscale = wrk->fy_scale * (-wrk->y_accum);
+  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;  // loop_end holds an address in an int
+  const int temp7 = (int)wrk->fxy_scale;
+  const int temp6 = (x_out_max & ~0x3) << 2;  // bytes covered by the asm loop (whole groups of 4 values)
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(!wrk->y_expand);
+  assert(wrk->fxy_scale != 0);
+  if (yscale) {  // hold back a share of frow for the next output row
+    if (x_out_max >= 4) {  // asm handles 4 values per iteration, one per accumulator $ac0..$ac3
+      int temp8, temp9, temp10, temp11;
+      __asm__ volatile (
+        "li       %[temp3],    0x10000                    \n\t"  // temp3 * temp4 = 2^31:
+        "li       %[temp4],    0x8000                     \n\t"  // the rounding term
+        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+      "1:                                                 \n\t"
+        "lw       %[temp0],    0(%[frow])                 \n\t"
+        "lw       %[temp1],    4(%[frow])                 \n\t"
+        "lw       %[temp2],    8(%[frow])                 \n\t"
+        "lw       %[temp5],    12(%[frow])                \n\t"
+        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"  // acc = 2^31
+        "maddu    $ac0,        %[temp0],    %[yscale]     \n\t"  // acc += frow * yscale
+        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac1,        %[temp1],    %[yscale]     \n\t"
+        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac2,        %[temp2],    %[yscale]     \n\t"
+        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac3,        %[temp5],    %[yscale]     \n\t"
+        "addiu    %[frow],     %[frow],     16            \n\t"
+        "mfhi     %[temp0],    $ac0                       \n\t"  // held-back parts (frac) = upper words
+        "mfhi     %[temp1],    $ac1                       \n\t"
+        "mfhi     %[temp2],    $ac2                       \n\t"
+        "mfhi     %[temp5],    $ac3                       \n\t"
+        "lw       %[temp8],    0(%[irow])                 \n\t"
+        "lw       %[temp9],    4(%[irow])                 \n\t"
+        "lw       %[temp10],   8(%[irow])                 \n\t"
+        "lw       %[temp11],   12(%[irow])                \n\t"
+        "addiu    %[dst],      %[dst],      4             \n\t"
+        "addiu    %[irow],     %[irow],     16            \n\t"
+        "subu     %[temp8],    %[temp8],    %[temp0]      \n\t"  // irow - frac
+        "subu     %[temp9],    %[temp9],    %[temp1]      \n\t"
+        "subu     %[temp10],   %[temp10],   %[temp2]      \n\t"
+        "subu     %[temp11],   %[temp11],   %[temp5]      \n\t"
+        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac0,        %[temp8],    %[temp7]      \n\t"  // acc += (irow - frac) * fxy_scale
+        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac1,        %[temp9],    %[temp7]      \n\t"
+        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac2,        %[temp10],   %[temp7]      \n\t"
+        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac3,        %[temp11],   %[temp7]      \n\t"
+        "mfhi     %[temp8],    $ac0                       \n\t"
+        "mfhi     %[temp9],    $ac1                       \n\t"
+        "mfhi     %[temp10],   $ac2                       \n\t"
+        "mfhi     %[temp11],   $ac3                       \n\t"
+        "sw       %[temp0],    -16(%[irow])               \n\t"  // irow = frac: new fractional start
+        "sw       %[temp1],    -12(%[irow])               \n\t"
+        "sw       %[temp2],    -8(%[irow])                \n\t"
+        "sw       %[temp5],    -4(%[irow])                \n\t"
+        "sb       %[temp8],    -4(%[dst])                 \n\t"
+        "sb       %[temp9],    -3(%[dst])                 \n\t"
+        "sb       %[temp10],   -2(%[dst])                 \n\t"
+        "sb       %[temp11],   -1(%[dst])                 \n\t"
+        "bne      %[frow],     %[loop_end], 1b            \n\t"
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
+          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
+          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
+          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
+        : [temp7]"r"(temp7), [yscale]"r"(yscale), [temp6]"r"(temp6)
+        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+      );
+    }
+    for (i = 0; i < (x_out_max & 0x3); ++i) {  // C tail: the 0..3 values left after the asm loop
+      const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale);
+      const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
+      assert(v >= 0 && v <= 255);
+      *dst++ = v;
+      *irow++ = frac;   // new fractional start
+    }
+  } else {  // yscale == 0: emit irow scaled by fxy_scale, then clear it
+    if (x_out_max >= 4) {
+      __asm__ volatile (
+        "li       %[temp3],    0x10000                    \n\t"
+        "li       %[temp4],    0x8000                     \n\t"
+        "addu     %[loop_end], %[irow],     %[temp6]      \n\t"
+      "1:                                                 \n\t"
+        "lw       %[temp0],    0(%[irow])                 \n\t"
+        "lw       %[temp1],    4(%[irow])                 \n\t"
+        "lw       %[temp2],    8(%[irow])                 \n\t"
+        "lw       %[temp5],    12(%[irow])                \n\t"
+        "addiu    %[dst],      %[dst],      4             \n\t"
+        "addiu    %[irow],     %[irow],     16            \n\t"
+        "mult     $ac0,        %[temp3],    %[temp4]      \n\t"  // acc = 2^31
+        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"  // acc += irow * fxy_scale
+        "mult     $ac1,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
+        "mult     $ac2,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
+        "mult     $ac3,        %[temp3],    %[temp4]      \n\t"
+        "maddu    $ac3,        %[temp5],    %[temp7]      \n\t"
+        "mfhi     %[temp0],    $ac0                       \n\t"
+        "mfhi     %[temp1],    $ac1                       \n\t"
+        "mfhi     %[temp2],    $ac2                       \n\t"
+        "mfhi     %[temp5],    $ac3                       \n\t"
+        "sw       $zero,       -16(%[irow])               \n\t"  // irow = 0
+        "sw       $zero,       -12(%[irow])               \n\t"
+        "sw       $zero,       -8(%[irow])                \n\t"
+        "sw       $zero,       -4(%[irow])                \n\t"
+        "sb       %[temp0],    -4(%[dst])                 \n\t"
+        "sb       %[temp1],    -3(%[dst])                 \n\t"
+        "sb       %[temp2],    -2(%[dst])                 \n\t"
+        "sb       %[temp5],    -1(%[dst])                 \n\t"
+        "bne      %[irow],     %[loop_end], 1b            \n\t"
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [irow]"+r"(irow),
+          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
+        : [temp7]"r"(temp7), [temp6]"r"(temp6)
+        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+      );
+    }
+    for (i = 0; i < (x_out_max & 0x3); ++i) {  // C tail: the 0..3 values left after the asm loop
+      const int v = (int)MULT_FIX(*irow, wrk->fxy_scale);
+      assert(v >= 0 && v <= 255);
+      *dst++ = v;
+      *irow++ = 0;
+    }
+  }
+}
+
+static void ExportRowExpand(WebPRescaler* const wrk) {  // MIPS DSP R2, y-expand
+  int i;
+  uint8_t* dst = wrk->dst;
+  rescaler_t* irow = wrk->irow;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;  // samples per row
+  const rescaler_t* frow = wrk->frow;
+  int temp0, temp1, temp2, temp3, temp4, temp5, loop_end;
+  const int temp6 = (x_out_max & ~0x3) << 2;  // bytes of frow consumed by the asm
+  const int temp7 = (int)wrk->fy_scale;
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(wrk->y_expand);
+  assert(wrk->y_sub != 0);
+  if (wrk->y_accum == 0) {  // no blending with irow: only scale frow by fy_scale
+    if (x_out_max >= 4) {  // asm handles groups of 4 samples
+      __asm__ volatile (
+        "li       %[temp4],    0x10000                    \n\t"
+        "li       %[temp5],    0x8000                     \n\t"
+        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+      "1:                                                 \n\t"
+        "lw       %[temp0],    0(%[frow])                 \n\t"
+        "lw       %[temp1],    4(%[frow])                 \n\t"
+        "lw       %[temp2],    8(%[frow])                 \n\t"
+        "lw       %[temp3],    12(%[frow])                \n\t"
+        "addiu    %[dst],      %[dst],      4             \n\t"
+        "addiu    %[frow],     %[frow],     16            \n\t"
+        "mult     $ac0,        %[temp4],    %[temp5]      \n\t"  // ac0 = 2^31
+        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"  // += frow * scale
+        "mult     $ac1,        %[temp4],    %[temp5]      \n\t"
+        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
+        "mult     $ac2,        %[temp4],    %[temp5]      \n\t"
+        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
+        "mult     $ac3,        %[temp4],    %[temp5]      \n\t"
+        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
+        "mfhi     %[temp0],    $ac0                       \n\t"  // keep high word
+        "mfhi     %[temp1],    $ac1                       \n\t"
+        "mfhi     %[temp2],    $ac2                       \n\t"
+        "mfhi     %[temp3],    $ac3                       \n\t"
+        "sb       %[temp0],    -4(%[dst])                 \n\t"  // dst is already +4
+        "sb       %[temp1],    -3(%[dst])                 \n\t"
+        "sb       %[temp2],    -2(%[dst])                 \n\t"
+        "sb       %[temp3],    -1(%[dst])                 \n\t"
+        "bne      %[frow],     %[loop_end], 1b            \n\t"
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
+          [dst]"+r"(dst), [loop_end]"=&r"(loop_end), [temp2]"=&r"(temp2)
+        : [temp7]"r"(temp7), [temp6]"r"(temp6)
+        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+      );
+    }
+    for (i = 0; i < (x_out_max & 0x3); ++i) {  // leftovers; frow/dst were advanced
+      const uint32_t J = *frow++;
+      const int v = (int)MULT_FIX(J, wrk->fy_scale);
+      assert(v >= 0 && v <= 255);
+      *dst++ = v;
+    }
+  } else {  // blend frow and irow with weights A and B, then scale by fy_scale
+    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+    if (x_out_max >= 4) {
+      int temp8, temp9, temp10, temp11;
+      __asm__ volatile (
+        "li       %[temp8],    0x10000                    \n\t"
+        "li       %[temp9],    0x8000                     \n\t"
+        "addu     %[loop_end], %[frow],     %[temp6]      \n\t"
+      "1:                                                 \n\t"
+        "lw       %[temp0],    0(%[frow])                 \n\t"
+        "lw       %[temp1],    4(%[frow])                 \n\t"
+        "lw       %[temp2],    8(%[frow])                 \n\t"
+        "lw       %[temp3],    12(%[frow])                \n\t"
+        "lw       %[temp4],    0(%[irow])                 \n\t"
+        "lw       %[temp5],    4(%[irow])                 \n\t"
+        "lw       %[temp10],   8(%[irow])                 \n\t"
+        "lw       %[temp11],   12(%[irow])                \n\t"
+        "addiu    %[dst],      %[dst],      4             \n\t"
+        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"  // ac0 = 2^31
+        "maddu    $ac0,        %[A],        %[temp0]      \n\t"  // += A * frow
+        "maddu    $ac0,        %[B],        %[temp4]      \n\t"  // += B * irow
+        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
+        "maddu    $ac1,        %[A],        %[temp1]      \n\t"
+        "maddu    $ac1,        %[B],        %[temp5]      \n\t"
+        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
+        "maddu    $ac2,        %[A],        %[temp2]      \n\t"
+        "maddu    $ac2,        %[B],        %[temp10]     \n\t"
+        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
+        "maddu    $ac3,        %[A],        %[temp3]      \n\t"
+        "maddu    $ac3,        %[B],        %[temp11]     \n\t"
+        "addiu    %[frow],     %[frow],     16            \n\t"
+        "addiu    %[irow],     %[irow],     16            \n\t"
+        "mfhi     %[temp0],    $ac0                       \n\t"  // blended value J
+        "mfhi     %[temp1],    $ac1                       \n\t"
+        "mfhi     %[temp2],    $ac2                       \n\t"
+        "mfhi     %[temp3],    $ac3                       \n\t"
+        "mult     $ac0,        %[temp8],    %[temp9]      \n\t"  // second pass:
+        "maddu    $ac0,        %[temp0],    %[temp7]      \n\t"  // J * fy_scale
+        "mult     $ac1,        %[temp8],    %[temp9]      \n\t"
+        "maddu    $ac1,        %[temp1],    %[temp7]      \n\t"
+        "mult     $ac2,        %[temp8],    %[temp9]      \n\t"
+        "maddu    $ac2,        %[temp2],    %[temp7]      \n\t"
+        "mult     $ac3,        %[temp8],    %[temp9]      \n\t"
+        "maddu    $ac3,        %[temp3],    %[temp7]      \n\t"
+        "mfhi     %[temp0],    $ac0                       \n\t"
+        "mfhi     %[temp1],    $ac1                       \n\t"
+        "mfhi     %[temp2],    $ac2                       \n\t"
+        "mfhi     %[temp3],    $ac3                       \n\t"
+        "sb       %[temp0],    -4(%[dst])                 \n\t"
+        "sb       %[temp1],    -3(%[dst])                 \n\t"
+        "sb       %[temp2],    -2(%[dst])                 \n\t"
+        "sb       %[temp3],    -1(%[dst])                 \n\t"
+        "bne      %[frow],     %[loop_end], 1b            \n\t"
+        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3),
+          [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow),
+          [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end),
+          [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), [temp10]"=&r"(temp10),
+          [temp11]"=&r"(temp11), [temp2]"=&r"(temp2)
+        : [temp7]"r"(temp7), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B)
+        : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
+          "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
+      );
+    }
+    for (i = 0; i < (x_out_max & 0x3); ++i) {  // leftovers, same math in C
+      const uint64_t I = (uint64_t)A * *frow++
+                       + (uint64_t)B * *irow++;
+      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+      const int v = (int)MULT_FIX(J, wrk->fy_scale);
+      assert(v >= 0 && v <= 255);
+      *dst++ = v;
+    }
+  }
+}
+
+#undef MULT_FIX
+#undef ROUNDER
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPRescalerDspInitMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitMIPSdspR2(void) {
+  WebPRescalerExportRowShrink = ExportRowShrink;  // install MIPS DSP R2 exporters
+  WebPRescalerExportRowExpand = ExportRowExpand;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/rescaler_neon.c b/drivers/webp/dsp/rescaler_neon.c
new file mode 100644
index 0000000000..16fd450ea3
--- /dev/null
+++ b/drivers/webp/dsp/rescaler_neon.c
@@ -0,0 +1,186 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON version of rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <arm_neon.h>
+#include <assert.h>
+#include "./neon.h"
+#include "../utils/rescaler.h"
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)   // half of WEBP_RESCALER_ONE
+#define MULT_FIX_C(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
+
+#define LOAD_32x4(SRC, DST) const uint32x4_t DST = vld1q_u32((SRC))  // declares DST
+#define LOAD_32x8(SRC, DST0, DST1)                                    \
+    LOAD_32x4(SRC + 0, DST0);                                         \
+    LOAD_32x4(SRC + 4, DST1)
+
+#define STORE_32x8(SRC0, SRC1, DST) do {                              \
+    vst1q_u32((DST) + 0, SRC0);                                       \
+    vst1q_u32((DST) + 4, SRC1);                                       \
+} while (0)  // stores 8 lanes at DST; no ';' here, the call site supplies it
+
+#if (WEBP_RESCALER_RFIX == 32)
+#define MAKE_HALF_CST(C) vdupq_n_s32((int32_t)((C) >> 1))  // (C >> 1) in 4 lanes
+#define MULT_FIX(A, B) /* note: B is actually scale>>1. See MAKE_HALF_CST */ \
+    vreinterpretq_u32_s32(vqrdmulhq_s32(vreinterpretq_s32_u32((A)), (B)))
+#else
+#error "MULT_FIX/WEBP_RESCALER_RFIX need some more work"
+#endif
+
+static uint32x4_t Interpolate(const rescaler_t* const frow,
+                              const rescaler_t* const irow,
+                              uint32_t A, uint32_t B) {
+  // Per lane i = 0..3: (A * frow[i] + B * irow[i]) shifted by WEBP_RESCALER_RFIX.
+  const uint32x4_t f = vld1q_u32(frow);
+  const uint32x4_t g = vld1q_u32(irow);
+  const uint64x2_t lo =   // lanes 0-1, accumulated in 64 bits
+      vmlal_n_u32(vmull_n_u32(vget_low_u32(f), A), vget_low_u32(g), B);
+  const uint64x2_t hi =   // lanes 2-3, accumulated in 64 bits
+      vmlal_n_u32(vmull_n_u32(vget_high_u32(f), A), vget_high_u32(g), B);
+  // Rounding, narrowing shift brings each 64-bit sum back to 32 bits.
+  return vcombine_u32(vrshrn_n_u64(lo, WEBP_RESCALER_RFIX),
+                      vrshrn_n_u64(hi, WEBP_RESCALER_RFIX));
+}
+
+static void RescalerExportRowExpand(WebPRescaler* const wrk) {  // NEON, y-expand
+  int x_out;
+  uint8_t* const dst = wrk->dst;
+  rescaler_t* const irow = wrk->irow;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;  // samples per row
+  const int max_span = x_out_max & ~7;  // part processed 8 samples at a time
+  const rescaler_t* const frow = wrk->frow;
+  const uint32_t fy_scale = wrk->fy_scale;
+  const int32x4_t fy_scale_half = MAKE_HALF_CST(fy_scale);  // form MULT_FIX expects
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(wrk->y_expand);
+  assert(wrk->y_sub != 0);
+  if (wrk->y_accum == 0) {  // no blending with irow: only scale frow
+    for (x_out = 0; x_out < max_span; x_out += 8) {
+      LOAD_32x4(frow + x_out + 0, A0);
+      LOAD_32x4(frow + x_out + 4, A1);
+      const uint32x4_t B0 = MULT_FIX(A0, fy_scale_half);
+      const uint32x4_t B1 = MULT_FIX(A1, fy_scale_half);
+      const uint16x4_t C0 = vmovn_u32(B0);  // narrow 32 -> 16 bits
+      const uint16x4_t C1 = vmovn_u32(B1);
+      const uint8x8_t D = vmovn_u16(vcombine_u16(C0, C1));  // 16 -> 8 bits
+      vst1_u8(dst + x_out, D);
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail (fewer than 8 left)
+      const uint32_t J = frow[x_out];
+      const int v = (int)MULT_FIX_C(J, fy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+    }
+  } else {  // blend frow and irow with weights A and B, then scale
+    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+    for (x_out = 0; x_out < max_span; x_out += 8) {
+      const uint32x4_t C0 =
+          Interpolate(frow + x_out + 0, irow + x_out + 0, A, B);
+      const uint32x4_t C1 =
+          Interpolate(frow + x_out + 4, irow + x_out + 4, A, B);
+      const uint32x4_t D0 = MULT_FIX(C0, fy_scale_half);
+      const uint32x4_t D1 = MULT_FIX(C1, fy_scale_half);
+      const uint16x4_t E0 = vmovn_u32(D0);  // narrow 32 -> 16 bits
+      const uint16x4_t E1 = vmovn_u32(D1);
+      const uint8x8_t F = vmovn_u16(vcombine_u16(E0, E1));  // 16 -> 8 bits
+      vst1_u8(dst + x_out, F);
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail, same blend in C
+      const uint64_t I = (uint64_t)A * frow[x_out]
+                       + (uint64_t)B * irow[x_out];
+      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+      const int v = (int)MULT_FIX_C(J, fy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+    }
+  }
+}
+
+static void RescalerExportRowShrink(WebPRescaler* const wrk) {  // NEON, y-shrink
+  int x_out;
+  uint8_t* const dst = wrk->dst;
+  rescaler_t* const irow = wrk->irow;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;  // samples per row
+  const int max_span = x_out_max & ~7;  // part processed 8 samples at a time
+  const rescaler_t* const frow = wrk->frow;
+  const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
+  const uint32_t fxy_scale = wrk->fxy_scale;
+  const uint32x4_t zero = vdupq_n_u32(0);
+  const int32x4_t yscale_half = MAKE_HALF_CST(yscale);  // form MULT_FIX expects
+  const int32x4_t fxy_scale_half = MAKE_HALF_CST(fxy_scale);
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(!wrk->y_expand);
+  if (yscale) {  // part of frow is carried over into the next output row
+    for (x_out = 0; x_out < max_span; x_out += 8) {
+      LOAD_32x8(frow + x_out, in0, in1);
+      LOAD_32x8(irow + x_out, in2, in3);
+      const uint32x4_t A0 = MULT_FIX(in0, yscale_half);  // frac = frow * yscale
+      const uint32x4_t A1 = MULT_FIX(in1, yscale_half);
+      const uint32x4_t B0 = vqsubq_u32(in2, A0);  // irow - frac (saturating op)
+      const uint32x4_t B1 = vqsubq_u32(in3, A1);
+      const uint32x4_t C0 = MULT_FIX(B0, fxy_scale_half);
+      const uint32x4_t C1 = MULT_FIX(B1, fxy_scale_half);
+      const uint16x4_t D0 = vmovn_u32(C0);  // narrow 32 -> 16 bits
+      const uint16x4_t D1 = vmovn_u32(C1);
+      const uint8x8_t E = vmovn_u16(vcombine_u16(D0, D1));  // 16 -> 8 bits
+      vst1_u8(dst + x_out, E);
+      STORE_32x8(A0, A1, irow + x_out);  // irow restarts from frac
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail (fewer than 8 left)
+      const uint32_t frac = (uint32_t)MULT_FIX_C(frow[x_out], yscale);
+      const int v = (int)MULT_FIX_C(irow[x_out] - frac, wrk->fxy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+      irow[x_out] = frac;   // new fractional start
+    }
+  } else {  // nothing carried over: scale irow, then clear it
+    for (x_out = 0; x_out < max_span; x_out += 8) {
+      LOAD_32x8(irow + x_out, in0, in1);
+      const uint32x4_t A0 = MULT_FIX(in0, fxy_scale_half);
+      const uint32x4_t A1 = MULT_FIX(in1, fxy_scale_half);
+      const uint16x4_t B0 = vmovn_u32(A0);  // narrow 32 -> 16 bits
+      const uint16x4_t B1 = vmovn_u32(A1);
+      const uint8x8_t C = vmovn_u16(vcombine_u16(B0, B1));  // 16 -> 8 bits
+      vst1_u8(dst + x_out, C);
+      STORE_32x8(zero, zero, irow + x_out);  // reset the accumulator row
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail
+      const int v = (int)MULT_FIX_C(irow[x_out], fxy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+      irow[x_out] = 0;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+extern void WebPRescalerDspInitNEON(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitNEON(void) {
+  WebPRescalerExportRowShrink = RescalerExportRowShrink;  // install NEON exporters
+  WebPRescalerExportRowExpand = RescalerExportRowExpand;
+}
+
+#else     // !WEBP_USE_NEON
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitNEON)
+
+#endif    // WEBP_USE_NEON
diff --git a/drivers/webp/dsp/rescaler_sse2.c b/drivers/webp/dsp/rescaler_sse2.c
new file mode 100644
index 0000000000..186edb653e
--- /dev/null
+++ b/drivers/webp/dsp/rescaler_sse2.c
@@ -0,0 +1,373 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <emmintrin.h>
+
+#include <assert.h>
+#include "../utils/rescaler.h"
+
+//------------------------------------------------------------------------------
+// Implementations of critical functions ImportRow / ExportRow
+
+#define ROUNDER (WEBP_RESCALER_ONE >> 1)   // half of WEBP_RESCALER_ONE
+#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX)
+
+// input: 8 bytes ABCDEFGH -> output: A0E0B0F0C0G0D0H0
+static void LoadTwoPixels(const uint8_t* const src, __m128i* out) {
+  // Widen 8 bytes to 16 bits, then interleave the first four with the last four.
+  const __m128i bytes = _mm_loadl_epi64((const __m128i*)src);    // ABCDEFGH
+  const __m128i words = _mm_unpacklo_epi8(bytes, _mm_setzero_si128());
+  const __m128i upper = _mm_srli_si128(words, 8);                 // E0F0G0H0
+  *out = _mm_unpacklo_epi16(words, upper);                        // A0E0B0F0...
+}
+
+// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
+static void LoadHeightPixels(const uint8_t* const src, __m128i* out) {
+  // Zero-extend the 8 bytes at src[0..7] into eight 16-bit lanes.
+  const __m128i bytes = _mm_loadl_epi64((const __m128i*)src);  // ABCDEFGH
+  *out = _mm_unpacklo_epi8(bytes, _mm_setzero_si128());
+}
+
+static void RescalerImportRowExpandSSE2(WebPRescaler* const wrk,
+                                        const uint8_t* src) {  // fills wrk->frow
+  rescaler_t* frow = wrk->frow;
+  const rescaler_t* const frow_end = frow + wrk->dst_width * wrk->num_channels;
+  const int x_add = wrk->x_add;
+  int accum = x_add;
+  __m128i cur_pixels;  // current left/right source samples as 16-bit lanes
+
+  assert(!WebPRescalerInputDone(wrk));
+  assert(wrk->x_expand);
+  if (wrk->num_channels == 4) {
+    if (wrk->src_width < 2) {  // LoadTwoPixels reads 8 bytes: use the C version
+      WebPRescalerImportRowExpandC(wrk, src);
+      return;
+    }
+    LoadTwoPixels(src, &cur_pixels);
+    src += 4;
+    while (1) {
+      const __m128i mult = _mm_set1_epi32(((x_add - accum) << 16) | accum);
+      const __m128i out = _mm_madd_epi16(cur_pixels, mult);  // l*accum + r*(x_add-accum)
+      _mm_storeu_si128((__m128i*)frow, out);
+      frow += 4;
+      if (frow >= frow_end) break;
+      accum -= wrk->x_sub;
+      if (accum < 0) {  // step to the next pair of source pixels
+        LoadTwoPixels(src, &cur_pixels);
+        src += 4;
+        accum += x_add;
+      }
+    }
+  } else {  // NOTE(review): pairs adjacent bytes, presumably num_channels == 1
+    int left;  // sample pairs still available in cur_pixels
+    const uint8_t* const src_limit = src + wrk->src_width - 8;  // last 8-byte load
+    if (wrk->src_width < 8) {  // LoadHeightPixels reads 8 bytes: use the C version
+      WebPRescalerImportRowExpandC(wrk, src);
+      return;
+    }
+    LoadHeightPixels(src, &cur_pixels);
+    src += 7;
+    left = 7;
+    while (1) {
+      const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum);
+      const __m128i out = _mm_madd_epi16(cur_pixels, mult);  // only lane 0 is used
+      *(uint32_t*)frow = _mm_cvtsi128_si32(out);
+      frow += 1;
+      if (frow >= frow_end) break;
+      accum -= wrk->x_sub;
+      if (accum < 0) {
+        if (--left) {  // next pair is already loaded: shift by one 16-bit lane
+          cur_pixels = _mm_srli_si128(cur_pixels, 2);
+        } else if (src <= src_limit) {  // a full 8-byte reload still fits
+          LoadHeightPixels(src, &cur_pixels);
+          src += 7;
+          left = 7;
+        } else {   // tail
+          cur_pixels = _mm_srli_si128(cur_pixels, 2);
+          cur_pixels = _mm_insert_epi16(cur_pixels, src[1], 1);  // one sample only
+          src += 1;
+          left = 1;
+        }
+        accum += x_add;
+      }
+    }
+  }
+  assert(accum == 0);
+}
+
+static void RescalerImportRowShrinkSSE2(WebPRescaler* const wrk,
+                                        const uint8_t* src) {  // fills wrk->frow
+  const int x_sub = wrk->x_sub;
+  int accum = 0;
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i mult0 = _mm_set1_epi16(x_sub);
+  const __m128i mult1 = _mm_set1_epi32(wrk->fx_scale);
+  const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+  __m128i sum = zero;  // running per-channel sum, 16-bit lanes
+  rescaler_t* frow = wrk->frow;
+  const rescaler_t* const frow_end = wrk->frow + 4 * wrk->dst_width;
+
+  if (wrk->num_channels != 4 || wrk->x_add > (x_sub << 7)) {  // unsupported here
+    WebPRescalerImportRowShrinkC(wrk, src);
+    return;
+  }
+  assert(!WebPRescalerInputDone(wrk));
+  assert(!wrk->x_expand);
+
+  for (; frow < frow_end; frow += 4) {  // one 4-channel output pixel per pass
+    __m128i base = zero;  // last source pixel read, widened to 16 bits
+    accum += wrk->x_add;
+    while (accum > 0) {
+      const __m128i A = _mm_cvtsi32_si128(*(int*)src);  // 4 bytes = one pixel
+      src += 4;
+      base = _mm_unpacklo_epi8(A, zero);
+      // To avoid overflow, we need: base * x_add / x_sub < 32768
+      // => x_add < x_sub << 7. That's a 1/128 reduction ratio limit.
+      sum = _mm_add_epi16(sum, base);
+      accum -= x_sub;
+    }
+    {    // Emit next horizontal pixel.
+      const __m128i mult = _mm_set1_epi16(-accum);
+      const __m128i frac0 = _mm_mullo_epi16(base, mult);  // 16b x 16b -> 32b
+      const __m128i frac1 = _mm_mulhi_epu16(base, mult);
+      const __m128i frac = _mm_unpacklo_epi16(frac0, frac1);  // frac is 32b
+      const __m128i A0 = _mm_mullo_epi16(sum, mult0);
+      const __m128i A1 = _mm_mulhi_epu16(sum, mult0);
+      const __m128i B0 = _mm_unpacklo_epi16(A0, A1);      // sum * x_sub
+      const __m128i frow_out = _mm_sub_epi32(B0, frac);   // sum * x_sub - frac
+      const __m128i D0 = _mm_srli_epi64(frac, 32);        // odd channels
+      const __m128i D1 = _mm_mul_epu32(frac, mult1);      // 32b x 16b -> 64b
+      const __m128i D2 = _mm_mul_epu32(D0, mult1);
+      const __m128i E1 = _mm_add_epi64(D1, rounder);
+      const __m128i E2 = _mm_add_epi64(D2, rounder);
+      const __m128i F1 = _mm_shuffle_epi32(E1, 1 | (3 << 2));  // high 32b halves
+      const __m128i F2 = _mm_shuffle_epi32(E2, 1 | (3 << 2));
+      const __m128i G = _mm_unpacklo_epi32(F1, F2);  // back in channel order
+      sum = _mm_packs_epi32(G, zero);  // scaled frac seeds the next pixel's sum
+      _mm_storeu_si128((__m128i*)frow, frow_out);
+    }
+  }
+  assert(accum == 0);
+}
+
+//------------------------------------------------------------------------------
+// Row export
+
+// load *src as epi64, multiply by mult and store result in [out0 ... out3]
+static WEBP_INLINE void LoadDispatchAndMult(const rescaler_t* const src,
+                                            const __m128i* const mult,
+                                            __m128i* const out0,
+                                            __m128i* const out1,
+                                            __m128i* const out2,
+                                            __m128i* const out3) {
+  const __m128i lo = _mm_loadu_si128((const __m128i*)(src + 0));  // src[0..3]
+  const __m128i hi = _mm_loadu_si128((const __m128i*)(src + 4));  // src[4..7]
+  const __m128i lo_odd = _mm_srli_epi64(lo, 32);   // src[1], src[3] moved down
+  const __m128i hi_odd = _mm_srli_epi64(hi, 32);   // src[5], src[7] moved down
+  if (mult == NULL) {   // no multiplier: hand back the dispatched values as-is
+    *out0 = lo;
+    *out1 = hi;
+    *out2 = lo_odd;
+    *out3 = hi_odd;
+  } else {              // 64-bit products of the low 32 bits of each 64-bit lane
+    *out0 = _mm_mul_epu32(lo, *mult);
+    *out1 = _mm_mul_epu32(hi, *mult);
+    *out2 = _mm_mul_epu32(lo_odd, *mult);
+    *out3 = _mm_mul_epu32(hi_odd, *mult);
+  }
+}
+
+static WEBP_INLINE void ProcessRow(const __m128i* const A0,
+                                   const __m128i* const A1,
+                                   const __m128i* const A2,
+                                   const __m128i* const A3,
+                                   const __m128i* const mult,
+                                   uint8_t* const dst) {
+  const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+  const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
+  const __m128i B0 = _mm_mul_epu32(*A0, *mult);
+  const __m128i B1 = _mm_mul_epu32(*A1, *mult);
+  const __m128i B2 = _mm_mul_epu32(*A2, *mult);
+  const __m128i B3 = _mm_mul_epu32(*A3, *mult);
+  const __m128i C0 = _mm_add_epi64(B0, rounder);
+  const __m128i C1 = _mm_add_epi64(B1, rounder);
+  const __m128i C2 = _mm_add_epi64(B2, rounder);
+  const __m128i C3 = _mm_add_epi64(B3, rounder);
+  const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX);
+  const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
+#if (WEBP_RESCALER_FIX < 32)
+  const __m128i D2 =
+      _mm_and_si128(_mm_slli_epi64(C2, 32 - WEBP_RESCALER_RFIX), mask);
+  const __m128i D3 =
+      _mm_and_si128(_mm_slli_epi64(C3, 32 - WEBP_RESCALER_RFIX), mask);
+#else
+  const __m128i D2 = _mm_and_si128(C2, mask);
+  const __m128i D3 = _mm_and_si128(C3, mask);
+#endif
+  const __m128i E0 = _mm_or_si128(D0, D2);
+  const __m128i E1 = _mm_or_si128(D1, D3);
+  const __m128i F = _mm_packs_epi32(E0, E1);
+  const __m128i G = _mm_packus_epi16(F, F);
+  _mm_storel_epi64((__m128i*)dst, G);
+}
+
+static void RescalerExportRowExpandSSE2(WebPRescaler* const wrk) {  // y-expand
+  int x_out;
+  uint8_t* const dst = wrk->dst;
+  rescaler_t* const irow = wrk->irow;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;  // samples per row
+  const rescaler_t* const frow = wrk->frow;
+  const __m128i mult = _mm_set_epi32(0, wrk->fy_scale, 0, wrk->fy_scale);
+
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0 && wrk->y_sub + wrk->y_accum >= 0);
+  assert(wrk->y_expand);
+  if (wrk->y_accum == 0) {  // no blending with irow: only scale frow
+    for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
+      __m128i A0, A1, A2, A3;
+      LoadDispatchAndMult(frow + x_out, NULL, &A0, &A1, &A2, &A3);
+      ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail (fewer than 8 left)
+      const uint32_t J = frow[x_out];
+      const int v = (int)MULT_FIX(J, wrk->fy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+    }
+  } else {  // blend frow and irow with weights A and B, then scale
+    const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
+    const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B);
+    const __m128i mA = _mm_set_epi32(0, A, 0, A);
+    const __m128i mB = _mm_set_epi32(0, B, 0, B);
+    const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+    for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
+      __m128i A0, A1, A2, A3, B0, B1, B2, B3;
+      LoadDispatchAndMult(frow + x_out, &mA, &A0, &A1, &A2, &A3);  // A * frow
+      LoadDispatchAndMult(irow + x_out, &mB, &B0, &B1, &B2, &B3);  // B * irow
+      {
+        const __m128i C0 = _mm_add_epi64(A0, B0);
+        const __m128i C1 = _mm_add_epi64(A1, B1);
+        const __m128i C2 = _mm_add_epi64(A2, B2);
+        const __m128i C3 = _mm_add_epi64(A3, B3);
+        const __m128i D0 = _mm_add_epi64(C0, rounder);
+        const __m128i D1 = _mm_add_epi64(C1, rounder);
+        const __m128i D2 = _mm_add_epi64(C2, rounder);
+        const __m128i D3 = _mm_add_epi64(C3, rounder);
+        const __m128i E0 = _mm_srli_epi64(D0, WEBP_RESCALER_RFIX);  // = J below
+        const __m128i E1 = _mm_srli_epi64(D1, WEBP_RESCALER_RFIX);
+        const __m128i E2 = _mm_srli_epi64(D2, WEBP_RESCALER_RFIX);
+        const __m128i E3 = _mm_srli_epi64(D3, WEBP_RESCALER_RFIX);
+        ProcessRow(&E0, &E1, &E2, &E3, &mult, dst + x_out);
+      }
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail, same blend in C
+      const uint64_t I = (uint64_t)A * frow[x_out]
+                       + (uint64_t)B * irow[x_out];
+      const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
+      const int v = (int)MULT_FIX(J, wrk->fy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+    }
+  }
+}
+
+static void RescalerExportRowShrinkSSE2(WebPRescaler* const wrk) {  // y-shrink
+  int x_out;
+  uint8_t* const dst = wrk->dst;
+  rescaler_t* const irow = wrk->irow;
+  const int x_out_max = wrk->dst_width * wrk->num_channels;  // samples per row
+  const rescaler_t* const frow = wrk->frow;
+  const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum);
+  assert(!WebPRescalerOutputDone(wrk));
+  assert(wrk->y_accum <= 0);
+  assert(!wrk->y_expand);
+  if (yscale) {  // part of frow is carried over into the next output row
+    const int scale_xy = wrk->fxy_scale;
+    const __m128i mult_xy = _mm_set_epi32(0, scale_xy, 0, scale_xy);
+    const __m128i mult_y = _mm_set_epi32(0, yscale, 0, yscale);
+    const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
+    for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
+      __m128i A0, A1, A2, A3, B0, B1, B2, B3;
+      LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
+      LoadDispatchAndMult(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
+      {
+        const __m128i C0 = _mm_add_epi64(B0, rounder);
+        const __m128i C1 = _mm_add_epi64(B1, rounder);
+        const __m128i C2 = _mm_add_epi64(B2, rounder);
+        const __m128i C3 = _mm_add_epi64(B3, rounder);
+        const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX);   // = frac
+        const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
+        const __m128i D2 = _mm_srli_epi64(C2, WEBP_RESCALER_RFIX);
+        const __m128i D3 = _mm_srli_epi64(C3, WEBP_RESCALER_RFIX);
+        const __m128i E0 = _mm_sub_epi64(A0, D0);   // irow[x] - frac
+        const __m128i E1 = _mm_sub_epi64(A1, D1);
+        const __m128i E2 = _mm_sub_epi64(A2, D2);
+        const __m128i E3 = _mm_sub_epi64(A3, D3);
+        const __m128i F2 = _mm_slli_epi64(D2, 32);  // odd fracs to the high 32b
+        const __m128i F3 = _mm_slli_epi64(D3, 32);
+        const __m128i G0 = _mm_or_si128(D0, F2);    // four fracs in memory order
+        const __m128i G1 = _mm_or_si128(D1, F3);
+        _mm_storeu_si128((__m128i*)(irow + x_out + 0), G0);  // irow = frac
+        _mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
+        ProcessRow(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
+      }
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail (fewer than 8 left)
+      const uint32_t frac = (int)MULT_FIX(frow[x_out], yscale);  // NOTE(review): (int) cast, NEON twin uses (uint32_t)
+      const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+      irow[x_out] = frac;   // new fractional start
+    }
+  } else {  // nothing carried over: scale irow, then clear it
+    const uint32_t scale = wrk->fxy_scale;
+    const __m128i mult = _mm_set_epi32(0, scale, 0, scale);
+    const __m128i zero = _mm_setzero_si128();
+    for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
+      __m128i A0, A1, A2, A3;
+      LoadDispatchAndMult(irow + x_out, NULL, &A0, &A1, &A2, &A3);
+      _mm_storeu_si128((__m128i*)(irow + x_out + 0), zero);  // reset after loading
+      _mm_storeu_si128((__m128i*)(irow + x_out + 4), zero);
+      ProcessRow(&A0, &A1, &A2, &A3, &mult, dst + x_out);
+    }
+    for (; x_out < x_out_max; ++x_out) {  // scalar tail
+      const int v = (int)MULT_FIX(irow[x_out], scale);
+      assert(v >= 0 && v <= 255);
+      dst[x_out] = v;
+      irow[x_out] = 0;
+    }
+  }
+}
+
+#undef MULT_FIX
+#undef ROUNDER
+
+//------------------------------------------------------------------------------
+
+extern void WebPRescalerDspInitSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPRescalerDspInitSSE2(void) {
+  WebPRescalerExportRowShrink = RescalerExportRowShrinkSSE2;  // row exporters
+  WebPRescalerExportRowExpand = RescalerExportRowExpandSSE2;
+  WebPRescalerImportRowShrink = RescalerImportRowShrinkSSE2;  // row importers
+  WebPRescalerImportRowExpand = RescalerImportRowExpandSSE2;
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(WebPRescalerDspInitSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/upsampling.c b/drivers/webp/dsp/upsampling.c
index 4855eb1432..651274fcee 100644
--- a/drivers/webp/dsp/upsampling.c
+++ b/drivers/webp/dsp/upsampling.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // YUV to RGB upsampling functions.
@@ -12,9 +14,7 @@
 #include "./dsp.h"
 #include "./yuv.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include <assert.h>
 
 //------------------------------------------------------------------------------
 // Fancy upsampler
@@ -32,7 +32,7 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
 //  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d]   [8 8]) / 16
 
 // We process u and v together stashed into 32bit (16bit each).
-#define LOAD_UV(u,v) ((u) | ((v) << 16))
+#define LOAD_UV(u, v) ((u) | ((v) << 16))
 
 #define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                  \
 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
@@ -43,11 +43,12 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
   const int last_pixel_pair = (len - 1) >> 1;                                  \
   uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]);   /* top-left sample */        \
   uint32_t l_uv  = LOAD_UV(cur_u[0], cur_v[0]);   /* left-sample */            \
-  if (top_y) {                                                                 \
+  assert(top_y != NULL);                                                       \
+  {                                                                            \
     const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                \
     FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                          \
   }                                                                            \
-  if (bottom_y) {                                                              \
+  if (bottom_y != NULL) {                                                      \
     const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                \
     FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                    \
   }                                                                            \
@@ -58,7 +59,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
     const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;               \
     const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                   \
     const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                    \
-    if (top_y) {                                                               \
+    {                                                                          \
       const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                             \
       const uint32_t uv1 = (diag_03 + t_uv) >> 1;                              \
       FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                          \
@@ -66,7 +67,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
       FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16),                          \
            top_dst + (2 * x - 0) * XSTEP);                                     \
     }                                                                          \
-    if (bottom_y) {                                                            \
+    if (bottom_y != NULL) {                                                    \
       const uint32_t uv0 = (diag_03 + l_uv) >> 1;                              \
       const uint32_t uv1 = (diag_12 + uv) >> 1;                                \
       FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                       \
@@ -78,12 +79,12 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
     l_uv = uv;                                                                 \
   }                                                                            \
   if (!(len & 1)) {                                                            \
-    if (top_y) {                                                               \
+    {                                                                          \
       const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;              \
       FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16),                            \
            top_dst + (len - 1) * XSTEP);                                       \
     }                                                                          \
-    if (bottom_y) {                                                            \
+    if (bottom_y != NULL) {                                                    \
       const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;              \
       FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16),                         \
            bottom_dst + (len - 1) * XSTEP);                                    \
@@ -106,57 +107,6 @@ UPSAMPLE_FUNC(UpsampleRgb565LinePair,  VP8YuvToRgb565,  2)
 #endif  // FANCY_UPSAMPLING
 
 //------------------------------------------------------------------------------
-// simple point-sampling
-
-#define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
-static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
-                      const uint8_t* u, const uint8_t* v,                      \
-                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
-  int i;                                                                       \
-  for (i = 0; i < len - 1; i += 2) {                                           \
-    FUNC(top_y[0], u[0], v[0], top_dst);                                       \
-    FUNC(top_y[1], u[0], v[0], top_dst + XSTEP);                               \
-    FUNC(bottom_y[0], u[0], v[0], bottom_dst);                                 \
-    FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP);                         \
-    top_y += 2;                                                                \
-    bottom_y += 2;                                                             \
-    u++;                                                                       \
-    v++;                                                                       \
-    top_dst += 2 * XSTEP;                                                      \
-    bottom_dst += 2 * XSTEP;                                                   \
-  }                                                                            \
-  if (i == len - 1) {    /* last one */                                        \
-    FUNC(top_y[0], u[0], v[0], top_dst);                                       \
-    FUNC(bottom_y[0], u[0], v[0], bottom_dst);                                 \
-  }                                                                            \
-}
-
-// All variants implemented.
-SAMPLE_FUNC(SampleRgbLinePair,      VP8YuvToRgb,  3)
-SAMPLE_FUNC(SampleBgrLinePair,      VP8YuvToBgr,  3)
-SAMPLE_FUNC(SampleRgbaLinePair,     VP8YuvToRgba, 4)
-SAMPLE_FUNC(SampleBgraLinePair,     VP8YuvToBgra, 4)
-SAMPLE_FUNC(SampleArgbLinePair,     VP8YuvToArgb, 4)
-SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2)
-SAMPLE_FUNC(SampleRgb565LinePair,   VP8YuvToRgb565, 2)
-
-#undef SAMPLE_FUNC
-
-const WebPSampleLinePairFunc WebPSamplers[MODE_LAST] = {
-  SampleRgbLinePair,       // MODE_RGB
-  SampleRgbaLinePair,      // MODE_RGBA
-  SampleBgrLinePair,       // MODE_BGR
-  SampleBgraLinePair,      // MODE_BGRA
-  SampleArgbLinePair,      // MODE_ARGB
-  SampleRgba4444LinePair,  // MODE_RGBA_4444
-  SampleRgb565LinePair,    // MODE_RGB_565
-  SampleRgbaLinePair,      // MODE_rgbA
-  SampleBgraLinePair,      // MODE_bgrA
-  SampleArgbLinePair,      // MODE_Argb
-  SampleRgba4444LinePair   // MODE_rgbA_4444
-};
-
-//------------------------------------------------------------------------------
 
 #if !defined(FANCY_UPSAMPLING)
 #define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC)                                      \
@@ -166,7 +116,8 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y,              \
                       uint8_t* top_dst, uint8_t* bot_dst, int len) {           \
   const int half_len = len >> 1;                                               \
   int x;                                                                       \
-  if (top_dst != NULL) {                                                       \
+  assert(top_dst != NULL);                                                     \
+  {                                                                            \
     for (x = 0; x < half_len; ++x) {                                           \
       FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0);         \
       FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4);         \
@@ -202,116 +153,75 @@ WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
 // YUV444 converter
 
 #define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
-static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
-                      uint8_t* dst, int len) {                                 \
+extern void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
+                      uint8_t* dst, int len);                                  \
+void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,           \
+               uint8_t* dst, int len) {                                        \
   int i;                                                                       \
   for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);           \
 }
 
-YUV444_FUNC(Yuv444ToRgb,      VP8YuvToRgb,  3)
-YUV444_FUNC(Yuv444ToBgr,      VP8YuvToBgr,  3)
-YUV444_FUNC(Yuv444ToRgba,     VP8YuvToRgba, 4)
-YUV444_FUNC(Yuv444ToBgra,     VP8YuvToBgra, 4)
-YUV444_FUNC(Yuv444ToArgb,     VP8YuvToArgb, 4)
-YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2)
-YUV444_FUNC(Yuv444ToRgb565,   VP8YuvToRgb565, 2)
+YUV444_FUNC(WebPYuv444ToRgbC,      VP8YuvToRgb,  3)
+YUV444_FUNC(WebPYuv444ToBgrC,      VP8YuvToBgr,  3)
+YUV444_FUNC(WebPYuv444ToRgbaC,     VP8YuvToRgba, 4)
+YUV444_FUNC(WebPYuv444ToBgraC,     VP8YuvToBgra, 4)
+YUV444_FUNC(WebPYuv444ToArgbC,     VP8YuvToArgb, 4)
+YUV444_FUNC(WebPYuv444ToRgba4444C, VP8YuvToRgba4444, 2)
+YUV444_FUNC(WebPYuv444ToRgb565C,   VP8YuvToRgb565, 2)
 
 #undef YUV444_FUNC
 
-const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = {
-  Yuv444ToRgb,       // MODE_RGB
-  Yuv444ToRgba,      // MODE_RGBA
-  Yuv444ToBgr,       // MODE_BGR
-  Yuv444ToBgra,      // MODE_BGRA
-  Yuv444ToArgb,      // MODE_ARGB
-  Yuv444ToRgba4444,  // MODE_RGBA_4444
-  Yuv444ToRgb565,    // MODE_RGB_565
-  Yuv444ToRgba,      // MODE_rgbA
-  Yuv444ToBgra,      // MODE_bgrA
-  Yuv444ToArgb,      // MODE_Argb
-  Yuv444ToRgba4444   // MODE_rgbA_4444
-};
-
-//------------------------------------------------------------------------------
-// Premultiplied modes
-
-// non dithered-modes
-
-// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
-// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
-// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
-#if 1     // (int)(x * a / 255.)
-#define MULTIPLIER(a)   ((a) * 32897UL)
-#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
-#else     // (int)(x * a / 255. + .5)
-#define MULTIPLIER(a) ((a) * 65793UL)
-#define PREMULTIPLY(x, m) (((x) * (m) + (1UL << 23)) >> 24)
-#endif
-
-static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
-                               int w, int h, int stride) {
-  while (h-- > 0) {
-    uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
-    const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
-    int i;
-    for (i = 0; i < w; ++i) {
-      const uint32_t a = alpha[4 * i];
-      if (a != 0xff) {
-        const uint32_t mult = MULTIPLIER(a);
-        rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
-        rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
-        rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
-      }
-    }
-    rgba += stride;
-  }
-}
-#undef MULTIPLIER
-#undef PREMULTIPLY
-
-// rgbA4444
+WebPYUV444Converter WebPYUV444Converters[MODE_LAST];
 
-#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
+extern void WebPInitYUV444ConvertersMIPSdspR2(void);
+extern void WebPInitYUV444ConvertersSSE2(void);
 
-static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
-  return (x & 0xf0) | (x >> 4);
-}
+static volatile VP8CPUInfo upsampling_last_cpuinfo_used1 =
+    (VP8CPUInfo)&upsampling_last_cpuinfo_used1;
 
-static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
-  return (x & 0x0f) | (x << 4);
-}
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444Converters(void) {
+  if (upsampling_last_cpuinfo_used1 == VP8GetCPUInfo) return;
 
-static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
-  return (x * m) >> 16;
-}
+  WebPYUV444Converters[MODE_RGB]       = WebPYuv444ToRgbC;
+  WebPYUV444Converters[MODE_RGBA]      = WebPYuv444ToRgbaC;
+  WebPYUV444Converters[MODE_BGR]       = WebPYuv444ToBgrC;
+  WebPYUV444Converters[MODE_BGRA]      = WebPYuv444ToBgraC;
+  WebPYUV444Converters[MODE_ARGB]      = WebPYuv444ToArgbC;
+  WebPYUV444Converters[MODE_RGBA_4444] = WebPYuv444ToRgba4444C;
+  WebPYUV444Converters[MODE_RGB_565]   = WebPYuv444ToRgb565C;
+  WebPYUV444Converters[MODE_rgbA]      = WebPYuv444ToRgbaC;
+  WebPYUV444Converters[MODE_bgrA]      = WebPYuv444ToBgraC;
+  WebPYUV444Converters[MODE_Argb]      = WebPYuv444ToArgbC;
+  WebPYUV444Converters[MODE_rgbA_4444] = WebPYuv444ToRgba4444C;
 
-static void ApplyAlphaMultiply4444(uint8_t* rgba4444,
-                                   int w, int h, int stride) {
-  while (h-- > 0) {
-    int i;
-    for (i = 0; i < w; ++i) {
-      const uint8_t a = (rgba4444[2 * i + 1] & 0x0f);
-      const uint32_t mult = MULTIPLIER(a);
-      const uint8_t r = multiply(dither_hi(rgba4444[2 * i + 0]), mult);
-      const uint8_t g = multiply(dither_lo(rgba4444[2 * i + 0]), mult);
-      const uint8_t b = multiply(dither_hi(rgba4444[2 * i + 1]), mult);
-      rgba4444[2 * i + 0] = (r & 0xf0) | ((g >> 4) & 0x0f);
-      rgba4444[2 * i + 1] = (b & 0xf0) | a;
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitYUV444ConvertersSSE2();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      WebPInitYUV444ConvertersMIPSdspR2();
     }
-    rgba4444 += stride;
+#endif
   }
+  upsampling_last_cpuinfo_used1 = VP8GetCPUInfo;
 }
-#undef MULTIPLIER
-
-void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int)
-    = ApplyAlphaMultiply;
-void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int)
-    = ApplyAlphaMultiply4444;
 
 //------------------------------------------------------------------------------
-// Main call
+// Main calls
+
+extern void WebPInitUpsamplersSSE2(void);
+extern void WebPInitUpsamplersNEON(void);
+extern void WebPInitUpsamplersMIPSdspR2(void);
+
+static volatile VP8CPUInfo upsampling_last_cpuinfo_used2 =
+    (VP8CPUInfo)&upsampling_last_cpuinfo_used2;
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplers(void) {
+  if (upsampling_last_cpuinfo_used2 == VP8GetCPUInfo) return;
 
-void WebPInitUpsamplers(void) {
 #ifdef FANCY_UPSAMPLING
   WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
   WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
@@ -320,38 +230,31 @@ void WebPInitUpsamplers(void) {
   WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
   WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
-
-  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
-  if (VP8GetCPUInfo != NULL) {
-#if defined(WEBP_USE_SSE2)
-    if (VP8GetCPUInfo(kSSE2)) {
-      WebPInitUpsamplersSSE2();
-    }
-#endif
-  }
-#endif  // FANCY_UPSAMPLING
-}
-
-void WebPInitPremultiply(void) {
-  WebPApplyAlphaMultiply = ApplyAlphaMultiply;
-  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply4444;
-
-#ifdef FANCY_UPSAMPLING
   WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;
   WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;
   WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;
   WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
 
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
   if (VP8GetCPUInfo != NULL) {
 #if defined(WEBP_USE_SSE2)
     if (VP8GetCPUInfo(kSSE2)) {
-      WebPInitPremultiplySSE2();
+      WebPInitUpsamplersSSE2();
+    }
+#endif
+#if defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      WebPInitUpsamplersNEON();
+    }
+#endif
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      WebPInitUpsamplersMIPSdspR2();
     }
 #endif
   }
 #endif  // FANCY_UPSAMPLING
+  upsampling_last_cpuinfo_used2 = VP8GetCPUInfo;
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/dsp/upsampling_mips_dsp_r2.c b/drivers/webp/dsp/upsampling_mips_dsp_r2.c
new file mode 100644
index 0000000000..46f207b43e
--- /dev/null
+++ b/drivers/webp/dsp/upsampling_mips_dsp_r2.c
@@ -0,0 +1,282 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV to RGB upsampling functions.
+//
+// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
+//            Djordje Pesut  (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include <assert.h>
+#include "./yuv.h"
+
+#if !defined(WEBP_YUV_USE_TABLE)
+
+#define YUV_TO_RGB(Y, U, V, R, G, B) do { /* R, G, B are written (asm "+r") */ \
+    const int t1 = kYScale * Y;           /* luma term shared by R, G and B */ \
+    const int t2 = kVToG * V;             /* V term, subtracted from G below */\
+    R = kVToR * V;                                                             \
+    G = kUToG * U;                                                             \
+    B = kUToB * U;                                                             \
+    R = t1 + R;                                                                \
+    G = t1 - G;                                                                \
+    B = t1 + B;                                                                \
+    R = R + kRCst;                        /* R = t1 + kVToR*V + kRCst */       \
+    G = G - t2 + kGCst;                   /* G = t1 - kUToG*U - t2 + kGCst */  \
+    B = B + kBCst;                        /* B = t1 + kUToB*U + kBCst */       \
+    __asm__ volatile ( /* per channel: shll_s.w by 9, precrqu_s.qb.ph, srl by 24; presumably a saturating clamp to 0..255 - confirm in the MIPS DSP manual */ \
+      "shll_s.w         %[" #R "],      %[" #R "],        9          \n\t"     \
+      "shll_s.w         %[" #G "],      %[" #G "],        9          \n\t"     \
+      "shll_s.w         %[" #B "],      %[" #B "],        9          \n\t"     \
+      "precrqu_s.qb.ph  %[" #R "],      %[" #R "],        $zero      \n\t"     \
+      "precrqu_s.qb.ph  %[" #G "],      %[" #G "],        $zero      \n\t"     \
+      "precrqu_s.qb.ph  %[" #B "],      %[" #B "],        $zero      \n\t"     \
+      "srl              %[" #R "],      %[" #R "],        24         \n\t"     \
+      "srl              %[" #G "],      %[" #G "],        24         \n\t"     \
+      "srl              %[" #B "],      %[" #B "],        24         \n\t"     \
+      : [R]"+r"(R), [G]"+r"(G), [B]"+r"(B)  /* read-write register operands */ \
+      :                                     /* no input-only operands */       \
+    );                                                                         \
+  } while (0)
+
+static WEBP_INLINE void YuvToRgb(int y, int u, int v, uint8_t* const rgb) {  // converts one YUV sample and writes 3 bytes to rgb[]
+  int r, g, b;  // filled in by the YUV_TO_RGB macro
+  YUV_TO_RGB(y, u, v, r, g, b);
+  rgb[0] = r;   // byte order in memory: R, G, B
+  rgb[1] = g;
+  rgb[2] = b;
+}
+static WEBP_INLINE void YuvToBgr(int y, int u, int v, uint8_t* const bgr) {  // converts one YUV sample and writes 3 bytes to bgr[]
+  int r, g, b;  // filled in by the YUV_TO_RGB macro
+  YUV_TO_RGB(y, u, v, r, g, b);
+  bgr[0] = b;   // byte order in memory: B, G, R
+  bgr[1] = g;
+  bgr[2] = r;
+}
+static WEBP_INLINE void YuvToRgb565(int y, int u, int v, uint8_t* const rgb) {  // converts one YUV sample and writes 2 packed bytes to rgb[]
+  int r, g, b;  // filled in by the YUV_TO_RGB macro; the packing below assumes each fits in 8 bits
+  YUV_TO_RGB(y, u, v, r, g, b);
+  {
+    const int rg = (r & 0xf8) | (g >> 5);         // top 5 bits of r, then top 3 bits of g
+    const int gb = ((g << 3) & 0xe0) | (b >> 3);  // bits 4..2 of g, then top 5 bits of b
+#ifdef WEBP_SWAP_16BIT_CSP  // selects which of the two bytes is stored first
+    rgb[0] = gb;
+    rgb[1] = rg;
+#else  // default order: rg byte first
+    rgb[0] = rg;
+    rgb[1] = gb;
+#endif
+  }
+}
+static WEBP_INLINE void YuvToRgba4444(int y, int u, int v,  // converts one YUV sample into 2 packed bytes
+                                      uint8_t* const argb) {  // destination; exactly argb[0] and argb[1] are written
+  int r, g, b;  // filled in by the YUV_TO_RGB macro; the packing below assumes each fits in 8 bits
+  YUV_TO_RGB(y, u, v, r, g, b);
+  {
+    const int rg = (r & 0xf0) | (g >> 4);  // top 4 bits of r, then top 4 bits of g
+    const int ba = (b & 0xf0) | 0x0f;     // top 4 bits of b; the low nibble (alpha slot) is forced to 0xf
+#ifdef WEBP_SWAP_16BIT_CSP  // selects which of the two bytes is stored first
+    argb[0] = ba;
+    argb[1] = rg;
+#else  // default order: rg byte first
+    argb[0] = rg;
+    argb[1] = ba;
+#endif
+   }
+}
+#endif  // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Alpha handling variants
+
+static WEBP_INLINE void YuvToArgb(uint8_t y, uint8_t u, uint8_t v,  // converts one YUV sample into 4 bytes: A, R, G, B
+                                  uint8_t* const argb) {  // NOTE(review): YUV_TO_RGB is only defined when WEBP_YUV_USE_TABLE is unset, but this function is outside that #if - confirm the table build is never combined with MIPS DSP R2
+  int r, g, b;  // filled in by the YUV_TO_RGB macro
+  YUV_TO_RGB(y, u, v, r, g, b);
+  argb[0] = 0xff;  // alpha is always written as 0xff
+  argb[1] = r;
+  argb[2] = g;
+  argb[3] = b;
+}
+static WEBP_INLINE void YuvToBgra(uint8_t y, uint8_t u, uint8_t v,  // converts one YUV sample into 4 bytes: B, G, R, A
+                                  uint8_t* const bgra) {  // destination; bgra[0..3] are written
+  int r, g, b;  // filled in by the YUV_TO_RGB macro
+  YUV_TO_RGB(y, u, v, r, g, b);
+  bgra[0] = b;
+  bgra[1] = g;
+  bgra[2] = r;
+  bgra[3] = 0xff;  // alpha is always written as 0xff
+}
+static WEBP_INLINE void YuvToRgba(uint8_t y, uint8_t u, uint8_t v,  // converts one YUV sample into 4 bytes: R, G, B, A
+                                  uint8_t* const rgba) {  // destination; rgba[0..3] are written
+  int r, g, b;  // filled in by the YUV_TO_RGB macro
+  YUV_TO_RGB(y, u, v, r, g, b);
+  rgba[0] = r;
+  rgba[1] = g;
+  rgba[2] = b;
+  rgba[3] = 0xff;  // alpha is always written as 0xff
+}
+
+//------------------------------------------------------------------------------
+// Fancy upsampler
+
+#ifdef FANCY_UPSAMPLING
+
+// Given samples laid out in a square as:
+//  [a b]
+//  [c d]
+// we interpolate u/v as:
+//  ([9*a + 3*b + 3*c +   d    3*a + 9*b +   c + 3*d] + [8 8]) / 16
+//  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d] + [8 8]) / 16
+
+// We process u and v together, packed into one 32-bit value: u in the low 16 bits, v in the high 16 bits.
+#define LOAD_UV(u, v) ((u) | ((v) << 16))
+
+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) /* XSTEP: bytes per output pixel */ \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* cur_u, const uint8_t* cur_v,              \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int x;                                                                       \
+  const int last_pixel_pair = (len - 1) >> 1;  /* last x visited by the loop */ \
+  uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]);   /* top-left sample */        \
+  uint32_t l_uv  = LOAD_UV(cur_u[0], cur_v[0]);   /* left-sample */            \
+  assert(top_y != NULL);                   /* the top luma row is mandatory */ \
+  {                                        /* pixel 0, top row: 3:1 blend */   \
+    const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                \
+    FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                          \
+  }                                                                            \
+  if (bottom_y != NULL) {                  /* the bottom row is optional */    \
+    const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                \
+    FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                    \
+  }                                                                            \
+  for (x = 1; x <= last_pixel_pair; ++x) { /* emits pixels 2x-1 and 2x */      \
+    const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]);  /* top sample */       \
+    const uint32_t uv   = LOAD_UV(cur_u[x], cur_v[x]);  /* sample */           \
+    /* precompute invariant values associated with first and second diagonals*/\
+    const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;               \
+    const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                   \
+    const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                    \
+    {                                      /* top row of this pixel pair */    \
+      const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                             \
+      const uint32_t uv1 = (diag_03 + t_uv) >> 1;                              \
+      FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                          \
+           top_dst + (2 * x - 1) * XSTEP);                                     \
+      FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16),                          \
+           top_dst + (2 * x - 0) * XSTEP);                                     \
+    }                                                                          \
+    if (bottom_y != NULL) {                /* bottom row of this pixel pair */ \
+      const uint32_t uv0 = (diag_03 + l_uv) >> 1;                              \
+      const uint32_t uv1 = (diag_12 + uv) >> 1;                                \
+      FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                       \
+           bottom_dst + (2 * x - 1) * XSTEP);                                  \
+      FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16),                       \
+           bottom_dst + (2 * x + 0) * XSTEP);                                  \
+    }                                                                          \
+    tl_uv = t_uv;                          /* slide the window one sample */   \
+    l_uv = uv;                                                                 \
+  }                                                                            \
+  if (!(len & 1)) {       /* even len: pixel len-1 is not covered by the loop */ \
+    {                                                                          \
+      const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;              \
+      FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16),                            \
+           top_dst + (len - 1) * XSTEP);                                       \
+    }                                                                          \
+    if (bottom_y != NULL) {                                                    \
+      const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;              \
+      FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16),                         \
+           bottom_dst + (len - 1) * XSTEP);                                    \
+    }                                                                          \
+  }                                                                            \
+}
+
+// All variants implemented: one static line-pair upsampler per pixel converter; the last argument is XSTEP.
+UPSAMPLE_FUNC(UpsampleRgbLinePair,      YuvToRgb,      3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair,      YuvToBgr,      3)
+UPSAMPLE_FUNC(UpsampleRgbaLinePair,     YuvToRgba,     4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair,     YuvToBgra,     4)
+UPSAMPLE_FUNC(UpsampleArgbLinePair,     YuvToArgb,     4)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair, YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgb565LinePair,   YuvToRgb565,   2)
+
+#undef LOAD_UV  // both helper macros are only needed for the instantiations above
+#undef UPSAMPLE_FUNC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitUpsamplersMIPSdspR2(void);  // prototype for the definition that follows
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersMIPSdspR2(void) {  // fills all eleven WebPUpsamplers[] slots with the upsamplers from this file
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+  WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
+  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;      // same routine as MODE_RGBA
+  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;      // same routine as MODE_BGRA
+  WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;      // same routine as MODE_ARGB
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;  // same routine as MODE_RGBA_4444
+}
+
+#endif  // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+// YUV444 converter
+
+#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP) /* XSTEP: bytes per output pixel */ \
+static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
+                      uint8_t* dst, int len) {                                 \
+  int i;                                                                       \
+  for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);  /* one y/u/v triple per pixel */ \
+}
+
+YUV444_FUNC(Yuv444ToRgb,      YuvToRgb,      3)  // one static row converter per pixel converter
+YUV444_FUNC(Yuv444ToBgr,      YuvToBgr,      3)
+YUV444_FUNC(Yuv444ToRgba,     YuvToRgba,     4)
+YUV444_FUNC(Yuv444ToBgra,     YuvToBgra,     4)
+YUV444_FUNC(Yuv444ToArgb,     YuvToArgb,     4)
+YUV444_FUNC(Yuv444ToRgba4444, YuvToRgba4444, 2)
+YUV444_FUNC(Yuv444ToRgb565,   YuvToRgb565,   2)
+
+#undef YUV444_FUNC  // only needed for the instantiations above
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitYUV444ConvertersMIPSdspR2(void);  // prototype for the definition that follows
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersMIPSdspR2(void) {  // fills all eleven WebPYUV444Converters[] slots with the converters from this file
+  WebPYUV444Converters[MODE_RGB]       = Yuv444ToRgb;
+  WebPYUV444Converters[MODE_RGBA]      = Yuv444ToRgba;
+  WebPYUV444Converters[MODE_BGR]       = Yuv444ToBgr;
+  WebPYUV444Converters[MODE_BGRA]      = Yuv444ToBgra;
+  WebPYUV444Converters[MODE_ARGB]      = Yuv444ToArgb;
+  WebPYUV444Converters[MODE_RGBA_4444] = Yuv444ToRgba4444;
+  WebPYUV444Converters[MODE_RGB_565]   = Yuv444ToRgb565;
+  WebPYUV444Converters[MODE_rgbA]      = Yuv444ToRgba;      // same routine as MODE_RGBA
+  WebPYUV444Converters[MODE_bgrA]      = Yuv444ToBgra;      // same routine as MODE_BGRA
+  WebPYUV444Converters[MODE_Argb]      = Yuv444ToArgb;      // same routine as MODE_ARGB
+  WebPYUV444Converters[MODE_rgbA_4444] = Yuv444ToRgba4444;  // same routine as MODE_RGBA_4444
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersMIPSdspR2)  // macro is not defined in this file; presumably emits a placeholder definition - confirm in dsp.h
+
+#endif  // WEBP_USE_MIPS_DSP_R2
+
+#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_MIPS_DSP_R2))  // the real WebPInitUpsamplersMIPSdspR2 is only compiled when both macros are defined
+WEBP_DSP_INIT_STUB(WebPInitUpsamplersMIPSdspR2)
+#endif
diff --git a/drivers/webp/dsp/upsampling_neon.c b/drivers/webp/dsp/upsampling_neon.c
new file mode 100644
index 0000000000..a8384c2149
--- /dev/null
+++ b/drivers/webp/dsp/upsampling_neon.c
@@ -0,0 +1,261 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// NEON version of YUV to RGB upsampling functions.
+//
+// Author: mans@mansr.com (Mans Rullgard)
+// Based on SSE code by: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include <assert.h>
+#include <arm_neon.h>
+#include <string.h>
+#include "./neon.h"
+#include "./yuv.h"
+
+#ifdef FANCY_UPSAMPLING
+
+//-----------------------------------------------------------------------------
+// U/V upsampling
+
+// Loads 9 samples each from rows r1 and r2, generates 16 samples per row.
+#define UPSAMPLE_16PIXELS(r1, r2, out) {                                \
+  uint8x8_t a = vld1_u8(r1);                                            \
+  uint8x8_t b = vld1_u8(r1 + 1);                                        \
+  uint8x8_t c = vld1_u8(r2);                                            \
+  uint8x8_t d = vld1_u8(r2 + 1);                                        \
+  /* widen to 16 bits and double: al = 2a, bl = 2b, cl = 2c, dl = 2d */ \
+  uint16x8_t al = vshll_n_u8(a, 1);                                     \
+  uint16x8_t bl = vshll_n_u8(b, 1);                                     \
+  uint16x8_t cl = vshll_n_u8(c, 1);                                     \
+  uint16x8_t dl = vshll_n_u8(d, 1);                                     \
+                                                                        \
+  uint8x8_t diag1, diag2;                                               \
+  uint16x8_t sl;                                                        \
+                                                                        \
+  /* a + b + c + d */                                                   \
+  sl = vaddl_u8(a,  b);                                                 \
+  sl = vaddw_u8(sl, c);                                                 \
+  sl = vaddw_u8(sl, d);                                                 \
+                                                                        \
+  al = vaddq_u16(sl, al); /* 3a +  b +  c +  d */                       \
+  bl = vaddq_u16(sl, bl); /*  a + 3b +  c +  d */                       \
+                                                                        \
+  al = vaddq_u16(al, dl); /* 3a +  b +  c + 3d */                       \
+  bl = vaddq_u16(bl, cl); /*  a + 3b + 3c +  d */                       \
+  /* narrow back to 8 bits: diag2 = al / 8, diag1 = bl / 8 */           \
+  diag2 = vshrn_n_u16(al, 3);                                           \
+  diag1 = vshrn_n_u16(bl, 3);                                           \
+  /* e.g. a = (a + diag1 + 1) / 2, i.e. about (9a + 3b + 3c + d) / 16 */\
+  a = vrhadd_u8(a, diag1);                                              \
+  b = vrhadd_u8(b, diag2);                                              \
+  c = vrhadd_u8(c, diag2);                                              \
+  d = vrhadd_u8(d, diag1);                                              \
+  /* interleaved stores: r1-based row at out, r2-based at out + 32 */   \
+  {                                                                     \
+    uint8x8x2_t a_b, c_d;                                               \
+    INIT_VECTOR2(a_b, a, b);                                            \
+    INIT_VECTOR2(c_d, c, d);                                            \
+    vst2_u8(out,      a_b);                                             \
+    vst2_u8(out + 32, c_d);                                             \
+  }                                                                     \
+}
+
+// Out-of-line UPSAMPLE_16PIXELS: saves code size on non-critical paths.
+static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
+                             uint8_t *out) {
+  UPSAMPLE_16PIXELS(r1, r2, out);
+}
+
+#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) {                  \
+  uint8_t r1[9], r2[9];  /* Upsample16Pixels() loads 9 bytes per row */ \
+  memcpy(r1, (tb), (num_pixels));                                       \
+  memcpy(r2, (bb), (num_pixels));                                       \
+  /* pad to 9 bytes with the last sample; needs 1 <= num_pixels <= 9 */ \
+  memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels));    \
+  memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels));    \
+  Upsample16Pixels(r1, r2, out);                                        \
+}
+
+//-----------------------------------------------------------------------------
+// YUV->RGB conversion
+// Loaded into 'cf16'; CONVERT8 selects each entry by lane index (0..3).
+static const int16_t kCoeffs[4] = { kYScale, kVToR, kUToG, kVToG };
+// All-255 vector, stored as the 4th channel by STORE_Rgba / STORE_Bgra.
+#define v255 vdup_n_u8(255)
+
+#define STORE_Rgb(out, r, g, b) do {                                    \
+  uint8x8x3_t r_g_b;                                                    \
+  INIT_VECTOR3(r_g_b, r, g, b);                                         \
+  vst3_u8(out, r_g_b);  /* 8 pixels, 3 interleaved bytes each */        \
+} while (0)
+
+#define STORE_Bgr(out, r, g, b) do {                                    \
+  uint8x8x3_t b_g_r;                                                    \
+  INIT_VECTOR3(b_g_r, b, g, r);                                         \
+  vst3_u8(out, b_g_r);  /* same as STORE_Rgb, r and b swapped */        \
+} while (0)
+
+#define STORE_Rgba(out, r, g, b) do {                                   \
+  uint8x8x4_t r_g_b_v255;                                               \
+  INIT_VECTOR4(r_g_b_v255, r, g, b, v255);                              \
+  vst4_u8(out, r_g_b_v255);  /* 8 pixels, 4 bytes each incl. v255 */    \
+} while (0)
+
+#define STORE_Bgra(out, r, g, b) do {                                   \
+  uint8x8x4_t b_g_r_v255;                                               \
+  INIT_VECTOR4(b_g_r_v255, b, g, r, v255);                              \
+  vst4_u8(out, b_g_r_v255);  /* 8 pixels, 4 bytes each incl. v255 */    \
+} while (0)
+
+#define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) {            \
+  int i;                                                                \
+  for (i = 0; i < N; i += 8) {  /* 8 pixels per iteration */            \
+    const int off = ((cur_x) + i) * XSTEP;  /* byte offset in out */    \
+    uint8x8_t y  = vld1_u8((src_y) + (cur_x)  + i);                     \
+    uint8x8_t u  = vld1_u8((src_uv) + i);                               \
+    uint8x8_t v  = vld1_u8((src_uv) + i + 16);  /* v at u + 16 */       \
+    const int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16));       \
+    const int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128));      \
+    const int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128));      \
+    int32x4_t yl = vmull_lane_s16(vget_low_s16(yy),  cf16, 0);          \
+    int32x4_t yh = vmull_lane_s16(vget_high_s16(yy), cf16, 0);          \
+    const int32x4_t rl = vmlal_lane_s16(yl, vget_low_s16(vv),  cf16, 1);\
+    const int32x4_t rh = vmlal_lane_s16(yh, vget_high_s16(vv), cf16, 1);\
+    int32x4_t gl = vmlsl_lane_s16(yl, vget_low_s16(uu),  cf16, 2);      \
+    int32x4_t gh = vmlsl_lane_s16(yh, vget_high_s16(uu), cf16, 2);      \
+    const int32x4_t bl = vmovl_s16(vget_low_s16(uu));                   \
+    const int32x4_t bh = vmovl_s16(vget_high_s16(uu));                  \
+    gl = vmlsl_lane_s16(gl, vget_low_s16(vv),  cf16, 3);                \
+    gh = vmlsl_lane_s16(gh, vget_high_s16(vv), cf16, 3);                \
+    yl = vmlaq_lane_s32(yl, bl, cf32, 0);  /* blue += kUToB * uu */     \
+    yh = vmlaq_lane_s32(yh, bh, cf32, 0);                               \
+    /* vrshrn_n_s32() already incorporates the rounding constant */     \
+    y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, YUV_FIX2),            \
+                                 vrshrn_n_s32(rh, YUV_FIX2)));          \
+    u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, YUV_FIX2),            \
+                                 vrshrn_n_s32(gh, YUV_FIX2)));          \
+    v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(yl, YUV_FIX2),            \
+                                 vrshrn_n_s32(yh, YUV_FIX2)));          \
+    STORE_ ## FMT(out + off, y, u, v);  /* y, u, v now hold r, g, b */  \
+  }                                                                     \
+}
+
+#define CONVERT1(FUNC, XSTEP, N, src_y, src_uv, rgb, cur_x) {           \
+  int i;                                                                \
+  for (i = 0; i < N; i++) {  /* scalar path: one pixel per pass */      \
+    const int off = ((cur_x) + i) * XSTEP;  /* byte offset in rgb */    \
+    const int y = src_y[(cur_x) + i];                                   \
+    const int u = (src_uv)[i];                                          \
+    const int v = (src_uv)[i + 16];  /* v at u + 16 */                  \
+    FUNC(y, u, v, rgb + off);                                           \
+  }                                                                     \
+}
+
+#define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv,                  \
+                      top_dst, bottom_dst, cur_x, len) {                \
+  CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x)                  \
+  if (bottom_y != NULL) {  /* optional row; its u/v are at uv + 32 */   \
+    CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x)   \
+  }                                                                     \
+}
+
+#define CONVERT2RGB_1(FUNC, XSTEP, top_y, bottom_y, uv,                 \
+                      top_dst, bottom_dst, cur_x, len) {                \
+  CONVERT1(FUNC, XSTEP, len, top_y, uv, top_dst, cur_x);                \
+  if (bottom_y != NULL) {  /* optional row; its u/v are at uv + 32 */   \
+    CONVERT1(FUNC, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \
+  }                                                                     \
+}
+
+#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP)                       \
+static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y,    \
+                      const uint8_t *top_u, const uint8_t *top_v,       \
+                      const uint8_t *cur_u, const uint8_t *cur_v,       \
+                      uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
+  int block;                                                            \
+  /* 16-byte aligned cache: top u, top v, bottom u, bottom v; 16 each */\
+  uint8_t uv_buf[2 * 32 + 15];                                          \
+  uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);     \
+  const int uv_len = (len + 1) >> 1;                                    \
+  /* 9 pixels must be read-able for each block */                       \
+  const int num_blocks = (uv_len - 1) >> 3;                             \
+  const int leftover = uv_len - num_blocks * 8;                         \
+  const int last_pos = 1 + 16 * num_blocks;                             \
+  /* diagonal term shared by the first pixel of both rows */            \
+  const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                  \
+  const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                  \
+  /* constants that CONVERT8 refers to by name */                       \
+  const int16x4_t cf16 = vld1_s16(kCoeffs);                             \
+  const int32x2_t cf32 = vdup_n_s32(kUToB);                             \
+  const uint8x8_t u16  = vdup_n_u8(16);                                 \
+  const uint8x8_t u128 = vdup_n_u8(128);                                \
+                                                                        \
+  /* First pixel is handled separately with the scalar converter */     \
+  assert(top_y != NULL);                                                \
+  {                                                                     \
+    const int u0 = (top_u[0] + u_diag) >> 1;                            \
+    const int v0 = (top_v[0] + v_diag) >> 1;                            \
+    VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst);                         \
+  }                                                                     \
+  if (bottom_y != NULL) {                                               \
+    const int u0 = (cur_u[0] + u_diag) >> 1;                            \
+    const int v0 = (cur_v[0] + v_diag) >> 1;                            \
+    VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst);                   \
+  }                                                                     \
+  /* full blocks: 8 u/v samples per row in, 16 pixels per row out */    \
+  for (block = 0; block < num_blocks; ++block) {                        \
+    UPSAMPLE_16PIXELS(top_u, cur_u, r_uv);                              \
+    UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16);                         \
+    CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv,                    \
+                  top_dst, bottom_dst, 16 * block + 1, 16);             \
+    top_u += 8;                                                         \
+    cur_u += 8;                                                         \
+    top_v += 8;                                                         \
+    cur_v += 8;                                                         \
+  }                                                                     \
+  /* tail: pad the leftover u/v samples, then convert pixel by pixel */ \
+  UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv);                    \
+  UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16);               \
+  CONVERT2RGB_1(VP8YuvTo ## FMT, XSTEP, top_y, bottom_y, r_uv,          \
+                top_dst, bottom_dst, last_pos, len - last_pos);         \
+}
+
+// NEON variants of the fancy upsampler (last argument: bytes per pixel).
+NEON_UPSAMPLE_FUNC(UpsampleRgbLinePair,  Rgb,  3)
+NEON_UPSAMPLE_FUNC(UpsampleBgrLinePair,  Bgr,  3)
+NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePair, Rgba, 4)
+NEON_UPSAMPLE_FUNC(UpsampleBgraLinePair, Bgra, 4)
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+extern void WebPInitUpsamplersNEON(void);
+// Installs the NEON line-pair upsamplers into WebPUpsamplers[].
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersNEON(void) {
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;  // same as MODE_RGBA
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;  // same as MODE_BGRA
+}
+
+#endif  // FANCY_UPSAMPLING
+
+#endif  // WEBP_USE_NEON
+
+#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_NEON))
+WEBP_DSP_INIT_STUB(WebPInitUpsamplersNEON)
+#endif
diff --git a/drivers/webp/dsp/upsampling_sse2.c b/drivers/webp/dsp/upsampling_sse2.c
index 8cb275a02b..b85808e271 100644
--- a/drivers/webp/dsp/upsampling_sse2.c
+++ b/drivers/webp/dsp/upsampling_sse2.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // SSE2 version of YUV to RGB upsampling functions.
@@ -18,10 +20,6 @@
 #include <string.h>
 #include "./yuv.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 #ifdef FANCY_UPSAMPLING
 
 // We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
@@ -49,23 +47,23 @@ extern "C" {
   (out) = _mm_sub_epi8(tmp0, tmp4);    /* (k + in + 1) / 2 - lsb_correction */ \
 } while (0)
 
-// pack and store two alterning pixel rows
+// pack and store two alternating pixel rows
 #define PACK_AND_STORE(a, b, da, db, out) do {                                 \
-  const __m128i ta = _mm_avg_epu8(a, da);  /* (9a + 3b + 3c +  d + 8) / 16 */  \
-  const __m128i tb = _mm_avg_epu8(b, db);  /* (3a + 9b +  c + 3d + 8) / 16 */  \
-  const __m128i t1 = _mm_unpacklo_epi8(ta, tb);                                \
-  const __m128i t2 = _mm_unpackhi_epi8(ta, tb);                                \
-  _mm_store_si128(((__m128i*)(out)) + 0, t1);                                  \
-  _mm_store_si128(((__m128i*)(out)) + 1, t2);                                  \
+  const __m128i t_a = _mm_avg_epu8(a, da);  /* (9a + 3b + 3c +  d + 8) / 16 */ \
+  const __m128i t_b = _mm_avg_epu8(b, db);  /* (3a + 9b +  c + 3d + 8) / 16 */ \
+  const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b);                             \
+  const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b);                             \
+  _mm_store_si128(((__m128i*)(out)) + 0, t_1);                                 \
+  _mm_store_si128(((__m128i*)(out)) + 1, t_2);                                 \
 } while (0)
 
 // Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
 #define UPSAMPLE_32PIXELS(r1, r2, out) {                                       \
   const __m128i one = _mm_set1_epi8(1);                                        \
-  const __m128i a = _mm_loadu_si128((__m128i*)&(r1)[0]);                       \
-  const __m128i b = _mm_loadu_si128((__m128i*)&(r1)[1]);                       \
-  const __m128i c = _mm_loadu_si128((__m128i*)&(r2)[0]);                       \
-  const __m128i d = _mm_loadu_si128((__m128i*)&(r2)[1]);                       \
+  const __m128i a = _mm_loadu_si128((const __m128i*)&(r1)[0]);                 \
+  const __m128i b = _mm_loadu_si128((const __m128i*)&(r1)[1]);                 \
+  const __m128i c = _mm_loadu_si128((const __m128i*)&(r2)[0]);                 \
+  const __m128i d = _mm_loadu_si128((const __m128i*)&(r2)[1]);                 \
                                                                                \
   const __m128i s = _mm_avg_epu8(a, d);        /* s = (a + d + 1) / 2 */       \
   const __m128i t = _mm_avg_epu8(b, c);        /* t = (b + c + 1) / 2 */       \
@@ -85,8 +83,8 @@ extern "C" {
   GET_M(ad, s, diag2);                  /* diag2 = (3a + b + c + 3d) / 8 */    \
                                                                                \
   /* pack the alternate pixels */                                              \
-  PACK_AND_STORE(a, b, diag1, diag2, &(out)[0 * 32]);                          \
-  PACK_AND_STORE(c, d, diag2, diag1, &(out)[2 * 32]);                          \
+  PACK_AND_STORE(a, b, diag1, diag2, out +      0);  /* store top */           \
+  PACK_AND_STORE(c, d, diag2, diag1, out + 2 * 32);  /* store bottom */        \
 }
 
 // Turn the macro into a function for reducing code-size when non-critical
@@ -106,104 +104,140 @@ static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
   Upsample32Pixels(r1, r2, out);                                               \
 }
 
-#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, uv,                          \
+#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y,                              \
                     top_dst, bottom_dst, cur_x, num_pixels) {                  \
   int n;                                                                       \
-  if (top_y) {                                                                 \
-    for (n = 0; n < (num_pixels); ++n) {                                       \
-      FUNC(top_y[(cur_x) + n], (uv)[n], (uv)[32 + n],                          \
-           top_dst + ((cur_x) + n) * XSTEP);                                   \
-    }                                                                          \
+  for (n = 0; n < (num_pixels); ++n) {                                         \
+    FUNC(top_y[(cur_x) + n], r_u[n], r_v[n],                                   \
+         top_dst + ((cur_x) + n) * XSTEP);                                     \
   }                                                                            \
-  if (bottom_y) {                                                              \
+  if (bottom_y != NULL) {                                                      \
     for (n = 0; n < (num_pixels); ++n) {                                       \
-      FUNC(bottom_y[(cur_x) + n], (uv)[64 + n], (uv)[64 + 32 + n],             \
+      FUNC(bottom_y[(cur_x) + n], r_u[64 + n], r_v[64 + n],                    \
            bottom_dst + ((cur_x) + n) * XSTEP);                                \
     }                                                                          \
   }                                                                            \
 }
 
+#define CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y,                           \
+                       top_dst, bottom_dst, cur_x) do {                        \
+  FUNC##32(top_y + (cur_x), r_u, r_v, top_dst + (cur_x) * XSTEP);              \
+  if (bottom_y != NULL) {                                                      \
+    FUNC##32(bottom_y + (cur_x), r_u + 64, r_v + 64,                           \
+             bottom_dst + (cur_x) * XSTEP);                                    \
+  }                                                                            \
+} while (0)
+
 #define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                             \
 static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
                       const uint8_t* top_u, const uint8_t* top_v,              \
                       const uint8_t* cur_u, const uint8_t* cur_v,              \
                       uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
-  int b;                                                                       \
-  /* 16 byte aligned array to cache reconstructed u and v */                   \
+  int uv_pos, pos;                                                             \
+  /* 16byte-aligned array to cache reconstructed u and v */                    \
   uint8_t uv_buf[4 * 32 + 15];                                                 \
-  uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);            \
-  const int uv_len = (len + 1) >> 1;                                           \
-  /* 17 pixels must be read-able for each block */                             \
-  const int num_blocks = (uv_len - 1) >> 4;                                    \
-  const int leftover = uv_len - num_blocks * 16;                               \
-  const int last_pos = 1 + 32 * num_blocks;                                    \
-                                                                               \
-  const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                         \
-  const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                         \
+  uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);             \
+  uint8_t* const r_v = r_u + 32;                                               \
                                                                                \
-  assert(len > 0);                                                             \
-  /* Treat the first pixel in regular way */                                   \
-  if (top_y) {                                                                 \
-    const int u0 = (top_u[0] + u_diag) >> 1;                                   \
-    const int v0 = (top_v[0] + v_diag) >> 1;                                   \
-    FUNC(top_y[0], u0, v0, top_dst);                                           \
+  assert(top_y != NULL);                                                       \
+  {   /* Treat the first pixel in regular way */                               \
+    const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                       \
+    const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                       \
+    const int u0_t = (top_u[0] + u_diag) >> 1;                                 \
+    const int v0_t = (top_v[0] + v_diag) >> 1;                                 \
+    FUNC(top_y[0], u0_t, v0_t, top_dst);                                       \
+    if (bottom_y != NULL) {                                                    \
+      const int u0_b = (cur_u[0] + u_diag) >> 1;                               \
+      const int v0_b = (cur_v[0] + v_diag) >> 1;                               \
+      FUNC(bottom_y[0], u0_b, v0_b, bottom_dst);                               \
+    }                                                                          \
   }                                                                            \
-  if (bottom_y) {                                                              \
-    const int u0 = (cur_u[0] + u_diag) >> 1;                                   \
-    const int v0 = (cur_v[0] + v_diag) >> 1;                                   \
-    FUNC(bottom_y[0], u0, v0, bottom_dst);                                     \
+  /* For UPSAMPLE_32PIXELS, 17 u/v values must be read-able for each block */  \
+  for (pos = 1, uv_pos = 0; pos + 32 + 1 <= len; pos += 32, uv_pos += 16) {    \
+    UPSAMPLE_32PIXELS(top_u + uv_pos, cur_u + uv_pos, r_u);                    \
+    UPSAMPLE_32PIXELS(top_v + uv_pos, cur_v + uv_pos, r_v);                    \
+    CONVERT2RGB_32(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst, pos);    \
   }                                                                            \
-                                                                               \
-  for (b = 0; b < num_blocks; ++b) {                                           \
-    UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32);                            \
-    UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32);                            \
-    CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst,       \
-                32 * b + 1, 32)                                                \
-    top_u += 16;                                                               \
-    cur_u += 16;                                                               \
-    top_v += 16;                                                               \
-    cur_v += 16;                                                               \
+  if (len > 1) {                                                               \
+    const int left_over = ((len + 1) >> 1) - (pos >> 1);                       \
+    assert(left_over > 0);                                                     \
+    UPSAMPLE_LAST_BLOCK(top_u + uv_pos, cur_u + uv_pos, left_over, r_u);       \
+    UPSAMPLE_LAST_BLOCK(top_v + uv_pos, cur_v + uv_pos, left_over, r_v);       \
+    CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, top_dst, bottom_dst,             \
+                pos, len - pos);                                               \
   }                                                                            \
-                                                                               \
-  UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv + 0 * 32);                  \
-  UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 1 * 32);                  \
-  CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst,         \
-              last_pos, len - last_pos);                                       \
 }
 
 // SSE2 variants of the fancy upsampler.
-SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePairSSE2,  VP8YuvToRgb,  3)
-SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePairSSE2,  VP8YuvToBgr,  3)
-SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePairSSE2, VP8YuvToRgba, 4)
-SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
 
 #undef GET_M
 #undef PACK_AND_STORE
 #undef UPSAMPLE_32PIXELS
 #undef UPSAMPLE_LAST_BLOCK
 #undef CONVERT2RGB
+#undef CONVERT2RGB_32
 #undef SSE2_UPSAMPLE_FUNC
 
 //------------------------------------------------------------------------------
+// Entry point
 
 extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
 
-void WebPInitUpsamplersSSE2(void) {
-  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePairSSE2;
-  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2;
-  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePairSSE2;
-  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2;
-}
+extern void WebPInitUpsamplersSSE2(void);
 
-void WebPInitPremultiplySSE2(void) {
-  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairSSE2;
-  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairSSE2;
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitUpsamplersSSE2(void) {
+  VP8YUVInitSSE2();
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePair;
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePair;
 }
 
 #endif  // FANCY_UPSAMPLING
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+//------------------------------------------------------------------------------
+
+extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+extern void WebPInitYUV444ConvertersSSE2(void);
+
+#define YUV444_FUNC(FUNC_NAME, CALL, XSTEP) \
+extern void WebP##FUNC_NAME##C(const uint8_t* y, const uint8_t* u,             \
+                               const uint8_t* v, uint8_t* dst, int len);       \
+static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
+                      uint8_t* dst, int len) {                                 \
+  int i;                                                                       \
+  const int max_len = len & ~31;  /* len rounded down to a multiple of 32 */   \
+  for (i = 0; i < max_len; i += 32) CALL(y + i, u + i, v + i, dst + i * XSTEP);\
+  if (i < len) {  /* C fallback for the remaining len - i pixels */            \
+    WebP##FUNC_NAME##C(y + i, u + i, v + i, dst + i * XSTEP, len - i);         \
+  }                                                                            \
+}
 
-#endif   // WEBP_USE_SSE2
+YUV444_FUNC(Yuv444ToRgba, VP8YuvToRgba32, 4)  // 4 output bytes per pixel
+YUV444_FUNC(Yuv444ToBgra, VP8YuvToBgra32, 4)  // no ';': expands to a function
+YUV444_FUNC(Yuv444ToRgb, VP8YuvToRgb32, 3)    // 3 output bytes per pixel
+YUV444_FUNC(Yuv444ToBgr, VP8YuvToBgr32, 3)
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitYUV444ConvertersSSE2(void) {
+  VP8YUVInitSSE2();
+  WebPYUV444Converters[MODE_RGBA] = Yuv444ToRgba;
+  WebPYUV444Converters[MODE_BGRA] = Yuv444ToBgra;
+  WebPYUV444Converters[MODE_RGB]  = Yuv444ToRgb;
+  WebPYUV444Converters[MODE_BGR]  = Yuv444ToBgr;
+}
+
+#else
+
+WEBP_DSP_INIT_STUB(WebPInitYUV444ConvertersSSE2)
+
+#endif  // WEBP_USE_SSE2
+
+#if !(defined(FANCY_UPSAMPLING) && defined(WEBP_USE_SSE2))
+WEBP_DSP_INIT_STUB(WebPInitUpsamplersSSE2)
+#endif
diff --git a/drivers/webp/dsp/yuv.c b/drivers/webp/dsp/yuv.c
index 7f05f9a3aa..f50a253168 100644
--- a/drivers/webp/dsp/yuv.c
+++ b/drivers/webp/dsp/yuv.c
@@ -1,26 +1,19 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-// YUV->RGB conversion function
+// YUV->RGB conversion functions
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include "./yuv.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-enum { YUV_HALF = 1 << (YUV_FIX - 1) };
-
-int16_t VP8kVToR[256], VP8kUToB[256];
-int32_t VP8kVToG[256], VP8kUToG[256];
-uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
-uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+#if defined(WEBP_YUV_USE_TABLE)
 
 static int done = 0;
 
@@ -28,11 +21,17 @@ static WEBP_INLINE uint8_t clip(int v, int max_value) {
   return v < 0 ? 0 : v > max_value ? max_value : v;
 }
 
-void VP8YUVInit(void) {
+int16_t VP8kVToR[256], VP8kUToB[256];
+int32_t VP8kVToG[256], VP8kUToG[256];
+uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
+uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {
   int i;
   if (done) {
     return;
   }
+#ifndef USE_YUVj
   for (i = 0; i < 256; ++i) {
     VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
     VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
@@ -44,9 +43,238 @@ void VP8YUVInit(void) {
     VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
     VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
   }
+#else
+  for (i = 0; i < 256; ++i) {
+    VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
+    VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
+    VP8kVToG[i] = -46802 * (i - 128);
+    VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
+  }
+  for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
+    const int k = i;
+    VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
+    VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
+  }
+#endif
+
   done = 1;
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+#else
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInit(void) {}
+
+#endif  // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Plain-C version
+
+#define ROW_FUNC(FUNC_NAME, FUNC, XSTEP)                                       \
+static void FUNC_NAME(const uint8_t* y,                                        \
+                      const uint8_t* u, const uint8_t* v,                      \
+                      uint8_t* dst, int len) {                                 \
+  const uint8_t* const end = dst + (len & ~1) * XSTEP;                         \
+  while (dst != end) {                                                         \
+    FUNC(y[0], u[0], v[0], dst);                                               \
+    FUNC(y[1], u[0], v[0], dst + XSTEP);                                       \
+    y += 2;                                                                    \
+    ++u;                                                                       \
+    ++v;                                                                       \
+    dst += 2 * XSTEP;                                                          \
+  }                                                                            \
+  if (len & 1) {                                                               \
+    FUNC(y[0], u[0], v[0], dst);                                               \
+  }                                                                            \
+}                                                                              \
+
+// All variants implemented.
+ROW_FUNC(YuvToRgbRow,      VP8YuvToRgb,  3)
+ROW_FUNC(YuvToBgrRow,      VP8YuvToBgr,  3)
+ROW_FUNC(YuvToRgbaRow,     VP8YuvToRgba, 4)
+ROW_FUNC(YuvToBgraRow,     VP8YuvToBgra, 4)
+ROW_FUNC(YuvToArgbRow,     VP8YuvToArgb, 4)
+ROW_FUNC(YuvToRgba4444Row, VP8YuvToRgba4444, 2)
+ROW_FUNC(YuvToRgb565Row,   VP8YuvToRgb565, 2)
+
+#undef ROW_FUNC
+
+// Main call for processing a plane with a WebPSamplerRowFunc function:
+void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
+                             const uint8_t* u, const uint8_t* v, int uv_stride,
+                             uint8_t* dst, int dst_stride,
+                             int width, int height, WebPSamplerRowFunc func) {
+  int j;
+  for (j = 0; j < height; ++j) {
+    func(y, u, v, dst, width);
+    y += y_stride;
+    if (j & 1) {
+      u += uv_stride;
+      v += uv_stride;
+    }
+    dst += dst_stride;
+  }
+}
+
+//-----------------------------------------------------------------------------
+// Main call
+
+WebPSamplerRowFunc WebPSamplers[MODE_LAST];
+
+extern void WebPInitSamplersSSE2(void);
+extern void WebPInitSamplersMIPS32(void);
+extern void WebPInitSamplersMIPSdspR2(void);
+
+static volatile VP8CPUInfo yuv_last_cpuinfo_used =
+    (VP8CPUInfo)&yuv_last_cpuinfo_used;
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplers(void) {
+  if (yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+  WebPSamplers[MODE_RGB]       = YuvToRgbRow;
+  WebPSamplers[MODE_RGBA]      = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]       = YuvToBgrRow;
+  WebPSamplers[MODE_BGRA]      = YuvToBgraRow;
+  WebPSamplers[MODE_ARGB]      = YuvToArgbRow;
+  WebPSamplers[MODE_RGBA_4444] = YuvToRgba4444Row;
+  WebPSamplers[MODE_RGB_565]   = YuvToRgb565Row;
+  WebPSamplers[MODE_rgbA]      = YuvToRgbaRow;
+  WebPSamplers[MODE_bgrA]      = YuvToBgraRow;
+  WebPSamplers[MODE_Argb]      = YuvToArgbRow;
+  WebPSamplers[MODE_rgbA_4444] = YuvToRgba4444Row;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitSamplersSSE2();
+    }
+#endif  // WEBP_USE_SSE2
+#if defined(WEBP_USE_MIPS32)
+    if (VP8GetCPUInfo(kMIPS32)) {
+      WebPInitSamplersMIPS32();
+    }
+#endif  // WEBP_USE_MIPS32
+#if defined(WEBP_USE_MIPS_DSP_R2)
+    if (VP8GetCPUInfo(kMIPSdspR2)) {
+      WebPInitSamplersMIPSdspR2();
+    }
+#endif  // WEBP_USE_MIPS_DSP_R2
+  }
+  yuv_last_cpuinfo_used = VP8GetCPUInfo;
+}
+
+//-----------------------------------------------------------------------------
+// ARGB -> YUV converters
+
+static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    const uint32_t p = argb[i];
+    y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >>  0) & 0xff,
+                     YUV_HALF);
+  }
+}
+
+void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
+                           int src_width, int do_store) {
+  // No rounding. Last pixel is dealt with separately.
+  const int uv_width = src_width >> 1;
+  int i;
+  for (i = 0; i < uv_width; ++i) {
+    const uint32_t v0 = argb[2 * i + 0];
+    const uint32_t v1 = argb[2 * i + 1];
+    // VP8RGBToU/V expects four accumulated pixels. Hence we need to
+    // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
+    const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
+    const int g = ((v0 >>  7) & 0x1fe) + ((v1 >>  7) & 0x1fe);
+    const int b = ((v0 <<  1) & 0x1fe) + ((v1 <<  1) & 0x1fe);
+    const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
+    const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
+    if (do_store) {
+      u[i] = tmp_u;
+      v[i] = tmp_v;
+    } else {
+      // Approximated average-of-four. But it's an acceptable diff.
+      u[i] = (u[i] + tmp_u + 1) >> 1;
+      v[i] = (v[i] + tmp_v + 1) >> 1;
+    }
+  }
+  if (src_width & 1) {       // last pixel
+    const uint32_t v0 = argb[2 * i + 0];
+    const int r = (v0 >> 14) & 0x3fc;
+    const int g = (v0 >>  6) & 0x3fc;
+    const int b = (v0 <<  2) & 0x3fc;
+    const int tmp_u = VP8RGBToU(r, g, b, YUV_HALF << 2);
+    const int tmp_v = VP8RGBToV(r, g, b, YUV_HALF << 2);
+    if (do_store) {
+      u[i] = tmp_u;
+      v[i] = tmp_v;
+    } else {
+      u[i] = (u[i] + tmp_u + 1) >> 1;
+      v[i] = (v[i] + tmp_v + 1) >> 1;
+    }
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
+  int i;
+  for (i = 0; i < width; ++i, rgb += 3) {
+    y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
+  }
+}
+
+static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
+  int i;
+  for (i = 0; i < width; ++i, bgr += 3) {
+    y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);
+  }
+}
+
+void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
+                             uint8_t* u, uint8_t* v, int width) {
+  int i;
+  for (i = 0; i < width; i += 1, rgb += 4) {
+    const int r = rgb[0], g = rgb[1], b = rgb[2];
+    u[i] = VP8RGBToU(r, g, b, YUV_HALF << 2);
+    v[i] = VP8RGBToV(r, g, b, YUV_HALF << 2);
+  }
+}
+
+//-----------------------------------------------------------------------------
+
+void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
+void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
+void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
+                              uint8_t* u, uint8_t* v, int width);
+
+void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
+void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
+                            int src_width, int do_store);
+
+static volatile VP8CPUInfo rgba_to_yuv_last_cpuinfo_used =
+    (VP8CPUInfo)&rgba_to_yuv_last_cpuinfo_used;
+
+extern void WebPInitConvertARGBToYUVSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUV(void) {
+  if (rgba_to_yuv_last_cpuinfo_used == VP8GetCPUInfo) return;
+
+  WebPConvertARGBToY = ConvertARGBToY;
+  WebPConvertARGBToUV = WebPConvertARGBToUV_C;
+
+  WebPConvertRGB24ToY = ConvertRGB24ToY;
+  WebPConvertBGR24ToY = ConvertBGR24ToY;
+
+  WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
+
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitConvertARGBToYUVSSE2();
+    }
+#endif  // WEBP_USE_SSE2
+  }
+  rgba_to_yuv_last_cpuinfo_used = VP8GetCPUInfo;
+}
diff --git a/drivers/webp/dsp/yuv.h b/drivers/webp/dsp/yuv.h
index a569109c54..af435a5b3e 100644
--- a/drivers/webp/dsp/yuv.h
+++ b/drivers/webp/dsp/yuv.h
@@ -1,36 +1,165 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // inline YUV<->RGB conversion function
 //
+// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
+// More information at: http://en.wikipedia.org/wiki/YCbCr
+// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
+// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
+// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
+// We use 16bit fixed point operations for RGB->YUV conversion (YUV_FIX).
+//
+// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
+//   R = 1.164 * (Y-16) + 1.596 * (V-128)
+//   G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
+//   B = 1.164 * (Y-16)                   + 2.018 * (U-128)
+// where Y is in the [16,235] range, and U/V in the [16,240] range.
+// In the table-lookup version (WEBP_YUV_USE_TABLE), the common factor
+// "1.164 * (Y-16)" can be handled as an offset in the VP8kClip[] table.
+// So in this case the formulae should read:
+//   R = 1.164 * [Y + 1.371 * (V-128)                  ] - 18.624
+//   G = 1.164 * [Y - 0.698 * (V-128) - 0.336 * (U-128)] - 18.624
+//   B = 1.164 * [Y                   + 1.733 * (U-128)] - 18.624
+// once factorized.
+// For YUV->RGB conversion, only 14bit fixed precision is used (YUV_FIX2).
+// That's the maximum possible for a convenient ARM implementation.
+//
 // Author: Skal (pascal.massimino@gmail.com)
 
 #ifndef WEBP_DSP_YUV_H_
 #define WEBP_DSP_YUV_H_
 
+#include "./dsp.h"
 #include "../dec/decode_vp8.h"
 
+// Define the following to use the LUT-based code:
+// #define WEBP_YUV_USE_TABLE
+
+#if defined(WEBP_EXPERIMENTAL_FEATURES)
+// Do NOT activate this feature for real compression. This is only experimental!
+// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
+// This colorspace is close to Rec.601's Y'CbCr model with the notable
+// difference of allowing larger range for luma/chroma.
+// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
+// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
+// #define USE_YUVj
+#endif
+
 //------------------------------------------------------------------------------
 // YUV -> RGB conversion
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-enum { YUV_FIX = 16,                // fixed-point precision
-       YUV_RANGE_MIN = -227,        // min value of r/g/b output
-       YUV_RANGE_MAX = 256 + 226    // max value of r/g/b output
+enum {
+  YUV_FIX = 16,                    // fixed-point precision for RGB->YUV
+  YUV_HALF = 1 << (YUV_FIX - 1),
+  YUV_MASK = (256 << YUV_FIX) - 1,
+  YUV_RANGE_MIN = -227,            // min value of r/g/b output
+  YUV_RANGE_MAX = 256 + 226,       // max value of r/g/b output
+
+  YUV_FIX2 = 14,                   // fixed-point precision for YUV->RGB
+  YUV_HALF2 = 1 << (YUV_FIX2 - 1),
+  YUV_MASK2 = (256 << YUV_FIX2) - 1
 };
+
+// These constants are 14b fixed-point versions of ITU-R BT.601 constants.
+#define kYScale 19077    // 1.164 = 255 / 219
+#define kVToR   26149    // 1.596 = 255 / 112 * 0.701
+#define kUToG   6419     // 0.391 = 255 / 112 * 0.886 * 0.114 / 0.587
+#define kVToG   13320    // 0.813 = 255 / 112 * 0.701 * 0.299 / 0.587
+#define kUToB   33050    // 2.018 = 255 / 112 * 0.886
+#define kRCst (-kYScale * 16 - kVToR * 128 + YUV_HALF2)
+#define kGCst (-kYScale * 16 + kUToG * 128 + kVToG * 128 + YUV_HALF2)
+#define kBCst (-kYScale * 16 - kUToB * 128 + YUV_HALF2)
+
+//------------------------------------------------------------------------------
+
+#if !defined(WEBP_YUV_USE_TABLE)
+
+// slower on x86 by ~7-8%, but bit-exact with the SSE2 version
+
+static WEBP_INLINE int VP8Clip8(int v) {
+  return ((v & ~YUV_MASK2) == 0) ? (v >> YUV_FIX2) : (v < 0) ? 0 : 255;
+}
+
+static WEBP_INLINE int VP8YUVToR(int y, int v) {
+  return VP8Clip8(kYScale * y + kVToR * v + kRCst);
+}
+
+static WEBP_INLINE int VP8YUVToG(int y, int u, int v) {
+  return VP8Clip8(kYScale * y - kUToG * u - kVToG * v + kGCst);
+}
+
+static WEBP_INLINE int VP8YUVToB(int y, int u) {
+  return VP8Clip8(kYScale * y + kUToB * u + kBCst);
+}
+
+static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v,
+                                    uint8_t* const rgb) {
+  rgb[0] = VP8YUVToR(y, v);
+  rgb[1] = VP8YUVToG(y, u, v);
+  rgb[2] = VP8YUVToB(y, u);
+}
+
+static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v,
+                                    uint8_t* const bgr) {
+  bgr[0] = VP8YUVToB(y, u);
+  bgr[1] = VP8YUVToG(y, u, v);
+  bgr[2] = VP8YUVToR(y, v);
+}
+
+static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
+                                       uint8_t* const rgb) {
+  const int r = VP8YUVToR(y, v);      // 5 usable bits
+  const int g = VP8YUVToG(y, u, v);   // 6 usable bits
+  const int b = VP8YUVToB(y, u);      // 5 usable bits
+  const int rg = (r & 0xf8) | (g >> 5);
+  const int gb = ((g << 3) & 0xe0) | (b >> 3);
+#ifdef WEBP_SWAP_16BIT_CSP
+  rgb[0] = gb;
+  rgb[1] = rg;
+#else
+  rgb[0] = rg;
+  rgb[1] = gb;
+#endif
+}
+
+static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
+                                         uint8_t* const argb) {
+  const int r = VP8YUVToR(y, v);        // 4 usable bits
+  const int g = VP8YUVToG(y, u, v);     // 4 usable bits
+  const int b = VP8YUVToB(y, u);        // 4 usable bits
+  const int rg = (r & 0xf0) | (g >> 4);
+  const int ba = (b & 0xf0) | 0x0f;     // overwrite the lower 4 bits
+#ifdef WEBP_SWAP_16BIT_CSP
+  argb[0] = ba;
+  argb[1] = rg;
+#else
+  argb[0] = rg;
+  argb[1] = ba;
+#endif
+}
+
+#else
+
+// Table-based version, not totally equivalent to the SSE2 version.
+// Rounding diff is only +/-1 though.
+
 extern int16_t VP8kVToR[256], VP8kUToB[256];
 extern int32_t VP8kVToG[256], VP8kUToG[256];
 extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
 extern uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
 
-static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
+static WEBP_INLINE void VP8YuvToRgb(int y, int u, int v,
                                     uint8_t* const rgb) {
   const int r_off = VP8kVToR[v];
   const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
@@ -40,42 +169,60 @@ static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
   rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN];
 }
 
-static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
-                                       uint8_t* const rgb) {
+static WEBP_INLINE void VP8YuvToBgr(int y, int u, int v,
+                                    uint8_t* const bgr) {
   const int r_off = VP8kVToR[v];
   const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
   const int b_off = VP8kUToB[u];
-  rgb[0] = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
-            (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
-  rgb[1] = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
-            (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
-}
-
-static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
-                                     uint8_t* const argb) {
-  argb[0] = 0xff;
-  VP8YuvToRgb(y, u, v, argb + 1);
+  bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN];
+  bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN];
+  bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
 }
 
-static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
-                                         uint8_t* const argb) {
+static WEBP_INLINE void VP8YuvToRgb565(int y, int u, int v,
+                                       uint8_t* const rgb) {
   const int r_off = VP8kVToR[v];
   const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
   const int b_off = VP8kUToB[u];
-  // Don't update alpha (last 4 bits of argb[1])
-  argb[0] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
-             VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
-  argb[1] = 0x0f | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
+  const int rg = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
+                  (VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
+  const int gb = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
+                   (VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
+#ifdef WEBP_SWAP_16BIT_CSP
+  rgb[0] = gb;
+  rgb[1] = rg;
+#else
+  rgb[0] = rg;
+  rgb[1] = gb;
+#endif
 }
 
-static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
-                                    uint8_t* const bgr) {
+static WEBP_INLINE void VP8YuvToRgba4444(int y, int u, int v,
+                                         uint8_t* const argb) {
   const int r_off = VP8kVToR[v];
   const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
   const int b_off = VP8kUToB[u];
-  bgr[0] = VP8kClip[y + b_off - YUV_RANGE_MIN];
-  bgr[1] = VP8kClip[y + g_off - YUV_RANGE_MIN];
-  bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
+  const int rg = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
+                   VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
+  const int ba = (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4) | 0x0f;
+#ifdef WEBP_SWAP_16BIT_CSP
+  argb[0] = ba;
+  argb[1] = rg;
+#else
+  argb[0] = rg;
+  argb[1] = ba;
+#endif
+}
+
+#endif  // WEBP_YUV_USE_TABLE
+
+//-----------------------------------------------------------------------------
+// Alpha handling variants
+
+static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const argb) {
+  argb[0] = 0xff;
+  VP8YuvToRgb(y, u, v, argb + 1);
 }
 
 static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
@@ -93,35 +240,79 @@ static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
 // Must be called before everything, to initialize the tables.
 void VP8YUVInit(void);
 
+//-----------------------------------------------------------------------------
+// SSE2 extra functions (mostly for upsampling_sse2.c)
+
+#if defined(WEBP_USE_SSE2)
+
+// When the following is defined, tables are initialized statically, adding ~12k
+// to the binary size. Otherwise, they are initialized at run-time (small cost).
+#define WEBP_YUV_USE_SSE2_TABLES
+
+// Process 32 pixels and store the result (24b or 32b per pixel) in *dst.
+void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst);
+void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst);
+void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst);
+void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst);
+
+// Must be called to initialize tables before using the functions.
+void VP8YUVInitSSE2(void);
+
+#endif    // WEBP_USE_SSE2
+
 //------------------------------------------------------------------------------
 // RGB -> YUV conversion
-// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
-// More information at: http://en.wikipedia.org/wiki/YCbCr
-// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
-// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
-// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
-// We use 16bit fixed point operations.
 
-static WEBP_INLINE int VP8ClipUV(int v) {
-   v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
-   return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
+// Stub functions that can be called with various rounding values:
+static WEBP_INLINE int VP8ClipUV(int uv, int rounding) {
+  uv = (uv + rounding + (128 << (YUV_FIX + 2))) >> (YUV_FIX + 2);
+  return ((uv & ~0xff) == 0) ? uv : (uv < 0) ? 0 : 255;
 }
 
-static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
-  const int kRound = (1 << (YUV_FIX - 1)) + (16 << YUV_FIX);
+#ifndef USE_YUVj
+
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
   const int luma = 16839 * r + 33059 * g + 6420 * b;
-  return (luma + kRound) >> YUV_FIX;  // no need to clip
+  return (luma + rounding + (16 << YUV_FIX)) >> YUV_FIX;  // no need to clip
 }
 
-static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
-  return VP8ClipUV(-9719 * r - 19081 * g + 28800 * b);
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
+  const int u = -9719 * r - 19081 * g + 28800 * b;
+  return VP8ClipUV(u, rounding);
 }
 
-static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
-  return VP8ClipUV(+28800 * r - 24116 * g - 4684 * b);
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
+  const int v = +28800 * r - 24116 * g - 4684 * b;
+  return VP8ClipUV(v, rounding);
+}
+
+#else
+
+// This is the JPEG-YUV colorspace, only for comparison!
+// These are also 16bit precision coefficients from Rec.601, but with full
+// [0..255] output range.
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b, int rounding) {
+  const int luma = 19595 * r + 38470 * g + 7471 * b;
+  return (luma + rounding) >> YUV_FIX;  // no need to clip
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b, int rounding) {
+  const int u = -11058 * r - 21710 * g + 32768 * b;
+  return VP8ClipUV(u, rounding);
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b, int rounding) {
+  const int v = 32768 * r - 27439 * g - 5329 * b;
+  return VP8ClipUV(v, rounding);
+}
+
+#endif    // USE_YUVj
+
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/dsp/yuv_mips32.c b/drivers/webp/dsp/yuv_mips32.c
new file mode 100644
index 0000000000..018f8ab774
--- /dev/null
+++ b/drivers/webp/dsp/yuv_mips32.c
@@ -0,0 +1,103 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS version of YUV to RGB point-sampling functions.
+//
+// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
+//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS32)
+
+#include "./yuv.h"
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A)                                 \
+static void FUNC_NAME(const uint8_t* y,                                        \
+                      const uint8_t* u, const uint8_t* v,                      \
+                      uint8_t* dst, int len) {                                 \
+  int i, r, g, b;                                                              \
+  int temp0, temp1, temp2, temp3, temp4;                                       \
+  for (i = 0; i < (len >> 1); i++) {                                           \
+    temp1 = kVToR * v[0];                                                      \
+    temp3 = kVToG * v[0];                                                      \
+    temp2 = kUToG * u[0];                                                      \
+    temp4 = kUToB * u[0];                                                      \
+    temp0 = kYScale * y[0];                                                    \
+    temp1 += kRCst;                                                            \
+    temp3 -= kGCst;                                                            \
+    temp2 += temp3;                                                            \
+    temp4 += kBCst;                                                            \
+    r = VP8Clip8(temp0 + temp1);                                               \
+    g = VP8Clip8(temp0 - temp2);                                               \
+    b = VP8Clip8(temp0 + temp4);                                               \
+    temp0 = kYScale * y[1];                                                    \
+    dst[R] = r;                                                                \
+    dst[G] = g;                                                                \
+    dst[B] = b;                                                                \
+    if (A) dst[A] = 0xff;                                                      \
+    r = VP8Clip8(temp0 + temp1);                                               \
+    g = VP8Clip8(temp0 - temp2);                                               \
+    b = VP8Clip8(temp0 + temp4);                                               \
+    dst[R + XSTEP] = r;                                                        \
+    dst[G + XSTEP] = g;                                                        \
+    dst[B + XSTEP] = b;                                                        \
+    if (A) dst[A + XSTEP] = 0xff;                                              \
+    y += 2;                                                                    \
+    ++u;                                                                       \
+    ++v;                                                                       \
+    dst += 2 * XSTEP;                                                          \
+  }                                                                            \
+  if (len & 1) {                                                               \
+    temp1 = kVToR * v[0];                                                      \
+    temp3 = kVToG * v[0];                                                      \
+    temp2 = kUToG * u[0];                                                      \
+    temp4 = kUToB * u[0];                                                      \
+    temp0 = kYScale * y[0];                                                    \
+    temp1 += kRCst;                                                            \
+    temp3 -= kGCst;                                                            \
+    temp2 += temp3;                                                            \
+    temp4 += kBCst;                                                            \
+    r = VP8Clip8(temp0 + temp1);                                               \
+    g = VP8Clip8(temp0 - temp2);                                               \
+    b = VP8Clip8(temp0 + temp4);                                               \
+    dst[R] = r;                                                                \
+    dst[G] = g;                                                                \
+    dst[B] = b;                                                                \
+    if (A) dst[A] = 0xff;                                                      \
+  }                                                                            \
+}
+
+ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0)
+ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3)
+ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0)
+ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)
+
+#undef ROW_FUNC
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersMIPS32(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersMIPS32(void) {
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow;
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+}
+
+#else  // !WEBP_USE_MIPS32
+
+WEBP_DSP_INIT_STUB(WebPInitSamplersMIPS32)
+
+#endif  // WEBP_USE_MIPS32
diff --git a/drivers/webp/dsp/yuv_mips_dsp_r2.c b/drivers/webp/dsp/yuv_mips_dsp_r2.c
new file mode 100644
index 0000000000..45a2200352
--- /dev/null
+++ b/drivers/webp/dsp/yuv_mips_dsp_r2.c
@@ -0,0 +1,134 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// MIPS DSPr2 version of YUV to RGB point-sampling functions.
+//
+// Author(s):  Branimir Vasic (branimir.vasic@imgtec.com)
+//             Djordje Pesut  (djordje.pesut@imgtec.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_MIPS_DSP_R2)
+
+#include "./yuv.h"
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define ROW_FUNC_PART_1()  /* load u, v, y[0]; build the R/G/B terms */        \
+  "lbu              %[temp3],   0(%[v])                         \n\t"          \
+  "lbu              %[temp4],   0(%[u])                         \n\t"          \
+  "lbu              %[temp0],   0(%[y])                         \n\t"          \
+  "mul              %[temp1],   %[t_con_1],     %[temp3]        \n\t"          \
+  "mul              %[temp3],   %[t_con_2],     %[temp3]        \n\t"          \
+  "mul              %[temp2],   %[t_con_3],     %[temp4]        \n\t"          \
+  "mul              %[temp4],   %[t_con_4],     %[temp4]        \n\t"          \
+  "mul              %[temp0],   %[t_con_5],     %[temp0]        \n\t"          \
+  "addu             %[temp1],   %[temp1],       %[t_con_6]      \n\t"          \
+  "subu             %[temp3],   %[temp3],       %[t_con_7]      \n\t"          \
+  "addu             %[temp2],   %[temp2],       %[temp3]        \n\t"          \
+  "addu             %[temp4],   %[temp4],       %[t_con_8]      \n\t"          \
+
+#define ROW_FUNC_PART_2(R, G, B, K)  /* one pixel out; K != 0: preload y[1] */ \
+  "addu             %[temp5],   %[temp0],       %[temp1]        \n\t"          \
+  "subu             %[temp6],   %[temp0],       %[temp2]        \n\t"          \
+  "addu             %[temp7],   %[temp0],       %[temp4]        \n\t"          \
+".if " #K "                                                     \n\t"          \
+  "lbu              %[temp0],   1(%[y])                         \n\t"          \
+".endif                                                         \n\t"          \
+  "shll_s.w         %[temp5],   %[temp5],       9               \n\t"          \
+  "shll_s.w         %[temp6],   %[temp6],       9               \n\t"          \
+".if " #K "                                                     \n\t"          \
+  "mul              %[temp0],   %[t_con_5],     %[temp0]        \n\t"          \
+".endif                                                         \n\t"          \
+  "shll_s.w         %[temp7],   %[temp7],       9               \n\t"          \
+  "precrqu_s.qb.ph  %[temp5],   %[temp5],       $zero           \n\t"          \
+  "precrqu_s.qb.ph  %[temp6],   %[temp6],       $zero           \n\t"          \
+  "precrqu_s.qb.ph  %[temp7],   %[temp7],       $zero           \n\t"          \
+  "srl              %[temp5],   %[temp5],       24              \n\t"          \
+  "srl              %[temp6],   %[temp6],       24              \n\t"          \
+  "srl              %[temp7],   %[temp7],       24              \n\t"          \
+  "sb               %[temp5],   " #R "(%[dst])                  \n\t"          \
+  "sb               %[temp6],   " #G "(%[dst])                  \n\t"          \
+  "sb               %[temp7],   " #B "(%[dst])                  \n\t"          \
+
+#define ASM_CLOBBER_LIST()  /* full operand list, not only clobbers */         \
+  : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),             \
+    [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),             \
+    [temp6]"=&r"(temp6), [temp7]"=&r"(temp7)                                   \
+  : [t_con_1]"r"(t_con_1), [t_con_2]"r"(t_con_2), [t_con_3]"r"(t_con_3),       \
+    [t_con_4]"r"(t_con_4), [t_con_5]"r"(t_con_5), [t_con_6]"r"(t_con_6),       \
+    [u]"r"(u), [v]"r"(v), [y]"r"(y), [dst]"r"(dst),                            \
+    [t_con_7]"r"(t_con_7), [t_con_8]"r"(t_con_8)                               \
+  : "memory", "hi", "lo"                                                       \
+
+#define ROW_FUNC(FUNC_NAME, XSTEP, R, G, B, A)  /* XSTEP = bytes per pixel */  \
+static void FUNC_NAME(const uint8_t* y,                                        \
+                      const uint8_t* u, const uint8_t* v,                      \
+                      uint8_t* dst, int len) {                                 \
+  int i;                                                                       \
+  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;             \
+  const int t_con_1 = kVToR;                                                   \
+  const int t_con_2 = kVToG;                                                   \
+  const int t_con_3 = kUToG;                                                   \
+  const int t_con_4 = kUToB;                                                   \
+  const int t_con_5 = kYScale;                                                 \
+  const int t_con_6 = kRCst;                                                   \
+  const int t_con_7 = kGCst;                                                   \
+  const int t_con_8 = kBCst;                                                   \
+  for (i = 0; i < (len >> 1); i++) {  /* 2 pixels per (u, v) sample */         \
+    __asm__ volatile (                                                         \
+      ROW_FUNC_PART_1()                                                        \
+      ROW_FUNC_PART_2(R, G, B, 1)                                              \
+      ROW_FUNC_PART_2(R + XSTEP, G + XSTEP, B + XSTEP, 0)                      \
+      ASM_CLOBBER_LIST()                                                       \
+    );                                                                         \
+    if (A) dst[A] = dst[A + XSTEP] = 0xff;  /* A == 0: no alpha byte */        \
+    y += 2;                                                                    \
+    ++u;                                                                       \
+    ++v;                                                                       \
+    dst += 2 * XSTEP;                                                          \
+  }                                                                            \
+  if (len & 1) {  /* odd len: last pixel alone */                              \
+    __asm__ volatile (                                                         \
+      ROW_FUNC_PART_1()                                                        \
+      ROW_FUNC_PART_2(R, G, B, 0)                                              \
+      ASM_CLOBBER_LIST()                                                       \
+    );                                                                         \
+    if (A) dst[A] = 0xff;                                                      \
+  }                                                                            \
+}
+
+ROW_FUNC(YuvToRgbRow,      3, 0, 1, 2, 0)  // 3 bytes/pixel, no alpha byte written
+ROW_FUNC(YuvToRgbaRow,     4, 0, 1, 2, 3)  // 4 bytes/pixel, 0xff stored at offset 3
+ROW_FUNC(YuvToBgrRow,      3, 2, 1, 0, 0)  // same as Rgb with R and B offsets swapped
+ROW_FUNC(YuvToBgraRow,     4, 2, 1, 0, 3)  // same as Rgba with R and B offsets swapped
+
+#undef ROW_FUNC
+#undef ASM_CLOBBER_LIST
+#undef ROW_FUNC_PART_2
+#undef ROW_FUNC_PART_1
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersMIPSdspR2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersMIPSdspR2(void) {  // install DSPr2 samplers
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow;   // only these four slots are written here
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+}
+
+#else  // !WEBP_USE_MIPS_DSP_R2
+
+WEBP_DSP_INIT_STUB(WebPInitSamplersMIPSdspR2)
+
+#endif  // WEBP_USE_MIPS_DSP_R2
diff --git a/drivers/webp/dsp/yuv_sse2.c b/drivers/webp/dsp/yuv_sse2.c
new file mode 100644
index 0000000000..283b3af228
--- /dev/null
+++ b/drivers/webp/dsp/yuv_sse2.c
@@ -0,0 +1,606 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./yuv.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <emmintrin.h>
+#include <string.h>   // for memcpy
+
+typedef union {   // overlays one 128-bit SSE2 value with scalar lane views
+  int32_t i32[4];   // four 32-bit lanes; the table initializers use this view
+  uint8_t u8[16];   // the same 16 bytes, addressed individually
+  __m128i m;        // the same 16 bytes, as read by _mm_loadu_si128()
+} VP8kCstSSE2;
+
+#if defined(WEBP_YUV_USE_SSE2_TABLES)
+
+#include "./yuv_tables_sse2.h"
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInitSSE2(void) {}
+
+#else
+
+static int done_sse2 = 0;   // set to 1 once the three tables below are filled
+static VP8kCstSSE2 VP8kUtoRGBA[256], VP8kVtoRGBA[256], VP8kYtoRGBA[256];  // lanes: R, G, B, A
+
+WEBP_TSAN_IGNORE_FUNCTION void VP8YUVInitSSE2(void) {
+  if (!done_sse2) {   // fill the tables on the first call only
+    int i;
+    for (i = 0; i < 256; ++i) {
+      VP8kYtoRGBA[i].i32[0] =
+        VP8kYtoRGBA[i].i32[1] =
+        VP8kYtoRGBA[i].i32[2] = (i - 16) * kYScale + YUV_HALF2;  // same luma term for R, G, B
+      VP8kYtoRGBA[i].i32[3] = 0xff << YUV_FIX2;  // becomes 0xff after the >> YUV_FIX2
+
+      VP8kUtoRGBA[i].i32[0] = 0;   // U adds nothing to lane 0 (R)
+      VP8kUtoRGBA[i].i32[1] = -kUToG * (i - 128);
+      VP8kUtoRGBA[i].i32[2] =  kUToB * (i - 128);
+      VP8kUtoRGBA[i].i32[3] = 0;
+
+      VP8kVtoRGBA[i].i32[0] =  kVToR * (i - 128);
+      VP8kVtoRGBA[i].i32[1] = -kVToG * (i - 128);
+      VP8kVtoRGBA[i].i32[2] = 0;   // V adds nothing to lane 2 (B)
+      VP8kVtoRGBA[i].i32[3] = 0;
+    }
+    done_sse2 = 1;
+
+#if 0   // code used to generate 'yuv_tables_sse2.h'
+    printf("static const VP8kCstSSE2 VP8kYtoRGBA[256] = {\n");
+    for (i = 0; i < 256; ++i) {
+      printf("  {{0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x}},\n",
+             VP8kYtoRGBA[i].i32[0], VP8kYtoRGBA[i].i32[1],
+             VP8kYtoRGBA[i].i32[2], VP8kYtoRGBA[i].i32[3]);
+    }
+    printf("};\n\n");
+    printf("static const VP8kCstSSE2 VP8kUtoRGBA[256] = {\n");
+    for (i = 0; i < 256; ++i) {
+      printf("  {{0, 0x%.8x, 0x%.8x, 0}},\n",
+             VP8kUtoRGBA[i].i32[1], VP8kUtoRGBA[i].i32[2]);
+    }
+    printf("};\n\n");
+    printf("static VP8kCstSSE2 VP8kVtoRGBA[256] = {\n");  // NOTE(review): no 'const', unlike Y/U - confirm
+    for (i = 0; i < 256; ++i) {
+      printf("  {{0x%.8x, 0x%.8x, 0, 0}},\n",
+             VP8kVtoRGBA[i].i32[0], VP8kVtoRGBA[i].i32[1]);
+    }
+    printf("};\n\n");
+#endif
+  }
+}
+
+#endif  // WEBP_YUV_USE_SSE2_TABLES
+
+//-----------------------------------------------------------------------------
+
+static WEBP_INLINE __m128i LoadUVPart(int u, int v) {  // chroma-only part of a pixel
+  const __m128i u_part = _mm_loadu_si128(&VP8kUtoRGBA[u].m);  // u indexes a 256-entry table
+  const __m128i v_part = _mm_loadu_si128(&VP8kVtoRGBA[v].m);  // v likewise
+  const __m128i uv_part = _mm_add_epi32(u_part, v_part);  // lane-wise 32-bit sum
+  return uv_part;
+}
+
+static WEBP_INLINE __m128i GetRGBA32bWithUV(int y, const __m128i uv_part) {
+  const __m128i y_part = _mm_loadu_si128(&VP8kYtoRGBA[y].m);  // luma term plus alpha lane
+  const __m128i rgba1 = _mm_add_epi32(y_part, uv_part);
+  const __m128i rgba2 = _mm_srai_epi32(rgba1, YUV_FIX2);  // arithmetic shift, sign is kept
+  return rgba2;  // four 32-bit lanes; nothing here clamps them to 0..255
+}
+
+static WEBP_INLINE __m128i GetRGBA32b(int y, int u, int v) {  // one pixel, own (u, v)
+  const __m128i uv_part = LoadUVPart(u, v);
+  return GetRGBA32bWithUV(y, uv_part);  // same unclamped 32-bit lanes as the helper
+}
+
+static WEBP_INLINE void YuvToRgbSSE2(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const rgb) {
+  const __m128i tmp0 = GetRGBA32b(y, u, v);
+  const __m128i tmp1 = _mm_packs_epi32(tmp0, tmp0);   // 32b -> 16b, signed saturation
+  const __m128i tmp2 = _mm_packus_epi16(tmp1, tmp1);  // 16b -> 8b, unsigned saturation
+  // Note: we store 8 bytes at a time, not 3 bytes! -> the caller must own rgb[0..7]
+  _mm_storel_epi64((__m128i*)rgb, tmp2);
+}
+
+static WEBP_INLINE void YuvToBgrSSE2(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const bgr) {
+  const __m128i tmp0 = GetRGBA32b(y, u, v);
+  const __m128i tmp1 = _mm_shuffle_epi32(tmp0, _MM_SHUFFLE(3, 0, 1, 2));  // lanes 0 and 2 swapped
+  const __m128i tmp2 = _mm_packs_epi32(tmp1, tmp1);   // 32b -> 16b, signed saturation
+  const __m128i tmp3 = _mm_packus_epi16(tmp2, tmp2);  // 16b -> 8b, unsigned saturation
+  // Note: we store 8 bytes at a time, not 3 bytes! -> the caller must own bgr[0..7]
+  _mm_storel_epi64((__m128i*)bgr, tmp3);
+}
+
+//-----------------------------------------------------------------------------
+// Convert spans of 32 pixels to various RGB formats for the fancy upsampler.
+
+void VP8YuvToRgba32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst) {
+  int n;
+  for (n = 0; n < 32; n += 4) {   // fixed span of 32 pixels, 4 per iteration
+    const __m128i tmp0_1 = GetRGBA32b(y[n + 0], u[n + 0], v[n + 0]);  // u/v indexed per pixel
+    const __m128i tmp0_2 = GetRGBA32b(y[n + 1], u[n + 1], v[n + 1]);
+    const __m128i tmp0_3 = GetRGBA32b(y[n + 2], u[n + 2], v[n + 2]);
+    const __m128i tmp0_4 = GetRGBA32b(y[n + 3], u[n + 3], v[n + 3]);
+    const __m128i tmp1_1 = _mm_packs_epi32(tmp0_1, tmp0_2);
+    const __m128i tmp1_2 = _mm_packs_epi32(tmp0_3, tmp0_4);
+    const __m128i tmp2 = _mm_packus_epi16(tmp1_1, tmp1_2);   // 16 bytes = 4 pixels
+    _mm_storeu_si128((__m128i*)dst, tmp2);
+    dst += 4 * 4;
+  }
+}
+
+void VP8YuvToBgra32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                    uint8_t* dst) {
+  int n;
+  for (n = 0; n < 32; n += 2) {   // fixed span of 32 pixels, 2 per iteration
+    const __m128i tmp0_1 = GetRGBA32b(y[n + 0], u[n + 0], v[n + 0]);  // u/v indexed per pixel
+    const __m128i tmp0_2 = GetRGBA32b(y[n + 1], u[n + 1], v[n + 1]);
+    const __m128i tmp1_1 = _mm_shuffle_epi32(tmp0_1, _MM_SHUFFLE(3, 0, 1, 2));  // swap lanes 0/2
+    const __m128i tmp1_2 = _mm_shuffle_epi32(tmp0_2, _MM_SHUFFLE(3, 0, 1, 2));
+    const __m128i tmp2_1 = _mm_packs_epi32(tmp1_1, tmp1_2);
+    const __m128i tmp3 = _mm_packus_epi16(tmp2_1, tmp2_1);   // low 8 bytes = 2 pixels
+    _mm_storel_epi64((__m128i*)dst, tmp3);
+    dst += 4 * 2;
+  }
+}
+
+void VP8YuvToRgb32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst) {
+  int n;
+  uint8_t tmp0[2 * 3 + 5 + 15];   // 2 pixels + 5 bytes of store overrun + alignment slack
+  uint8_t* const tmp = (uint8_t*)((uintptr_t)(tmp0 + 15) & ~15);  // align to 16 bytes
+  for (n = 0; n < 30; ++n) {   // we directly stomp the *dst memory
+    YuvToRgbSSE2(y[n], u[n], v[n], dst + n * 3);   // 8-byte store; pixel 29 ends at byte 94
+  }
+  // Last two pixels are special: we write in a tmp buffer before sending
+  // to dst, so that nothing is written past the 96 bytes of the row.
+  YuvToRgbSSE2(y[n + 0], u[n + 0], v[n + 0], tmp + 0);
+  YuvToRgbSSE2(y[n + 1], u[n + 1], v[n + 1], tmp + 3);   // writes tmp[3..10]
+  memcpy(dst + n * 3, tmp, 2 * 3);
+}
+
+void VP8YuvToBgr32(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                   uint8_t* dst) {
+  int n;
+  uint8_t tmp0[2 * 3 + 5 + 15];   // 2 pixels + 5 bytes of store overrun + alignment slack
+  uint8_t* const tmp = (uint8_t*)((uintptr_t)(tmp0 + 15) & ~15);  // align to 16 bytes
+  for (n = 0; n < 30; ++n) {   // 8-byte stores straight into dst; pixel 29 ends at byte 94
+    YuvToBgrSSE2(y[n], u[n], v[n], dst + n * 3);
+  }
+  YuvToBgrSSE2(y[n + 0], u[n + 0], v[n + 0], tmp + 0);   // last two pixels go through tmp
+  YuvToBgrSSE2(y[n + 1], u[n + 1], v[n + 1], tmp + 3);   // writes tmp[3..10]
+  memcpy(dst + n * 3, tmp, 2 * 3);
+}
+
+//-----------------------------------------------------------------------------
+// Arbitrary-length row conversion functions
+
+static void YuvToRgbaRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 4 <= len; n += 4) {   // 4 pixels per iteration
+    const __m128i uv_0 = LoadUVPart(u[0], v[0]);   // shared by pixels 0 and 1
+    const __m128i uv_1 = LoadUVPart(u[1], v[1]);   // shared by pixels 2 and 3
+    const __m128i tmp0_1 = GetRGBA32bWithUV(y[0], uv_0);
+    const __m128i tmp0_2 = GetRGBA32bWithUV(y[1], uv_0);
+    const __m128i tmp0_3 = GetRGBA32bWithUV(y[2], uv_1);
+    const __m128i tmp0_4 = GetRGBA32bWithUV(y[3], uv_1);
+    const __m128i tmp1_1 = _mm_packs_epi32(tmp0_1, tmp0_2);
+    const __m128i tmp1_2 = _mm_packs_epi32(tmp0_3, tmp0_4);
+    const __m128i tmp2 = _mm_packus_epi16(tmp1_1, tmp1_2);   // 16 bytes = 4 pixels
+    _mm_storeu_si128((__m128i*)dst, tmp2);
+    dst += 4 * 4;
+    y += 4;
+    u += 2;
+    v += 2;
+  }
+  // Finish off the remaining (up to three) pixels with the scalar converter
+  while (n < len) {
+    VP8YuvToRgba(y[0], u[0], v[0], dst);
+    dst += 4;
+    ++y;
+    u += (n & 1);   // move to the next chroma sample after each odd pixel
+    v += (n & 1);
+    ++n;
+  }
+}
+
+static void YuvToBgraRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 <= len; n += 2) {   // 2 pixels per iteration, one (u, v) sample
+    const __m128i uv_0 = LoadUVPart(u[0], v[0]);
+    const __m128i tmp0_1 = GetRGBA32bWithUV(y[0], uv_0);
+    const __m128i tmp0_2 = GetRGBA32bWithUV(y[1], uv_0);
+    const __m128i tmp1_1 = _mm_shuffle_epi32(tmp0_1, _MM_SHUFFLE(3, 0, 1, 2));  // swap lanes 0/2
+    const __m128i tmp1_2 = _mm_shuffle_epi32(tmp0_2, _MM_SHUFFLE(3, 0, 1, 2));
+    const __m128i tmp2_1 = _mm_packs_epi32(tmp1_1, tmp1_2);
+    const __m128i tmp3 = _mm_packus_epi16(tmp2_1, tmp2_1);   // low 8 bytes = 2 pixels
+    _mm_storel_epi64((__m128i*)dst, tmp3);
+    dst += 4 * 2;
+    y += 2;
+    ++u;
+    ++v;
+  }
+  // Finish off: an odd length leaves one pixel for the scalar converter
+  if (len & 1) {
+    VP8YuvToBgra(y[0], u[0], v[0], dst);
+  }
+}
+
+static void YuvToArgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                         uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 <= len; n += 2) {   // 2 pixels per iteration, one (u, v) sample
+    const __m128i uv_0 = LoadUVPart(u[0], v[0]);
+    const __m128i tmp0_1 = GetRGBA32bWithUV(y[0], uv_0);
+    const __m128i tmp0_2 = GetRGBA32bWithUV(y[1], uv_0);
+    const __m128i tmp1_1 = _mm_shuffle_epi32(tmp0_1, _MM_SHUFFLE(2, 1, 0, 3));  // lane 3 moves first
+    const __m128i tmp1_2 = _mm_shuffle_epi32(tmp0_2, _MM_SHUFFLE(2, 1, 0, 3));
+    const __m128i tmp2_1 = _mm_packs_epi32(tmp1_1, tmp1_2);
+    const __m128i tmp3 = _mm_packus_epi16(tmp2_1, tmp2_1);   // low 8 bytes = 2 pixels
+    _mm_storel_epi64((__m128i*)dst, tmp3);
+    dst += 4 * 2;
+    y += 2;
+    ++u;
+    ++v;
+  }
+  // Finish off: an odd length leaves one pixel for the scalar converter
+  if (len & 1) {
+    VP8YuvToArgb(y[0], u[0], v[0], dst);
+  }
+}
+
+static void YuvToRgbRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                        uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 < len; ++n) {   // all but the last two pixels: 8-byte stores stay in-row
+    YuvToRgbSSE2(y[0], u[0], v[0], dst);  // stomps 8 bytes
+    dst += 3;
+    ++y;
+    u += (n & 1);   // move to the next chroma sample after each odd pixel
+    v += (n & 1);
+  }
+  VP8YuvToRgb(y[0], u[0], v[0], dst);   // NOTE(review): runs even if len <= 0 - confirm callers
+  if (len > 1) {
+    VP8YuvToRgb(y[1], u[n & 1], v[n & 1], dst + 3);   // next chroma sample only if n is odd
+  }
+}
+
+static void YuvToBgrRow(const uint8_t* y, const uint8_t* u, const uint8_t* v,
+                        uint8_t* dst, int len) {
+  int n;
+  for (n = 0; n + 2 < len; ++n) {   // all but the last two pixels: 8-byte stores stay in-row
+    YuvToBgrSSE2(y[0], u[0], v[0], dst);  // stomps 8 bytes
+    dst += 3;
+    ++y;
+    u += (n & 1);   // move to the next chroma sample after each odd pixel
+    v += (n & 1);
+  }
+  VP8YuvToBgr(y[0], u[0], v[0], dst + 0);   // NOTE(review): runs even if len <= 0 - confirm callers
+  if (len > 1) {
+    VP8YuvToBgr(y[1], u[n & 1], v[n & 1], dst + 3);   // next chroma sample only if n is odd
+  }
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+
+extern void WebPInitSamplersSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitSamplersSSE2(void) {  // install SSE2 row samplers
+  WebPSamplers[MODE_RGB]  = YuvToRgbRow;   // only these five slots are written here
+  WebPSamplers[MODE_RGBA] = YuvToRgbaRow;
+  WebPSamplers[MODE_BGR]  = YuvToBgrRow;
+  WebPSamplers[MODE_BGRA] = YuvToBgraRow;
+  WebPSamplers[MODE_ARGB] = YuvToArgbRow;
+}
+
+//------------------------------------------------------------------------------
+// RGB24/32 -> YUV converters
+
+// Load eight 16b-words from *src.
+#define LOAD_16(src) _mm_loadu_si128((const __m128i*)(src))
+// Store eight 16b-words into *dst
+#define STORE_16(V, dst) _mm_storeu_si128((__m128i*)(dst), (V))
+
+// Convert 8 packed RGB or BGR samples to r[], g[], b[] (eight 16b values each)
+static WEBP_INLINE void RGB24PackedToPlanar(const uint8_t* const rgb,
+                                            __m128i* const r,
+                                            __m128i* const g,
+                                            __m128i* const b,
+                                            int input_is_bgr) {
+  const __m128i zero = _mm_setzero_si128();
+  // in0: r0 g0 b0 r1 | g1 b1 r2 g2 | b2 r3 g3 b3 | r4 g4 b4 r5
+  // in1: b2 r3 g3 b3 | r4 g4 b4 r5 | g5 b5 r6 g6 | b6 r7 g7 b7
+  const __m128i in0 = LOAD_16(rgb + 0);   // reads bytes 0..15
+  const __m128i in1 = LOAD_16(rgb + 8);   // reads bytes 8..23, i.e. 24 bytes in total
+  // A0: | r2 g2 b2 r3 | g3 b3 r4 g4 | b4 r5 ...
+  // A1:                   ... b2 r3 | g3 b3 r4 g4 | b4 r5 g5 b5 |
+  const __m128i A0 = _mm_srli_si128(in0, 6);
+  const __m128i A1 = _mm_slli_si128(in1, 6);
+  // B0: r0 r2 g0 g2 | b0 b2 r1 r3 | g1 g3 b1 b3 | r2 r4 b2 b4
+  // B1: g3 g5 b3 b5 | r4 r6 g4 g6 | b4 b6 r5 r7 | g5 g7 b5 b7
+  const __m128i B0 = _mm_unpacklo_epi8(in0, A0);
+  const __m128i B1 = _mm_unpackhi_epi8(A1, in1);
+  // C0: r1 r3 g1 g3 | b1 b3 r2 r4 | b2 b4 ...
+  // C1:                 ... g3 g5 | b3 b5 r4 r6 | g4 g6 b4 b6
+  const __m128i C0 = _mm_srli_si128(B0, 6);
+  const __m128i C1 = _mm_slli_si128(B1, 6);
+  // D0: r0 r1 r2 r3 | g0 g1 g2 g3 | b0 b1 b2 b3 | r1 r2 r3 r4
+  // D1: b3 b4 b5 b6 | r4 r5 r6 r7 | g4 g5 g6 g7 | b4 b5 b6 b7 |
+  const __m128i D0 = _mm_unpacklo_epi8(B0, C0);
+  const __m128i D1 = _mm_unpackhi_epi8(C1, B1);
+  // r4 r5 r6 r7 | g4 g5 g6 g7 | b4 b5 b6 b7 | 0
+  const __m128i D2 = _mm_srli_si128(D1, 4);
+  // r0 r1 r2 r3 | r4 r5 r6 r7 | g0 g1 g2 g3 | g4 g5 g6 g7
+  const __m128i E0 = _mm_unpacklo_epi32(D0, D2);
+  // b0 b1 b2 b3 | b4 b5 b6 b7 | r1 r2 r3 r4 | 0
+  const __m128i E1 = _mm_unpackhi_epi32(D0, D2);
+  // g0 g1 g2 g3 | g4 g5 g6 g7 | 0
+  const __m128i E2 = _mm_srli_si128(E0, 8);
+  const __m128i F0 = _mm_unpacklo_epi8(E0, zero);  // -> R (bytes widened to 16b with zeros)
+  const __m128i F1 = _mm_unpacklo_epi8(E1, zero);  // -> B
+  const __m128i F2 = _mm_unpacklo_epi8(E2, zero);  // -> G
+  *g = F2;
+  if (input_is_bgr) {   // first and third input channels trade places
+    *r = F1;
+    *b = F0;
+  } else {
+    *r = F0;
+    *b = F1;
+  }
+}
+
+// Convert 8 packed ARGB to r[], g[], b[] (eight 16b values each; alpha is dropped)
+static WEBP_INLINE void RGB32PackedToPlanar(const uint32_t* const argb,
+                                            __m128i* const r,
+                                            __m128i* const g,
+                                            __m128i* const b) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i in0 = LOAD_16(argb + 0);    // argb3 | argb2 | argb1 | argb0
+  const __m128i in1 = LOAD_16(argb + 4);    // argb7 | argb6 | argb5 | argb4
+  // column-wise transpose
+  const __m128i A0 = _mm_unpacklo_epi8(in0, in1);
+  const __m128i A1 = _mm_unpackhi_epi8(in0, in1);
+  const __m128i B0 = _mm_unpacklo_epi8(A0, A1);
+  const __m128i B1 = _mm_unpackhi_epi8(A0, A1);
+  // C0 = g7 g6 ... g1 g0 | b7 b6 ... b1 b0
+  // C1 = a7 a6 ... a1 a0 | r7 r6 ... r1 r0
+  const __m128i C0 = _mm_unpacklo_epi8(B0, B1);
+  const __m128i C1 = _mm_unpackhi_epi8(B0, B1);
+  // store 16b
+  *r = _mm_unpacklo_epi8(C1, zero);   // low half of C1, widened with zeros
+  *g = _mm_unpackhi_epi8(C0, zero);   // high half of C0
+  *b = _mm_unpacklo_epi8(C0, zero);   // low half of C0
+}
+
+// This macro computes (RG * MULT_RG + GB * MULT_GB + ROUNDER) >> DESCALE_FIX,
+// packed to 16b with signed saturation. It is a macro rather than a function
+// because DESCALE_FIX has to reach _mm_srai_epi32() as an immediate value.
+#define TRANSFORM(RG_LO, RG_HI, GB_LO, GB_HI, MULT_RG, MULT_GB, \
+                  ROUNDER, DESCALE_FIX, OUT) do {               \
+  const __m128i V0_lo = _mm_madd_epi16(RG_LO, MULT_RG);         \
+  const __m128i V0_hi = _mm_madd_epi16(RG_HI, MULT_RG);         \
+  const __m128i V1_lo = _mm_madd_epi16(GB_LO, MULT_GB);         \
+  const __m128i V1_hi = _mm_madd_epi16(GB_HI, MULT_GB);         \
+  const __m128i V2_lo = _mm_add_epi32(V0_lo, V1_lo);            \
+  const __m128i V2_hi = _mm_add_epi32(V0_hi, V1_hi);            \
+  const __m128i V3_lo = _mm_add_epi32(V2_lo, ROUNDER);          \
+  const __m128i V3_hi = _mm_add_epi32(V2_hi, ROUNDER);          \
+  const __m128i V5_lo = _mm_srai_epi32(V3_lo, DESCALE_FIX);     \
+  const __m128i V5_hi = _mm_srai_epi32(V3_hi, DESCALE_FIX);     \
+  (OUT) = _mm_packs_epi32(V5_lo, V5_hi);                        \
+} while (0)
+
+#define MK_CST_16(A, B) _mm_set_epi16((B), (A), (B), (A), (B), (A), (B), (A))
+static WEBP_INLINE void ConvertRGBToY(const __m128i* const R,
+                                      const __m128i* const G,
+                                      const __m128i* const B,
+                                      __m128i* const Y) {
+  const __m128i kRG_y = MK_CST_16(16839, 33059 - 16384);  // R factor, first part of G factor
+  const __m128i kGB_y = MK_CST_16(16384, 6420);  // rest of G factor (33059 > int16 max), B factor
+  const __m128i kHALF_Y = _mm_set1_epi32((16 << YUV_FIX) + YUV_HALF);  // +16 offset and rounding
+
+  const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G);   // interleaved (r, g) pairs for madd
+  const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G);
+  const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B);   // interleaved (g, b) pairs for madd
+  const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B);
+  TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_y, kGB_y, kHALF_Y, YUV_FIX, *Y);  // eight 16b results
+}
+
+static WEBP_INLINE void ConvertRGBToUV(const __m128i* const R,
+                                       const __m128i* const G,
+                                       const __m128i* const B,
+                                       __m128i* const U, __m128i* const V) {
+  const __m128i kRG_u = MK_CST_16(-9719, -19081);   // U: factors applied to r and g
+  const __m128i kGB_u = MK_CST_16(0, 28800);        // U: g gets 0 here, b gets 28800
+  const __m128i kRG_v = MK_CST_16(28800, 0);        // V: r gets 28800, g gets 0 here
+  const __m128i kGB_v = MK_CST_16(-24116, -4684);   // V: factors applied to g and b
+  const __m128i kHALF_UV = _mm_set1_epi32(((128 << YUV_FIX) + YUV_HALF) << 2);  // scaled by 4
+
+  const __m128i RG_lo = _mm_unpacklo_epi16(*R, *G);   // interleaved (r, g) pairs for madd
+  const __m128i RG_hi = _mm_unpackhi_epi16(*R, *G);
+  const __m128i GB_lo = _mm_unpacklo_epi16(*G, *B);   // interleaved (g, b) pairs for madd
+  const __m128i GB_hi = _mm_unpackhi_epi16(*G, *B);
+  TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_u, kGB_u,
+            kHALF_UV, YUV_FIX + 2, *U);   // two extra shift bits: inputs presumably 4x-scaled
+  TRANSFORM(RG_lo, RG_hi, GB_lo, GB_hi, kRG_v, kGB_v,
+            kHALF_UV, YUV_FIX + 2, *V);
+}
+
+#undef MK_CST_16
+#undef TRANSFORM
+
+static void ConvertRGB24ToY(const uint8_t* rgb, uint8_t* y, int width) {
+  const int max_width = width & ~15;   // largest multiple of 16 not above width
+  int i;
+  for (i = 0; i < max_width; i += 16, rgb += 3 * 16) {   // 16 pixels (48 bytes) per pass
+    __m128i r, g, b, Y0, Y1;
+    RGB24PackedToPlanar(rgb + 0 * 8, &r, &g, &b, 0);   // pixels 0..7
+    ConvertRGBToY(&r, &g, &b, &Y0);
+    RGB24PackedToPlanar(rgb + 3 * 8, &r, &g, &b, 0);   // pixels 8..15 (byte offset 24)
+    ConvertRGBToY(&r, &g, &b, &Y1);
+    STORE_16(_mm_packus_epi16(Y0, Y1), y + i);   // 16b -> 8b with unsigned saturation
+  }
+  for (; i < width; ++i, rgb += 3) {   // left-over, one pixel at a time
+    y[i] = VP8RGBToY(rgb[0], rgb[1], rgb[2], YUV_HALF);
+  }
+}
+
+static void ConvertBGR24ToY(const uint8_t* bgr, uint8_t* y, int width) {
+  int i;
+  const int max_width = width & ~15;   // largest multiple of 16 not above width
+  for (i = 0; i < max_width; i += 16, bgr += 3 * 16) {   // 16 pixels (48 bytes) per pass
+    __m128i r, g, b, Y0, Y1;
+    RGB24PackedToPlanar(bgr + 0 * 8, &r, &g, &b, 1);   // pixels 0..7, BGR byte order
+    ConvertRGBToY(&r, &g, &b, &Y0);
+    RGB24PackedToPlanar(bgr + 3 * 8, &r, &g, &b, 1);   // pixels 8..15 (byte offset 24)
+    ConvertRGBToY(&r, &g, &b, &Y1);
+    STORE_16(_mm_packus_epi16(Y0, Y1), y + i);   // 16b -> 8b with unsigned saturation
+  }
+  for (; i < width; ++i, bgr += 3) {  // left-over, one pixel at a time
+    y[i] = VP8RGBToY(bgr[2], bgr[1], bgr[0], YUV_HALF);   // red is the third byte
+  }
+}
+
+static void ConvertARGBToY(const uint32_t* argb, uint8_t* y, int width) {
+  const int max_width = width & ~15;   // largest multiple of 16 not above width
+  int i;
+  for (i = 0; i < max_width; i += 16) {   // 16 pixels per pass
+    __m128i r, g, b, Y0, Y1;
+    RGB32PackedToPlanar(&argb[i + 0], &r, &g, &b);   // pixels i..i+7
+    ConvertRGBToY(&r, &g, &b, &Y0);
+    RGB32PackedToPlanar(&argb[i + 8], &r, &g, &b);   // pixels i+8..i+15
+    ConvertRGBToY(&r, &g, &b, &Y1);
+    STORE_16(_mm_packus_epi16(Y0, Y1), y + i);   // 16b -> 8b with unsigned saturation
+  }
+  for (; i < width; ++i) {   // left-over, one pixel at a time
+    const uint32_t p = argb[i];
+    y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >>  0) & 0xff,
+                     YUV_HALF);   // r = bits 16..23, g = bits 8..15, b = bits 0..7
+  }
+}
+
+// Horizontal add (doubled) of adjacent 16b values from *A then *B, result is 16b.
+// in: A | B | C | D | ... -> out: 2*(A+B) | 2*(C+D) | ...
+static void HorizontalAddPack(const __m128i* const A, const __m128i* const B,
+                              __m128i* const out) {
+  const __m128i k2 = _mm_set1_epi16(2);
+  const __m128i C = _mm_madd_epi16(*A, k2);   // four 32b sums from the pairs in *A
+  const __m128i D = _mm_madd_epi16(*B, k2);   // four 32b sums from the pairs in *B
+  *out = _mm_packs_epi32(C, D);   // *A results in the low half, *B results in the high half
+}
+
+static void ConvertARGBToUV(const uint32_t* argb, uint8_t* u, uint8_t* v,
+                            int src_width, int do_store) {
+  const int max_width = src_width & ~31;   // largest multiple of 32 not above src_width
+  int i;
+  for (i = 0; i < max_width; i += 32, u += 16, v += 16) {   // 32 pixels in, 16 u/v out
+    __m128i r0, g0, b0, r1, g1, b1, U0, V0, U1, V1;
+    RGB32PackedToPlanar(&argb[i +  0], &r0, &g0, &b0);
+    RGB32PackedToPlanar(&argb[i +  8], &r1, &g1, &b1);
+    HorizontalAddPack(&r0, &r1, &r0);   // 16 values -> 8 doubled sums of neighbours
+    HorizontalAddPack(&g0, &g1, &g0);
+    HorizontalAddPack(&b0, &b1, &b0);
+    ConvertRGBToUV(&r0, &g0, &b0, &U0, &V0);
+
+    RGB32PackedToPlanar(&argb[i + 16], &r0, &g0, &b0);
+    RGB32PackedToPlanar(&argb[i + 24], &r1, &g1, &b1);
+    HorizontalAddPack(&r0, &r1, &r0);
+    HorizontalAddPack(&g0, &g1, &g0);
+    HorizontalAddPack(&b0, &b1, &b0);
+    ConvertRGBToUV(&r0, &g0, &b0, &U1, &V1);
+
+    U0 = _mm_packus_epi16(U0, U1);   // 16b -> 8b with unsigned saturation
+    V0 = _mm_packus_epi16(V0, V1);
+    if (!do_store) {   // blend with the values already present in u[] / v[]
+      const __m128i prev_u = LOAD_16(u);
+      const __m128i prev_v = LOAD_16(v);
+      U0 = _mm_avg_epu8(U0, prev_u);   // per-byte average, rounded up
+      V0 = _mm_avg_epu8(V0, prev_v);
+    }
+    STORE_16(U0, u);
+    STORE_16(V0, v);
+  }
+  if (i < src_width) {  // left-over goes to the C implementation
+    WebPConvertARGBToUV_C(argb + i, u, v, src_width - i, do_store);
+  }
+}
+
+// Convert 8 packed r/g/b/x quadruplets of 16b values to r[], g[], b[]
+static WEBP_INLINE void RGBA32PackedToPlanar_16b(const uint16_t* const rgbx,
+                                                 __m128i* const r,
+                                                 __m128i* const g,
+                                                 __m128i* const b) {
+  const __m128i in0 = LOAD_16(rgbx +  0);  // r0 | g0 | b0 |x| r1 | g1 | b1 |x
+  const __m128i in1 = LOAD_16(rgbx +  8);  // r2 | g2 | b2 |x| r3 | g3 | b3 |x
+  const __m128i in2 = LOAD_16(rgbx + 16);  // r4 | ...
+  const __m128i in3 = LOAD_16(rgbx + 24);  // r6 | ...
+  // column-wise transpose
+  const __m128i A0 = _mm_unpacklo_epi16(in0, in1);
+  const __m128i A1 = _mm_unpackhi_epi16(in0, in1);
+  const __m128i A2 = _mm_unpacklo_epi16(in2, in3);
+  const __m128i A3 = _mm_unpackhi_epi16(in2, in3);
+  const __m128i B0 = _mm_unpacklo_epi16(A0, A1);  // r0 r1 r2 r3 | g0 g1 ..
+  const __m128i B1 = _mm_unpackhi_epi16(A0, A1);  // b0 b1 b2 b3 | x x x x
+  const __m128i B2 = _mm_unpacklo_epi16(A2, A3);  // r4 r5 r6 r7 | g4 g5 ..
+  const __m128i B3 = _mm_unpackhi_epi16(A2, A3);  // b4 b5 b6 b7 | x x x x
+  *r = _mm_unpacklo_epi64(B0, B2);   // low 64b halves of B0 and B2
+  *g = _mm_unpackhi_epi64(B0, B2);   // high 64b halves of B0 and B2
+  *b = _mm_unpacklo_epi64(B1, B3);   // low 64b halves of B1 and B3; the x values are unused
+}
+
+static void ConvertRGBA32ToUV(const uint16_t* rgb,
+                              uint8_t* u, uint8_t* v, int width) {
+  const int max_width = width & ~15;   // largest multiple of 16 not above width
+  const uint16_t* const last_rgb = rgb + 4 * max_width;   // four 16b values per entry
+  while (rgb < last_rgb) {   // 16 entries in, 16 u and 16 v bytes out per pass
+    __m128i r, g, b, U0, V0, U1, V1;
+    RGBA32PackedToPlanar_16b(rgb +  0, &r, &g, &b);   // entries 0..7
+    ConvertRGBToUV(&r, &g, &b, &U0, &V0);
+    RGBA32PackedToPlanar_16b(rgb + 32, &r, &g, &b);   // entries 8..15
+    ConvertRGBToUV(&r, &g, &b, &U1, &V1);
+    STORE_16(_mm_packus_epi16(U0, U1), u);   // 16b -> 8b with unsigned saturation
+    STORE_16(_mm_packus_epi16(V0, V1), v);
+    u += 16;
+    v += 16;
+    rgb += 2 * 32;
+  }
+  if (max_width < width) {  // left-over goes to the C implementation
+    WebPConvertRGBA32ToUV_C(rgb, u, v, width - max_width);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+extern void WebPInitConvertARGBToYUVSSE2(void);
+
+WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {  // install SSE2 converters
+  WebPConvertARGBToY = ConvertARGBToY;
+  WebPConvertARGBToUV = ConvertARGBToUV;
+
+  WebPConvertRGB24ToY = ConvertRGB24ToY;
+  WebPConvertBGR24ToY = ConvertBGR24ToY;
+
+  WebPConvertRGBA32ToUV = ConvertRGBA32ToUV;   // only these five pointers are set here
+}
+
+#else  // !WEBP_USE_SSE2
+
+WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
+WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
+
+#endif  // WEBP_USE_SSE2
diff --git a/drivers/webp/dsp/yuv_tables_sse2.h b/drivers/webp/dsp/yuv_tables_sse2.h
new file mode 100644
index 0000000000..2b0f057518
--- /dev/null
+++ b/drivers/webp/dsp/yuv_tables_sse2.h
@@ -0,0 +1,536 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// SSE2 tables for YUV->RGB conversion (12kB overall)
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+// This file is not compiled, but #include'd directly from yuv.c
+// Only used if WEBP_YUV_USE_SSE2_TABLES is defined.
+
+static const VP8kCstSSE2 VP8kYtoRGBA[256] = {
+  {{0xfffb77b0, 0xfffb77b0, 0xfffb77b0, 0x003fc000}},
+  {{0xfffbc235, 0xfffbc235, 0xfffbc235, 0x003fc000}},
+  {{0xfffc0cba, 0xfffc0cba, 0xfffc0cba, 0x003fc000}},
+  {{0xfffc573f, 0xfffc573f, 0xfffc573f, 0x003fc000}},
+  {{0xfffca1c4, 0xfffca1c4, 0xfffca1c4, 0x003fc000}},
+  {{0xfffcec49, 0xfffcec49, 0xfffcec49, 0x003fc000}},
+  {{0xfffd36ce, 0xfffd36ce, 0xfffd36ce, 0x003fc000}},
+  {{0xfffd8153, 0xfffd8153, 0xfffd8153, 0x003fc000}},
+  {{0xfffdcbd8, 0xfffdcbd8, 0xfffdcbd8, 0x003fc000}},
+  {{0xfffe165d, 0xfffe165d, 0xfffe165d, 0x003fc000}},
+  {{0xfffe60e2, 0xfffe60e2, 0xfffe60e2, 0x003fc000}},
+  {{0xfffeab67, 0xfffeab67, 0xfffeab67, 0x003fc000}},
+  {{0xfffef5ec, 0xfffef5ec, 0xfffef5ec, 0x003fc000}},
+  {{0xffff4071, 0xffff4071, 0xffff4071, 0x003fc000}},
+  {{0xffff8af6, 0xffff8af6, 0xffff8af6, 0x003fc000}},
+  {{0xffffd57b, 0xffffd57b, 0xffffd57b, 0x003fc000}},
+  {{0x00002000, 0x00002000, 0x00002000, 0x003fc000}},
+  {{0x00006a85, 0x00006a85, 0x00006a85, 0x003fc000}},
+  {{0x0000b50a, 0x0000b50a, 0x0000b50a, 0x003fc000}},
+  {{0x0000ff8f, 0x0000ff8f, 0x0000ff8f, 0x003fc000}},
+  {{0x00014a14, 0x00014a14, 0x00014a14, 0x003fc000}},
+  {{0x00019499, 0x00019499, 0x00019499, 0x003fc000}},
+  {{0x0001df1e, 0x0001df1e, 0x0001df1e, 0x003fc000}},
+  {{0x000229a3, 0x000229a3, 0x000229a3, 0x003fc000}},
+  {{0x00027428, 0x00027428, 0x00027428, 0x003fc000}},
+  {{0x0002bead, 0x0002bead, 0x0002bead, 0x003fc000}},
+  {{0x00030932, 0x00030932, 0x00030932, 0x003fc000}},
+  {{0x000353b7, 0x000353b7, 0x000353b7, 0x003fc000}},
+  {{0x00039e3c, 0x00039e3c, 0x00039e3c, 0x003fc000}},
+  {{0x0003e8c1, 0x0003e8c1, 0x0003e8c1, 0x003fc000}},
+  {{0x00043346, 0x00043346, 0x00043346, 0x003fc000}},
+  {{0x00047dcb, 0x00047dcb, 0x00047dcb, 0x003fc000}},
+  {{0x0004c850, 0x0004c850, 0x0004c850, 0x003fc000}},
+  {{0x000512d5, 0x000512d5, 0x000512d5, 0x003fc000}},
+  {{0x00055d5a, 0x00055d5a, 0x00055d5a, 0x003fc000}},
+  {{0x0005a7df, 0x0005a7df, 0x0005a7df, 0x003fc000}},
+  {{0x0005f264, 0x0005f264, 0x0005f264, 0x003fc000}},
+  {{0x00063ce9, 0x00063ce9, 0x00063ce9, 0x003fc000}},
+  {{0x0006876e, 0x0006876e, 0x0006876e, 0x003fc000}},
+  {{0x0006d1f3, 0x0006d1f3, 0x0006d1f3, 0x003fc000}},
+  {{0x00071c78, 0x00071c78, 0x00071c78, 0x003fc000}},
+  {{0x000766fd, 0x000766fd, 0x000766fd, 0x003fc000}},
+  {{0x0007b182, 0x0007b182, 0x0007b182, 0x003fc000}},
+  {{0x0007fc07, 0x0007fc07, 0x0007fc07, 0x003fc000}},
+  {{0x0008468c, 0x0008468c, 0x0008468c, 0x003fc000}},
+  {{0x00089111, 0x00089111, 0x00089111, 0x003fc000}},
+  {{0x0008db96, 0x0008db96, 0x0008db96, 0x003fc000}},
+  {{0x0009261b, 0x0009261b, 0x0009261b, 0x003fc000}},
+  {{0x000970a0, 0x000970a0, 0x000970a0, 0x003fc000}},
+  {{0x0009bb25, 0x0009bb25, 0x0009bb25, 0x003fc000}},
+  {{0x000a05aa, 0x000a05aa, 0x000a05aa, 0x003fc000}},
+  {{0x000a502f, 0x000a502f, 0x000a502f, 0x003fc000}},
+  {{0x000a9ab4, 0x000a9ab4, 0x000a9ab4, 0x003fc000}},
+  {{0x000ae539, 0x000ae539, 0x000ae539, 0x003fc000}},
+  {{0x000b2fbe, 0x000b2fbe, 0x000b2fbe, 0x003fc000}},
+  {{0x000b7a43, 0x000b7a43, 0x000b7a43, 0x003fc000}},
+  {{0x000bc4c8, 0x000bc4c8, 0x000bc4c8, 0x003fc000}},
+  {{0x000c0f4d, 0x000c0f4d, 0x000c0f4d, 0x003fc000}},
+  {{0x000c59d2, 0x000c59d2, 0x000c59d2, 0x003fc000}},
+  {{0x000ca457, 0x000ca457, 0x000ca457, 0x003fc000}},
+  {{0x000ceedc, 0x000ceedc, 0x000ceedc, 0x003fc000}},
+  {{0x000d3961, 0x000d3961, 0x000d3961, 0x003fc000}},
+  {{0x000d83e6, 0x000d83e6, 0x000d83e6, 0x003fc000}},
+  {{0x000dce6b, 0x000dce6b, 0x000dce6b, 0x003fc000}},
+  {{0x000e18f0, 0x000e18f0, 0x000e18f0, 0x003fc000}},
+  {{0x000e6375, 0x000e6375, 0x000e6375, 0x003fc000}},
+  {{0x000eadfa, 0x000eadfa, 0x000eadfa, 0x003fc000}},
+  {{0x000ef87f, 0x000ef87f, 0x000ef87f, 0x003fc000}},
+  {{0x000f4304, 0x000f4304, 0x000f4304, 0x003fc000}},
+  {{0x000f8d89, 0x000f8d89, 0x000f8d89, 0x003fc000}},
+  {{0x000fd80e, 0x000fd80e, 0x000fd80e, 0x003fc000}},
+  {{0x00102293, 0x00102293, 0x00102293, 0x003fc000}},
+  {{0x00106d18, 0x00106d18, 0x00106d18, 0x003fc000}},
+  {{0x0010b79d, 0x0010b79d, 0x0010b79d, 0x003fc000}},
+  {{0x00110222, 0x00110222, 0x00110222, 0x003fc000}},
+  {{0x00114ca7, 0x00114ca7, 0x00114ca7, 0x003fc000}},
+  {{0x0011972c, 0x0011972c, 0x0011972c, 0x003fc000}},
+  {{0x0011e1b1, 0x0011e1b1, 0x0011e1b1, 0x003fc000}},
+  {{0x00122c36, 0x00122c36, 0x00122c36, 0x003fc000}},
+  {{0x001276bb, 0x001276bb, 0x001276bb, 0x003fc000}},
+  {{0x0012c140, 0x0012c140, 0x0012c140, 0x003fc000}},
+  {{0x00130bc5, 0x00130bc5, 0x00130bc5, 0x003fc000}},
+  {{0x0013564a, 0x0013564a, 0x0013564a, 0x003fc000}},
+  {{0x0013a0cf, 0x0013a0cf, 0x0013a0cf, 0x003fc000}},
+  {{0x0013eb54, 0x0013eb54, 0x0013eb54, 0x003fc000}},
+  {{0x001435d9, 0x001435d9, 0x001435d9, 0x003fc000}},
+  {{0x0014805e, 0x0014805e, 0x0014805e, 0x003fc000}},
+  {{0x0014cae3, 0x0014cae3, 0x0014cae3, 0x003fc000}},
+  {{0x00151568, 0x00151568, 0x00151568, 0x003fc000}},
+  {{0x00155fed, 0x00155fed, 0x00155fed, 0x003fc000}},
+  {{0x0015aa72, 0x0015aa72, 0x0015aa72, 0x003fc000}},
+  {{0x0015f4f7, 0x0015f4f7, 0x0015f4f7, 0x003fc000}},
+  {{0x00163f7c, 0x00163f7c, 0x00163f7c, 0x003fc000}},
+  {{0x00168a01, 0x00168a01, 0x00168a01, 0x003fc000}},
+  {{0x0016d486, 0x0016d486, 0x0016d486, 0x003fc000}},
+  {{0x00171f0b, 0x00171f0b, 0x00171f0b, 0x003fc000}},
+  {{0x00176990, 0x00176990, 0x00176990, 0x003fc000}},
+  {{0x0017b415, 0x0017b415, 0x0017b415, 0x003fc000}},
+  {{0x0017fe9a, 0x0017fe9a, 0x0017fe9a, 0x003fc000}},
+  {{0x0018491f, 0x0018491f, 0x0018491f, 0x003fc000}},
+  {{0x001893a4, 0x001893a4, 0x001893a4, 0x003fc000}},
+  {{0x0018de29, 0x0018de29, 0x0018de29, 0x003fc000}},
+  {{0x001928ae, 0x001928ae, 0x001928ae, 0x003fc000}},
+  {{0x00197333, 0x00197333, 0x00197333, 0x003fc000}},
+  {{0x0019bdb8, 0x0019bdb8, 0x0019bdb8, 0x003fc000}},
+  {{0x001a083d, 0x001a083d, 0x001a083d, 0x003fc000}},
+  {{0x001a52c2, 0x001a52c2, 0x001a52c2, 0x003fc000}},
+  {{0x001a9d47, 0x001a9d47, 0x001a9d47, 0x003fc000}},
+  {{0x001ae7cc, 0x001ae7cc, 0x001ae7cc, 0x003fc000}},
+  {{0x001b3251, 0x001b3251, 0x001b3251, 0x003fc000}},
+  {{0x001b7cd6, 0x001b7cd6, 0x001b7cd6, 0x003fc000}},
+  {{0x001bc75b, 0x001bc75b, 0x001bc75b, 0x003fc000}},
+  {{0x001c11e0, 0x001c11e0, 0x001c11e0, 0x003fc000}},
+  {{0x001c5c65, 0x001c5c65, 0x001c5c65, 0x003fc000}},
+  {{0x001ca6ea, 0x001ca6ea, 0x001ca6ea, 0x003fc000}},
+  {{0x001cf16f, 0x001cf16f, 0x001cf16f, 0x003fc000}},
+  {{0x001d3bf4, 0x001d3bf4, 0x001d3bf4, 0x003fc000}},
+  {{0x001d8679, 0x001d8679, 0x001d8679, 0x003fc000}},
+  {{0x001dd0fe, 0x001dd0fe, 0x001dd0fe, 0x003fc000}},
+  {{0x001e1b83, 0x001e1b83, 0x001e1b83, 0x003fc000}},
+  {{0x001e6608, 0x001e6608, 0x001e6608, 0x003fc000}},
+  {{0x001eb08d, 0x001eb08d, 0x001eb08d, 0x003fc000}},
+  {{0x001efb12, 0x001efb12, 0x001efb12, 0x003fc000}},
+  {{0x001f4597, 0x001f4597, 0x001f4597, 0x003fc000}},
+  {{0x001f901c, 0x001f901c, 0x001f901c, 0x003fc000}},
+  {{0x001fdaa1, 0x001fdaa1, 0x001fdaa1, 0x003fc000}},
+  {{0x00202526, 0x00202526, 0x00202526, 0x003fc000}},
+  {{0x00206fab, 0x00206fab, 0x00206fab, 0x003fc000}},
+  {{0x0020ba30, 0x0020ba30, 0x0020ba30, 0x003fc000}},
+  {{0x002104b5, 0x002104b5, 0x002104b5, 0x003fc000}},
+  {{0x00214f3a, 0x00214f3a, 0x00214f3a, 0x003fc000}},
+  {{0x002199bf, 0x002199bf, 0x002199bf, 0x003fc000}},
+  {{0x0021e444, 0x0021e444, 0x0021e444, 0x003fc000}},
+  {{0x00222ec9, 0x00222ec9, 0x00222ec9, 0x003fc000}},
+  {{0x0022794e, 0x0022794e, 0x0022794e, 0x003fc000}},
+  {{0x0022c3d3, 0x0022c3d3, 0x0022c3d3, 0x003fc000}},
+  {{0x00230e58, 0x00230e58, 0x00230e58, 0x003fc000}},
+  {{0x002358dd, 0x002358dd, 0x002358dd, 0x003fc000}},
+  {{0x0023a362, 0x0023a362, 0x0023a362, 0x003fc000}},
+  {{0x0023ede7, 0x0023ede7, 0x0023ede7, 0x003fc000}},
+  {{0x0024386c, 0x0024386c, 0x0024386c, 0x003fc000}},
+  {{0x002482f1, 0x002482f1, 0x002482f1, 0x003fc000}},
+  {{0x0024cd76, 0x0024cd76, 0x0024cd76, 0x003fc000}},
+  {{0x002517fb, 0x002517fb, 0x002517fb, 0x003fc000}},
+  {{0x00256280, 0x00256280, 0x00256280, 0x003fc000}},
+  {{0x0025ad05, 0x0025ad05, 0x0025ad05, 0x003fc000}},
+  {{0x0025f78a, 0x0025f78a, 0x0025f78a, 0x003fc000}},
+  {{0x0026420f, 0x0026420f, 0x0026420f, 0x003fc000}},
+  {{0x00268c94, 0x00268c94, 0x00268c94, 0x003fc000}},
+  {{0x0026d719, 0x0026d719, 0x0026d719, 0x003fc000}},
+  {{0x0027219e, 0x0027219e, 0x0027219e, 0x003fc000}},
+  {{0x00276c23, 0x00276c23, 0x00276c23, 0x003fc000}},
+  {{0x0027b6a8, 0x0027b6a8, 0x0027b6a8, 0x003fc000}},
+  {{0x0028012d, 0x0028012d, 0x0028012d, 0x003fc000}},
+  {{0x00284bb2, 0x00284bb2, 0x00284bb2, 0x003fc000}},
+  {{0x00289637, 0x00289637, 0x00289637, 0x003fc000}},
+  {{0x0028e0bc, 0x0028e0bc, 0x0028e0bc, 0x003fc000}},
+  {{0x00292b41, 0x00292b41, 0x00292b41, 0x003fc000}},
+  {{0x002975c6, 0x002975c6, 0x002975c6, 0x003fc000}},
+  {{0x0029c04b, 0x0029c04b, 0x0029c04b, 0x003fc000}},
+  {{0x002a0ad0, 0x002a0ad0, 0x002a0ad0, 0x003fc000}},
+  {{0x002a5555, 0x002a5555, 0x002a5555, 0x003fc000}},
+  {{0x002a9fda, 0x002a9fda, 0x002a9fda, 0x003fc000}},
+  {{0x002aea5f, 0x002aea5f, 0x002aea5f, 0x003fc000}},
+  {{0x002b34e4, 0x002b34e4, 0x002b34e4, 0x003fc000}},
+  {{0x002b7f69, 0x002b7f69, 0x002b7f69, 0x003fc000}},
+  {{0x002bc9ee, 0x002bc9ee, 0x002bc9ee, 0x003fc000}},
+  {{0x002c1473, 0x002c1473, 0x002c1473, 0x003fc000}},
+  {{0x002c5ef8, 0x002c5ef8, 0x002c5ef8, 0x003fc000}},
+  {{0x002ca97d, 0x002ca97d, 0x002ca97d, 0x003fc000}},
+  {{0x002cf402, 0x002cf402, 0x002cf402, 0x003fc000}},
+  {{0x002d3e87, 0x002d3e87, 0x002d3e87, 0x003fc000}},
+  {{0x002d890c, 0x002d890c, 0x002d890c, 0x003fc000}},
+  {{0x002dd391, 0x002dd391, 0x002dd391, 0x003fc000}},
+  {{0x002e1e16, 0x002e1e16, 0x002e1e16, 0x003fc000}},
+  {{0x002e689b, 0x002e689b, 0x002e689b, 0x003fc000}},
+  {{0x002eb320, 0x002eb320, 0x002eb320, 0x003fc000}},
+  {{0x002efda5, 0x002efda5, 0x002efda5, 0x003fc000}},
+  {{0x002f482a, 0x002f482a, 0x002f482a, 0x003fc000}},
+  {{0x002f92af, 0x002f92af, 0x002f92af, 0x003fc000}},
+  {{0x002fdd34, 0x002fdd34, 0x002fdd34, 0x003fc000}},
+  {{0x003027b9, 0x003027b9, 0x003027b9, 0x003fc000}},
+  {{0x0030723e, 0x0030723e, 0x0030723e, 0x003fc000}},
+  {{0x0030bcc3, 0x0030bcc3, 0x0030bcc3, 0x003fc000}},
+  {{0x00310748, 0x00310748, 0x00310748, 0x003fc000}},
+  {{0x003151cd, 0x003151cd, 0x003151cd, 0x003fc000}},
+  {{0x00319c52, 0x00319c52, 0x00319c52, 0x003fc000}},
+  {{0x0031e6d7, 0x0031e6d7, 0x0031e6d7, 0x003fc000}},
+  {{0x0032315c, 0x0032315c, 0x0032315c, 0x003fc000}},
+  {{0x00327be1, 0x00327be1, 0x00327be1, 0x003fc000}},
+  {{0x0032c666, 0x0032c666, 0x0032c666, 0x003fc000}},
+  {{0x003310eb, 0x003310eb, 0x003310eb, 0x003fc000}},
+  {{0x00335b70, 0x00335b70, 0x00335b70, 0x003fc000}},
+  {{0x0033a5f5, 0x0033a5f5, 0x0033a5f5, 0x003fc000}},
+  {{0x0033f07a, 0x0033f07a, 0x0033f07a, 0x003fc000}},
+  {{0x00343aff, 0x00343aff, 0x00343aff, 0x003fc000}},
+  {{0x00348584, 0x00348584, 0x00348584, 0x003fc000}},
+  {{0x0034d009, 0x0034d009, 0x0034d009, 0x003fc000}},
+  {{0x00351a8e, 0x00351a8e, 0x00351a8e, 0x003fc000}},
+  {{0x00356513, 0x00356513, 0x00356513, 0x003fc000}},
+  {{0x0035af98, 0x0035af98, 0x0035af98, 0x003fc000}},
+  {{0x0035fa1d, 0x0035fa1d, 0x0035fa1d, 0x003fc000}},
+  {{0x003644a2, 0x003644a2, 0x003644a2, 0x003fc000}},
+  {{0x00368f27, 0x00368f27, 0x00368f27, 0x003fc000}},
+  {{0x0036d9ac, 0x0036d9ac, 0x0036d9ac, 0x003fc000}},
+  {{0x00372431, 0x00372431, 0x00372431, 0x003fc000}},
+  {{0x00376eb6, 0x00376eb6, 0x00376eb6, 0x003fc000}},
+  {{0x0037b93b, 0x0037b93b, 0x0037b93b, 0x003fc000}},
+  {{0x003803c0, 0x003803c0, 0x003803c0, 0x003fc000}},
+  {{0x00384e45, 0x00384e45, 0x00384e45, 0x003fc000}},
+  {{0x003898ca, 0x003898ca, 0x003898ca, 0x003fc000}},
+  {{0x0038e34f, 0x0038e34f, 0x0038e34f, 0x003fc000}},
+  {{0x00392dd4, 0x00392dd4, 0x00392dd4, 0x003fc000}},
+  {{0x00397859, 0x00397859, 0x00397859, 0x003fc000}},
+  {{0x0039c2de, 0x0039c2de, 0x0039c2de, 0x003fc000}},
+  {{0x003a0d63, 0x003a0d63, 0x003a0d63, 0x003fc000}},
+  {{0x003a57e8, 0x003a57e8, 0x003a57e8, 0x003fc000}},
+  {{0x003aa26d, 0x003aa26d, 0x003aa26d, 0x003fc000}},
+  {{0x003aecf2, 0x003aecf2, 0x003aecf2, 0x003fc000}},
+  {{0x003b3777, 0x003b3777, 0x003b3777, 0x003fc000}},
+  {{0x003b81fc, 0x003b81fc, 0x003b81fc, 0x003fc000}},
+  {{0x003bcc81, 0x003bcc81, 0x003bcc81, 0x003fc000}},
+  {{0x003c1706, 0x003c1706, 0x003c1706, 0x003fc000}},
+  {{0x003c618b, 0x003c618b, 0x003c618b, 0x003fc000}},
+  {{0x003cac10, 0x003cac10, 0x003cac10, 0x003fc000}},
+  {{0x003cf695, 0x003cf695, 0x003cf695, 0x003fc000}},
+  {{0x003d411a, 0x003d411a, 0x003d411a, 0x003fc000}},
+  {{0x003d8b9f, 0x003d8b9f, 0x003d8b9f, 0x003fc000}},
+  {{0x003dd624, 0x003dd624, 0x003dd624, 0x003fc000}},
+  {{0x003e20a9, 0x003e20a9, 0x003e20a9, 0x003fc000}},
+  {{0x003e6b2e, 0x003e6b2e, 0x003e6b2e, 0x003fc000}},
+  {{0x003eb5b3, 0x003eb5b3, 0x003eb5b3, 0x003fc000}},
+  {{0x003f0038, 0x003f0038, 0x003f0038, 0x003fc000}},
+  {{0x003f4abd, 0x003f4abd, 0x003f4abd, 0x003fc000}},
+  {{0x003f9542, 0x003f9542, 0x003f9542, 0x003fc000}},
+  {{0x003fdfc7, 0x003fdfc7, 0x003fdfc7, 0x003fc000}},
+  {{0x00402a4c, 0x00402a4c, 0x00402a4c, 0x003fc000}},
+  {{0x004074d1, 0x004074d1, 0x004074d1, 0x003fc000}},
+  {{0x0040bf56, 0x0040bf56, 0x0040bf56, 0x003fc000}},
+  {{0x004109db, 0x004109db, 0x004109db, 0x003fc000}},
+  {{0x00415460, 0x00415460, 0x00415460, 0x003fc000}},
+  {{0x00419ee5, 0x00419ee5, 0x00419ee5, 0x003fc000}},
+  {{0x0041e96a, 0x0041e96a, 0x0041e96a, 0x003fc000}},
+  {{0x004233ef, 0x004233ef, 0x004233ef, 0x003fc000}},
+  {{0x00427e74, 0x00427e74, 0x00427e74, 0x003fc000}},
+  {{0x0042c8f9, 0x0042c8f9, 0x0042c8f9, 0x003fc000}},
+  {{0x0043137e, 0x0043137e, 0x0043137e, 0x003fc000}},
+  {{0x00435e03, 0x00435e03, 0x00435e03, 0x003fc000}},
+  {{0x0043a888, 0x0043a888, 0x0043a888, 0x003fc000}},
+  {{0x0043f30d, 0x0043f30d, 0x0043f30d, 0x003fc000}},
+  {{0x00443d92, 0x00443d92, 0x00443d92, 0x003fc000}},
+  {{0x00448817, 0x00448817, 0x00448817, 0x003fc000}},
+  {{0x0044d29c, 0x0044d29c, 0x0044d29c, 0x003fc000}},
+  {{0x00451d21, 0x00451d21, 0x00451d21, 0x003fc000}},
+  {{0x004567a6, 0x004567a6, 0x004567a6, 0x003fc000}},
+  {{0x0045b22b, 0x0045b22b, 0x0045b22b, 0x003fc000}}
+};
+
+static const VP8kCstSSE2 VP8kUtoRGBA[256] = {
+  {{0, 0x000c8980, 0xffbf7300, 0}}, {{0, 0x000c706d, 0xffbff41a, 0}},
+  {{0, 0x000c575a, 0xffc07534, 0}}, {{0, 0x000c3e47, 0xffc0f64e, 0}},
+  {{0, 0x000c2534, 0xffc17768, 0}}, {{0, 0x000c0c21, 0xffc1f882, 0}},
+  {{0, 0x000bf30e, 0xffc2799c, 0}}, {{0, 0x000bd9fb, 0xffc2fab6, 0}},
+  {{0, 0x000bc0e8, 0xffc37bd0, 0}}, {{0, 0x000ba7d5, 0xffc3fcea, 0}},
+  {{0, 0x000b8ec2, 0xffc47e04, 0}}, {{0, 0x000b75af, 0xffc4ff1e, 0}},
+  {{0, 0x000b5c9c, 0xffc58038, 0}}, {{0, 0x000b4389, 0xffc60152, 0}},
+  {{0, 0x000b2a76, 0xffc6826c, 0}}, {{0, 0x000b1163, 0xffc70386, 0}},
+  {{0, 0x000af850, 0xffc784a0, 0}}, {{0, 0x000adf3d, 0xffc805ba, 0}},
+  {{0, 0x000ac62a, 0xffc886d4, 0}}, {{0, 0x000aad17, 0xffc907ee, 0}},
+  {{0, 0x000a9404, 0xffc98908, 0}}, {{0, 0x000a7af1, 0xffca0a22, 0}},
+  {{0, 0x000a61de, 0xffca8b3c, 0}}, {{0, 0x000a48cb, 0xffcb0c56, 0}},
+  {{0, 0x000a2fb8, 0xffcb8d70, 0}}, {{0, 0x000a16a5, 0xffcc0e8a, 0}},
+  {{0, 0x0009fd92, 0xffcc8fa4, 0}}, {{0, 0x0009e47f, 0xffcd10be, 0}},
+  {{0, 0x0009cb6c, 0xffcd91d8, 0}}, {{0, 0x0009b259, 0xffce12f2, 0}},
+  {{0, 0x00099946, 0xffce940c, 0}}, {{0, 0x00098033, 0xffcf1526, 0}},
+  {{0, 0x00096720, 0xffcf9640, 0}}, {{0, 0x00094e0d, 0xffd0175a, 0}},
+  {{0, 0x000934fa, 0xffd09874, 0}}, {{0, 0x00091be7, 0xffd1198e, 0}},
+  {{0, 0x000902d4, 0xffd19aa8, 0}}, {{0, 0x0008e9c1, 0xffd21bc2, 0}},
+  {{0, 0x0008d0ae, 0xffd29cdc, 0}}, {{0, 0x0008b79b, 0xffd31df6, 0}},
+  {{0, 0x00089e88, 0xffd39f10, 0}}, {{0, 0x00088575, 0xffd4202a, 0}},
+  {{0, 0x00086c62, 0xffd4a144, 0}}, {{0, 0x0008534f, 0xffd5225e, 0}},
+  {{0, 0x00083a3c, 0xffd5a378, 0}}, {{0, 0x00082129, 0xffd62492, 0}},
+  {{0, 0x00080816, 0xffd6a5ac, 0}}, {{0, 0x0007ef03, 0xffd726c6, 0}},
+  {{0, 0x0007d5f0, 0xffd7a7e0, 0}}, {{0, 0x0007bcdd, 0xffd828fa, 0}},
+  {{0, 0x0007a3ca, 0xffd8aa14, 0}}, {{0, 0x00078ab7, 0xffd92b2e, 0}},
+  {{0, 0x000771a4, 0xffd9ac48, 0}}, {{0, 0x00075891, 0xffda2d62, 0}},
+  {{0, 0x00073f7e, 0xffdaae7c, 0}}, {{0, 0x0007266b, 0xffdb2f96, 0}},
+  {{0, 0x00070d58, 0xffdbb0b0, 0}}, {{0, 0x0006f445, 0xffdc31ca, 0}},
+  {{0, 0x0006db32, 0xffdcb2e4, 0}}, {{0, 0x0006c21f, 0xffdd33fe, 0}},
+  {{0, 0x0006a90c, 0xffddb518, 0}}, {{0, 0x00068ff9, 0xffde3632, 0}},
+  {{0, 0x000676e6, 0xffdeb74c, 0}}, {{0, 0x00065dd3, 0xffdf3866, 0}},
+  {{0, 0x000644c0, 0xffdfb980, 0}}, {{0, 0x00062bad, 0xffe03a9a, 0}},
+  {{0, 0x0006129a, 0xffe0bbb4, 0}}, {{0, 0x0005f987, 0xffe13cce, 0}},
+  {{0, 0x0005e074, 0xffe1bde8, 0}}, {{0, 0x0005c761, 0xffe23f02, 0}},
+  {{0, 0x0005ae4e, 0xffe2c01c, 0}}, {{0, 0x0005953b, 0xffe34136, 0}},
+  {{0, 0x00057c28, 0xffe3c250, 0}}, {{0, 0x00056315, 0xffe4436a, 0}},
+  {{0, 0x00054a02, 0xffe4c484, 0}}, {{0, 0x000530ef, 0xffe5459e, 0}},
+  {{0, 0x000517dc, 0xffe5c6b8, 0}}, {{0, 0x0004fec9, 0xffe647d2, 0}},
+  {{0, 0x0004e5b6, 0xffe6c8ec, 0}}, {{0, 0x0004cca3, 0xffe74a06, 0}},
+  {{0, 0x0004b390, 0xffe7cb20, 0}}, {{0, 0x00049a7d, 0xffe84c3a, 0}},
+  {{0, 0x0004816a, 0xffe8cd54, 0}}, {{0, 0x00046857, 0xffe94e6e, 0}},
+  {{0, 0x00044f44, 0xffe9cf88, 0}}, {{0, 0x00043631, 0xffea50a2, 0}},
+  {{0, 0x00041d1e, 0xffead1bc, 0}}, {{0, 0x0004040b, 0xffeb52d6, 0}},
+  {{0, 0x0003eaf8, 0xffebd3f0, 0}}, {{0, 0x0003d1e5, 0xffec550a, 0}},
+  {{0, 0x0003b8d2, 0xffecd624, 0}}, {{0, 0x00039fbf, 0xffed573e, 0}},
+  {{0, 0x000386ac, 0xffedd858, 0}}, {{0, 0x00036d99, 0xffee5972, 0}},
+  {{0, 0x00035486, 0xffeeda8c, 0}}, {{0, 0x00033b73, 0xffef5ba6, 0}},
+  {{0, 0x00032260, 0xffefdcc0, 0}}, {{0, 0x0003094d, 0xfff05dda, 0}},
+  {{0, 0x0002f03a, 0xfff0def4, 0}}, {{0, 0x0002d727, 0xfff1600e, 0}},
+  {{0, 0x0002be14, 0xfff1e128, 0}}, {{0, 0x0002a501, 0xfff26242, 0}},
+  {{0, 0x00028bee, 0xfff2e35c, 0}}, {{0, 0x000272db, 0xfff36476, 0}},
+  {{0, 0x000259c8, 0xfff3e590, 0}}, {{0, 0x000240b5, 0xfff466aa, 0}},
+  {{0, 0x000227a2, 0xfff4e7c4, 0}}, {{0, 0x00020e8f, 0xfff568de, 0}},
+  {{0, 0x0001f57c, 0xfff5e9f8, 0}}, {{0, 0x0001dc69, 0xfff66b12, 0}},
+  {{0, 0x0001c356, 0xfff6ec2c, 0}}, {{0, 0x0001aa43, 0xfff76d46, 0}},
+  {{0, 0x00019130, 0xfff7ee60, 0}}, {{0, 0x0001781d, 0xfff86f7a, 0}},
+  {{0, 0x00015f0a, 0xfff8f094, 0}}, {{0, 0x000145f7, 0xfff971ae, 0}},
+  {{0, 0x00012ce4, 0xfff9f2c8, 0}}, {{0, 0x000113d1, 0xfffa73e2, 0}},
+  {{0, 0x0000fabe, 0xfffaf4fc, 0}}, {{0, 0x0000e1ab, 0xfffb7616, 0}},
+  {{0, 0x0000c898, 0xfffbf730, 0}}, {{0, 0x0000af85, 0xfffc784a, 0}},
+  {{0, 0x00009672, 0xfffcf964, 0}}, {{0, 0x00007d5f, 0xfffd7a7e, 0}},
+  {{0, 0x0000644c, 0xfffdfb98, 0}}, {{0, 0x00004b39, 0xfffe7cb2, 0}},
+  {{0, 0x00003226, 0xfffefdcc, 0}}, {{0, 0x00001913, 0xffff7ee6, 0}},
+  {{0, 0x00000000, 0x00000000, 0}}, {{0, 0xffffe6ed, 0x0000811a, 0}},
+  {{0, 0xffffcdda, 0x00010234, 0}}, {{0, 0xffffb4c7, 0x0001834e, 0}},
+  {{0, 0xffff9bb4, 0x00020468, 0}}, {{0, 0xffff82a1, 0x00028582, 0}},
+  {{0, 0xffff698e, 0x0003069c, 0}}, {{0, 0xffff507b, 0x000387b6, 0}},
+  {{0, 0xffff3768, 0x000408d0, 0}}, {{0, 0xffff1e55, 0x000489ea, 0}},
+  {{0, 0xffff0542, 0x00050b04, 0}}, {{0, 0xfffeec2f, 0x00058c1e, 0}},
+  {{0, 0xfffed31c, 0x00060d38, 0}}, {{0, 0xfffeba09, 0x00068e52, 0}},
+  {{0, 0xfffea0f6, 0x00070f6c, 0}}, {{0, 0xfffe87e3, 0x00079086, 0}},
+  {{0, 0xfffe6ed0, 0x000811a0, 0}}, {{0, 0xfffe55bd, 0x000892ba, 0}},
+  {{0, 0xfffe3caa, 0x000913d4, 0}}, {{0, 0xfffe2397, 0x000994ee, 0}},
+  {{0, 0xfffe0a84, 0x000a1608, 0}}, {{0, 0xfffdf171, 0x000a9722, 0}},
+  {{0, 0xfffdd85e, 0x000b183c, 0}}, {{0, 0xfffdbf4b, 0x000b9956, 0}},
+  {{0, 0xfffda638, 0x000c1a70, 0}}, {{0, 0xfffd8d25, 0x000c9b8a, 0}},
+  {{0, 0xfffd7412, 0x000d1ca4, 0}}, {{0, 0xfffd5aff, 0x000d9dbe, 0}},
+  {{0, 0xfffd41ec, 0x000e1ed8, 0}}, {{0, 0xfffd28d9, 0x000e9ff2, 0}},
+  {{0, 0xfffd0fc6, 0x000f210c, 0}}, {{0, 0xfffcf6b3, 0x000fa226, 0}},
+  {{0, 0xfffcdda0, 0x00102340, 0}}, {{0, 0xfffcc48d, 0x0010a45a, 0}},
+  {{0, 0xfffcab7a, 0x00112574, 0}}, {{0, 0xfffc9267, 0x0011a68e, 0}},
+  {{0, 0xfffc7954, 0x001227a8, 0}}, {{0, 0xfffc6041, 0x0012a8c2, 0}},
+  {{0, 0xfffc472e, 0x001329dc, 0}}, {{0, 0xfffc2e1b, 0x0013aaf6, 0}},
+  {{0, 0xfffc1508, 0x00142c10, 0}}, {{0, 0xfffbfbf5, 0x0014ad2a, 0}},
+  {{0, 0xfffbe2e2, 0x00152e44, 0}}, {{0, 0xfffbc9cf, 0x0015af5e, 0}},
+  {{0, 0xfffbb0bc, 0x00163078, 0}}, {{0, 0xfffb97a9, 0x0016b192, 0}},
+  {{0, 0xfffb7e96, 0x001732ac, 0}}, {{0, 0xfffb6583, 0x0017b3c6, 0}},
+  {{0, 0xfffb4c70, 0x001834e0, 0}}, {{0, 0xfffb335d, 0x0018b5fa, 0}},
+  {{0, 0xfffb1a4a, 0x00193714, 0}}, {{0, 0xfffb0137, 0x0019b82e, 0}},
+  {{0, 0xfffae824, 0x001a3948, 0}}, {{0, 0xfffacf11, 0x001aba62, 0}},
+  {{0, 0xfffab5fe, 0x001b3b7c, 0}}, {{0, 0xfffa9ceb, 0x001bbc96, 0}},
+  {{0, 0xfffa83d8, 0x001c3db0, 0}}, {{0, 0xfffa6ac5, 0x001cbeca, 0}},
+  {{0, 0xfffa51b2, 0x001d3fe4, 0}}, {{0, 0xfffa389f, 0x001dc0fe, 0}},
+  {{0, 0xfffa1f8c, 0x001e4218, 0}}, {{0, 0xfffa0679, 0x001ec332, 0}},
+  {{0, 0xfff9ed66, 0x001f444c, 0}}, {{0, 0xfff9d453, 0x001fc566, 0}},
+  {{0, 0xfff9bb40, 0x00204680, 0}}, {{0, 0xfff9a22d, 0x0020c79a, 0}},
+  {{0, 0xfff9891a, 0x002148b4, 0}}, {{0, 0xfff97007, 0x0021c9ce, 0}},
+  {{0, 0xfff956f4, 0x00224ae8, 0}}, {{0, 0xfff93de1, 0x0022cc02, 0}},
+  {{0, 0xfff924ce, 0x00234d1c, 0}}, {{0, 0xfff90bbb, 0x0023ce36, 0}},
+  {{0, 0xfff8f2a8, 0x00244f50, 0}}, {{0, 0xfff8d995, 0x0024d06a, 0}},
+  {{0, 0xfff8c082, 0x00255184, 0}}, {{0, 0xfff8a76f, 0x0025d29e, 0}},
+  {{0, 0xfff88e5c, 0x002653b8, 0}}, {{0, 0xfff87549, 0x0026d4d2, 0}},
+  {{0, 0xfff85c36, 0x002755ec, 0}}, {{0, 0xfff84323, 0x0027d706, 0}},
+  {{0, 0xfff82a10, 0x00285820, 0}}, {{0, 0xfff810fd, 0x0028d93a, 0}},
+  {{0, 0xfff7f7ea, 0x00295a54, 0}}, {{0, 0xfff7ded7, 0x0029db6e, 0}},
+  {{0, 0xfff7c5c4, 0x002a5c88, 0}}, {{0, 0xfff7acb1, 0x002adda2, 0}},
+  {{0, 0xfff7939e, 0x002b5ebc, 0}}, {{0, 0xfff77a8b, 0x002bdfd6, 0}},
+  {{0, 0xfff76178, 0x002c60f0, 0}}, {{0, 0xfff74865, 0x002ce20a, 0}},
+  {{0, 0xfff72f52, 0x002d6324, 0}}, {{0, 0xfff7163f, 0x002de43e, 0}},
+  {{0, 0xfff6fd2c, 0x002e6558, 0}}, {{0, 0xfff6e419, 0x002ee672, 0}},
+  {{0, 0xfff6cb06, 0x002f678c, 0}}, {{0, 0xfff6b1f3, 0x002fe8a6, 0}},
+  {{0, 0xfff698e0, 0x003069c0, 0}}, {{0, 0xfff67fcd, 0x0030eada, 0}},
+  {{0, 0xfff666ba, 0x00316bf4, 0}}, {{0, 0xfff64da7, 0x0031ed0e, 0}},
+  {{0, 0xfff63494, 0x00326e28, 0}}, {{0, 0xfff61b81, 0x0032ef42, 0}},
+  {{0, 0xfff6026e, 0x0033705c, 0}}, {{0, 0xfff5e95b, 0x0033f176, 0}},
+  {{0, 0xfff5d048, 0x00347290, 0}}, {{0, 0xfff5b735, 0x0034f3aa, 0}},
+  {{0, 0xfff59e22, 0x003574c4, 0}}, {{0, 0xfff5850f, 0x0035f5de, 0}},
+  {{0, 0xfff56bfc, 0x003676f8, 0}}, {{0, 0xfff552e9, 0x0036f812, 0}},
+  {{0, 0xfff539d6, 0x0037792c, 0}}, {{0, 0xfff520c3, 0x0037fa46, 0}},
+  {{0, 0xfff507b0, 0x00387b60, 0}}, {{0, 0xfff4ee9d, 0x0038fc7a, 0}},
+  {{0, 0xfff4d58a, 0x00397d94, 0}}, {{0, 0xfff4bc77, 0x0039feae, 0}},
+  {{0, 0xfff4a364, 0x003a7fc8, 0}}, {{0, 0xfff48a51, 0x003b00e2, 0}},
+  {{0, 0xfff4713e, 0x003b81fc, 0}}, {{0, 0xfff4582b, 0x003c0316, 0}},
+  {{0, 0xfff43f18, 0x003c8430, 0}}, {{0, 0xfff42605, 0x003d054a, 0}},
+  {{0, 0xfff40cf2, 0x003d8664, 0}}, {{0, 0xfff3f3df, 0x003e077e, 0}},
+  {{0, 0xfff3dacc, 0x003e8898, 0}}, {{0, 0xfff3c1b9, 0x003f09b2, 0}},
+  {{0, 0xfff3a8a6, 0x003f8acc, 0}}, {{0, 0xfff38f93, 0x00400be6, 0}}
+};
+
+static const VP8kCstSSE2 VP8kVtoRGBA[256] = {  // read-only like the Y and U tables; lanes 2-3 are 0, entry 128 is all zeros
+  {{0xffcced80, 0x001a0400, 0, 0}}, {{0xffcd53a5, 0x0019cff8, 0, 0}},
+  {{0xffcdb9ca, 0x00199bf0, 0, 0}}, {{0xffce1fef, 0x001967e8, 0, 0}},
+  {{0xffce8614, 0x001933e0, 0, 0}}, {{0xffceec39, 0x0018ffd8, 0, 0}},
+  {{0xffcf525e, 0x0018cbd0, 0, 0}}, {{0xffcfb883, 0x001897c8, 0, 0}},
+  {{0xffd01ea8, 0x001863c0, 0, 0}}, {{0xffd084cd, 0x00182fb8, 0, 0}},
+  {{0xffd0eaf2, 0x0017fbb0, 0, 0}}, {{0xffd15117, 0x0017c7a8, 0, 0}},
+  {{0xffd1b73c, 0x001793a0, 0, 0}}, {{0xffd21d61, 0x00175f98, 0, 0}},
+  {{0xffd28386, 0x00172b90, 0, 0}}, {{0xffd2e9ab, 0x0016f788, 0, 0}},
+  {{0xffd34fd0, 0x0016c380, 0, 0}}, {{0xffd3b5f5, 0x00168f78, 0, 0}},
+  {{0xffd41c1a, 0x00165b70, 0, 0}}, {{0xffd4823f, 0x00162768, 0, 0}},
+  {{0xffd4e864, 0x0015f360, 0, 0}}, {{0xffd54e89, 0x0015bf58, 0, 0}},
+  {{0xffd5b4ae, 0x00158b50, 0, 0}}, {{0xffd61ad3, 0x00155748, 0, 0}},
+  {{0xffd680f8, 0x00152340, 0, 0}}, {{0xffd6e71d, 0x0014ef38, 0, 0}},
+  {{0xffd74d42, 0x0014bb30, 0, 0}}, {{0xffd7b367, 0x00148728, 0, 0}},
+  {{0xffd8198c, 0x00145320, 0, 0}}, {{0xffd87fb1, 0x00141f18, 0, 0}},
+  {{0xffd8e5d6, 0x0013eb10, 0, 0}}, {{0xffd94bfb, 0x0013b708, 0, 0}},
+  {{0xffd9b220, 0x00138300, 0, 0}}, {{0xffda1845, 0x00134ef8, 0, 0}},
+  {{0xffda7e6a, 0x00131af0, 0, 0}}, {{0xffdae48f, 0x0012e6e8, 0, 0}},
+  {{0xffdb4ab4, 0x0012b2e0, 0, 0}}, {{0xffdbb0d9, 0x00127ed8, 0, 0}},
+  {{0xffdc16fe, 0x00124ad0, 0, 0}}, {{0xffdc7d23, 0x001216c8, 0, 0}},
+  {{0xffdce348, 0x0011e2c0, 0, 0}}, {{0xffdd496d, 0x0011aeb8, 0, 0}},
+  {{0xffddaf92, 0x00117ab0, 0, 0}}, {{0xffde15b7, 0x001146a8, 0, 0}},
+  {{0xffde7bdc, 0x001112a0, 0, 0}}, {{0xffdee201, 0x0010de98, 0, 0}},
+  {{0xffdf4826, 0x0010aa90, 0, 0}}, {{0xffdfae4b, 0x00107688, 0, 0}},
+  {{0xffe01470, 0x00104280, 0, 0}}, {{0xffe07a95, 0x00100e78, 0, 0}},
+  {{0xffe0e0ba, 0x000fda70, 0, 0}}, {{0xffe146df, 0x000fa668, 0, 0}},
+  {{0xffe1ad04, 0x000f7260, 0, 0}}, {{0xffe21329, 0x000f3e58, 0, 0}},
+  {{0xffe2794e, 0x000f0a50, 0, 0}}, {{0xffe2df73, 0x000ed648, 0, 0}},
+  {{0xffe34598, 0x000ea240, 0, 0}}, {{0xffe3abbd, 0x000e6e38, 0, 0}},
+  {{0xffe411e2, 0x000e3a30, 0, 0}}, {{0xffe47807, 0x000e0628, 0, 0}},
+  {{0xffe4de2c, 0x000dd220, 0, 0}}, {{0xffe54451, 0x000d9e18, 0, 0}},
+  {{0xffe5aa76, 0x000d6a10, 0, 0}}, {{0xffe6109b, 0x000d3608, 0, 0}},
+  {{0xffe676c0, 0x000d0200, 0, 0}}, {{0xffe6dce5, 0x000ccdf8, 0, 0}},
+  {{0xffe7430a, 0x000c99f0, 0, 0}}, {{0xffe7a92f, 0x000c65e8, 0, 0}},
+  {{0xffe80f54, 0x000c31e0, 0, 0}}, {{0xffe87579, 0x000bfdd8, 0, 0}},
+  {{0xffe8db9e, 0x000bc9d0, 0, 0}}, {{0xffe941c3, 0x000b95c8, 0, 0}},
+  {{0xffe9a7e8, 0x000b61c0, 0, 0}}, {{0xffea0e0d, 0x000b2db8, 0, 0}},
+  {{0xffea7432, 0x000af9b0, 0, 0}}, {{0xffeada57, 0x000ac5a8, 0, 0}},
+  {{0xffeb407c, 0x000a91a0, 0, 0}}, {{0xffeba6a1, 0x000a5d98, 0, 0}},
+  {{0xffec0cc6, 0x000a2990, 0, 0}}, {{0xffec72eb, 0x0009f588, 0, 0}},
+  {{0xffecd910, 0x0009c180, 0, 0}}, {{0xffed3f35, 0x00098d78, 0, 0}},
+  {{0xffeda55a, 0x00095970, 0, 0}}, {{0xffee0b7f, 0x00092568, 0, 0}},
+  {{0xffee71a4, 0x0008f160, 0, 0}}, {{0xffeed7c9, 0x0008bd58, 0, 0}},
+  {{0xffef3dee, 0x00088950, 0, 0}}, {{0xffefa413, 0x00085548, 0, 0}},
+  {{0xfff00a38, 0x00082140, 0, 0}}, {{0xfff0705d, 0x0007ed38, 0, 0}},
+  {{0xfff0d682, 0x0007b930, 0, 0}}, {{0xfff13ca7, 0x00078528, 0, 0}},
+  {{0xfff1a2cc, 0x00075120, 0, 0}}, {{0xfff208f1, 0x00071d18, 0, 0}},
+  {{0xfff26f16, 0x0006e910, 0, 0}}, {{0xfff2d53b, 0x0006b508, 0, 0}},
+  {{0xfff33b60, 0x00068100, 0, 0}}, {{0xfff3a185, 0x00064cf8, 0, 0}},
+  {{0xfff407aa, 0x000618f0, 0, 0}}, {{0xfff46dcf, 0x0005e4e8, 0, 0}},
+  {{0xfff4d3f4, 0x0005b0e0, 0, 0}}, {{0xfff53a19, 0x00057cd8, 0, 0}},
+  {{0xfff5a03e, 0x000548d0, 0, 0}}, {{0xfff60663, 0x000514c8, 0, 0}},
+  {{0xfff66c88, 0x0004e0c0, 0, 0}}, {{0xfff6d2ad, 0x0004acb8, 0, 0}},
+  {{0xfff738d2, 0x000478b0, 0, 0}}, {{0xfff79ef7, 0x000444a8, 0, 0}},
+  {{0xfff8051c, 0x000410a0, 0, 0}}, {{0xfff86b41, 0x0003dc98, 0, 0}},
+  {{0xfff8d166, 0x0003a890, 0, 0}}, {{0xfff9378b, 0x00037488, 0, 0}},
+  {{0xfff99db0, 0x00034080, 0, 0}}, {{0xfffa03d5, 0x00030c78, 0, 0}},
+  {{0xfffa69fa, 0x0002d870, 0, 0}}, {{0xfffad01f, 0x0002a468, 0, 0}},
+  {{0xfffb3644, 0x00027060, 0, 0}}, {{0xfffb9c69, 0x00023c58, 0, 0}},
+  {{0xfffc028e, 0x00020850, 0, 0}}, {{0xfffc68b3, 0x0001d448, 0, 0}},
+  {{0xfffcced8, 0x0001a040, 0, 0}}, {{0xfffd34fd, 0x00016c38, 0, 0}},
+  {{0xfffd9b22, 0x00013830, 0, 0}}, {{0xfffe0147, 0x00010428, 0, 0}},
+  {{0xfffe676c, 0x0000d020, 0, 0}}, {{0xfffecd91, 0x00009c18, 0, 0}},
+  {{0xffff33b6, 0x00006810, 0, 0}}, {{0xffff99db, 0x00003408, 0, 0}},
+  {{0x00000000, 0x00000000, 0, 0}}, {{0x00006625, 0xffffcbf8, 0, 0}},
+  {{0x0000cc4a, 0xffff97f0, 0, 0}}, {{0x0001326f, 0xffff63e8, 0, 0}},
+  {{0x00019894, 0xffff2fe0, 0, 0}}, {{0x0001feb9, 0xfffefbd8, 0, 0}},
+  {{0x000264de, 0xfffec7d0, 0, 0}}, {{0x0002cb03, 0xfffe93c8, 0, 0}},
+  {{0x00033128, 0xfffe5fc0, 0, 0}}, {{0x0003974d, 0xfffe2bb8, 0, 0}},
+  {{0x0003fd72, 0xfffdf7b0, 0, 0}}, {{0x00046397, 0xfffdc3a8, 0, 0}},
+  {{0x0004c9bc, 0xfffd8fa0, 0, 0}}, {{0x00052fe1, 0xfffd5b98, 0, 0}},
+  {{0x00059606, 0xfffd2790, 0, 0}}, {{0x0005fc2b, 0xfffcf388, 0, 0}},
+  {{0x00066250, 0xfffcbf80, 0, 0}}, {{0x0006c875, 0xfffc8b78, 0, 0}},
+  {{0x00072e9a, 0xfffc5770, 0, 0}}, {{0x000794bf, 0xfffc2368, 0, 0}},
+  {{0x0007fae4, 0xfffbef60, 0, 0}}, {{0x00086109, 0xfffbbb58, 0, 0}},
+  {{0x0008c72e, 0xfffb8750, 0, 0}}, {{0x00092d53, 0xfffb5348, 0, 0}},
+  {{0x00099378, 0xfffb1f40, 0, 0}}, {{0x0009f99d, 0xfffaeb38, 0, 0}},
+  {{0x000a5fc2, 0xfffab730, 0, 0}}, {{0x000ac5e7, 0xfffa8328, 0, 0}},
+  {{0x000b2c0c, 0xfffa4f20, 0, 0}}, {{0x000b9231, 0xfffa1b18, 0, 0}},
+  {{0x000bf856, 0xfff9e710, 0, 0}}, {{0x000c5e7b, 0xfff9b308, 0, 0}},
+  {{0x000cc4a0, 0xfff97f00, 0, 0}}, {{0x000d2ac5, 0xfff94af8, 0, 0}},
+  {{0x000d90ea, 0xfff916f0, 0, 0}}, {{0x000df70f, 0xfff8e2e8, 0, 0}},
+  {{0x000e5d34, 0xfff8aee0, 0, 0}}, {{0x000ec359, 0xfff87ad8, 0, 0}},
+  {{0x000f297e, 0xfff846d0, 0, 0}}, {{0x000f8fa3, 0xfff812c8, 0, 0}},
+  {{0x000ff5c8, 0xfff7dec0, 0, 0}}, {{0x00105bed, 0xfff7aab8, 0, 0}},
+  {{0x0010c212, 0xfff776b0, 0, 0}}, {{0x00112837, 0xfff742a8, 0, 0}},
+  {{0x00118e5c, 0xfff70ea0, 0, 0}}, {{0x0011f481, 0xfff6da98, 0, 0}},
+  {{0x00125aa6, 0xfff6a690, 0, 0}}, {{0x0012c0cb, 0xfff67288, 0, 0}},
+  {{0x001326f0, 0xfff63e80, 0, 0}}, {{0x00138d15, 0xfff60a78, 0, 0}},
+  {{0x0013f33a, 0xfff5d670, 0, 0}}, {{0x0014595f, 0xfff5a268, 0, 0}},
+  {{0x0014bf84, 0xfff56e60, 0, 0}}, {{0x001525a9, 0xfff53a58, 0, 0}},
+  {{0x00158bce, 0xfff50650, 0, 0}}, {{0x0015f1f3, 0xfff4d248, 0, 0}},
+  {{0x00165818, 0xfff49e40, 0, 0}}, {{0x0016be3d, 0xfff46a38, 0, 0}},
+  {{0x00172462, 0xfff43630, 0, 0}}, {{0x00178a87, 0xfff40228, 0, 0}},
+  {{0x0017f0ac, 0xfff3ce20, 0, 0}}, {{0x001856d1, 0xfff39a18, 0, 0}},
+  {{0x0018bcf6, 0xfff36610, 0, 0}}, {{0x0019231b, 0xfff33208, 0, 0}},
+  {{0x00198940, 0xfff2fe00, 0, 0}}, {{0x0019ef65, 0xfff2c9f8, 0, 0}},
+  {{0x001a558a, 0xfff295f0, 0, 0}}, {{0x001abbaf, 0xfff261e8, 0, 0}},
+  {{0x001b21d4, 0xfff22de0, 0, 0}}, {{0x001b87f9, 0xfff1f9d8, 0, 0}},
+  {{0x001bee1e, 0xfff1c5d0, 0, 0}}, {{0x001c5443, 0xfff191c8, 0, 0}},
+  {{0x001cba68, 0xfff15dc0, 0, 0}}, {{0x001d208d, 0xfff129b8, 0, 0}},
+  {{0x001d86b2, 0xfff0f5b0, 0, 0}}, {{0x001decd7, 0xfff0c1a8, 0, 0}},
+  {{0x001e52fc, 0xfff08da0, 0, 0}}, {{0x001eb921, 0xfff05998, 0, 0}},
+  {{0x001f1f46, 0xfff02590, 0, 0}}, {{0x001f856b, 0xffeff188, 0, 0}},
+  {{0x001feb90, 0xffefbd80, 0, 0}}, {{0x002051b5, 0xffef8978, 0, 0}},
+  {{0x0020b7da, 0xffef5570, 0, 0}}, {{0x00211dff, 0xffef2168, 0, 0}},
+  {{0x00218424, 0xffeeed60, 0, 0}}, {{0x0021ea49, 0xffeeb958, 0, 0}},
+  {{0x0022506e, 0xffee8550, 0, 0}}, {{0x0022b693, 0xffee5148, 0, 0}},
+  {{0x00231cb8, 0xffee1d40, 0, 0}}, {{0x002382dd, 0xffede938, 0, 0}},
+  {{0x0023e902, 0xffedb530, 0, 0}}, {{0x00244f27, 0xffed8128, 0, 0}},
+  {{0x0024b54c, 0xffed4d20, 0, 0}}, {{0x00251b71, 0xffed1918, 0, 0}},
+  {{0x00258196, 0xffece510, 0, 0}}, {{0x0025e7bb, 0xffecb108, 0, 0}},
+  {{0x00264de0, 0xffec7d00, 0, 0}}, {{0x0026b405, 0xffec48f8, 0, 0}},
+  {{0x00271a2a, 0xffec14f0, 0, 0}}, {{0x0027804f, 0xffebe0e8, 0, 0}},
+  {{0x0027e674, 0xffebace0, 0, 0}}, {{0x00284c99, 0xffeb78d8, 0, 0}},
+  {{0x0028b2be, 0xffeb44d0, 0, 0}}, {{0x002918e3, 0xffeb10c8, 0, 0}},
+  {{0x00297f08, 0xffeadcc0, 0, 0}}, {{0x0029e52d, 0xffeaa8b8, 0, 0}},
+  {{0x002a4b52, 0xffea74b0, 0, 0}}, {{0x002ab177, 0xffea40a8, 0, 0}},
+  {{0x002b179c, 0xffea0ca0, 0, 0}}, {{0x002b7dc1, 0xffe9d898, 0, 0}},
+  {{0x002be3e6, 0xffe9a490, 0, 0}}, {{0x002c4a0b, 0xffe97088, 0, 0}},
+  {{0x002cb030, 0xffe93c80, 0, 0}}, {{0x002d1655, 0xffe90878, 0, 0}},
+  {{0x002d7c7a, 0xffe8d470, 0, 0}}, {{0x002de29f, 0xffe8a068, 0, 0}},
+  {{0x002e48c4, 0xffe86c60, 0, 0}}, {{0x002eaee9, 0xffe83858, 0, 0}},
+  {{0x002f150e, 0xffe80450, 0, 0}}, {{0x002f7b33, 0xffe7d048, 0, 0}},
+  {{0x002fe158, 0xffe79c40, 0, 0}}, {{0x0030477d, 0xffe76838, 0, 0}},
+  {{0x0030ada2, 0xffe73430, 0, 0}}, {{0x003113c7, 0xffe70028, 0, 0}},
+  {{0x003179ec, 0xffe6cc20, 0, 0}}, {{0x0031e011, 0xffe69818, 0, 0}},
+  {{0x00324636, 0xffe66410, 0, 0}}, {{0x0032ac5b, 0xffe63008, 0, 0}}
+};
diff --git a/drivers/webp/enc/alpha.c b/drivers/webp/enc/alpha.c
index e554eb7f30..1842b36401 100644
--- a/drivers/webp/enc/alpha.c
+++ b/drivers/webp/enc/alpha.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha-plane compression.
@@ -13,13 +15,11 @@
 #include <stdlib.h>
 
 #include "./vp8enci.h"
+#include "../dsp/dsp.h"
 #include "../utils/filters.h"
 #include "../utils/quant_levels.h"
-#include "../format_constants.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/utils.h"
+#include "webp/format_constants.h"
 
 // -----------------------------------------------------------------------------
 // Encodes the given alpha data via specified compression method 'method'.
@@ -36,7 +36,7 @@ extern "C" {
 //
 // 'output' corresponds to the buffer containing compressed alpha data.
 //          This buffer is allocated by this method and caller should call
-//          free(*output) when done.
+//          WebPSafeFree(*output) when done.
 // 'output_size' corresponds to size of this compressed alpha buffer.
 //
 // Returns 1 on successfully encoding the alpha and
@@ -48,12 +48,11 @@ extern "C" {
 
 static int EncodeLossless(const uint8_t* const data, int width, int height,
                           int effort_level,  // in [0..6] range
-                          VP8BitWriter* const bw,
+                          VP8LBitWriter* const bw,
                           WebPAuxStats* const stats) {
   int ok = 0;
   WebPConfig config;
   WebPPicture picture;
-  VP8LBitWriter tmp_bw;
 
   WebPPictureInit(&picture);
   picture.width = width;
@@ -63,53 +62,51 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
   if (!WebPPictureAlloc(&picture)) return 0;
 
   // Transfer the alpha values to the green channel.
-  {
-    int i, j;
-    uint32_t* dst = picture.argb;
-    const uint8_t* src = data;
-    for (j = 0; j < picture.height; ++j) {
-      for (i = 0; i < picture.width; ++i) {
-        dst[i] = (src[i] << 8) | 0xff000000u;
-      }
-      src += width;
-      dst += picture.argb_stride;
-    }
-  }
+  WebPDispatchAlphaToGreen(data, width, picture.width, picture.height,
+                           picture.argb, picture.argb_stride);
 
   WebPConfigInit(&config);
   config.lossless = 1;
   config.method = effort_level;  // impact is very small
-  // Set moderate default quality setting for alpha. Higher qualities (80 and
-  // above) could be very slow.
-  config.quality = 10.f + 15.f * effort_level;
-  if (config.quality > 100.f) config.quality = 100.f;
+  // Set a low default quality for encoding alpha. Ensure that Alpha quality at
+  // lower methods (3 and below) is less than the threshold for triggering
+  // costly 'BackwardReferencesTraceBackwards'.
+  config.quality = 8.f * effort_level;
+  assert(config.quality >= 0 && config.quality <= 100.f);
 
-  ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
-  ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
+  ok = (VP8LEncodeStream(&config, &picture, bw) == VP8_ENC_OK);
   WebPPictureFree(&picture);
-  if (ok) {
-    const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw);
-    const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw);
-    VP8BitWriterAppend(bw, data, data_size);
+  ok = ok && !bw->error_;
+  if (!ok) {
+    VP8LBitWriterWipeOut(bw);
+    return 0;
   }
-  VP8LBitWriterDestroy(&tmp_bw);
-  return ok && !bw->error_;
+  return 1;
 }
 
 // -----------------------------------------------------------------------------
 
+// Small struct to hold the result of a filter mode compression attempt.
+typedef struct {
+  size_t score;
+  VP8BitWriter bw;
+  WebPAuxStats stats;
+} FilterTrial;
+
+// This function always returns an initialized 'bw' object, even upon error.
 static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
                                int method, int filter, int reduce_levels,
                                int effort_level,  // in [0..6] range
                                uint8_t* const tmp_alpha,
-                               VP8BitWriter* const bw,
-                               WebPAuxStats* const stats) {
+                               FilterTrial* result) {
   int ok = 0;
   const uint8_t* alpha_src;
   WebPFilterFunc filter_func;
   uint8_t header;
-  size_t expected_size;
   const size_t data_size = width * height;
+  const uint8_t* output = NULL;
+  size_t output_size = 0;
+  VP8LBitWriter tmp_bw;
 
   assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
   assert(filter >= 0 && filter < WEBP_FILTER_LAST);
@@ -118,43 +115,163 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
   assert(sizeof(header) == ALPHA_HEADER_LEN);
   // TODO(skal): have a common function and #define's to validate alpha params.
 
-  expected_size =
-      (method == ALPHA_NO_COMPRESSION) ? (ALPHA_HEADER_LEN + data_size)
-                                       : (data_size >> 5);
-  header = method | (filter << 2);
-  if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
-
-  VP8BitWriterInit(bw, expected_size);
-  VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
-
   filter_func = WebPFilters[filter];
-  if (filter_func) {
-    filter_func(data, width, height, 1, width, tmp_alpha);
+  if (filter_func != NULL) {
+    filter_func(data, width, height, width, tmp_alpha);
     alpha_src = tmp_alpha;
   }  else {
     alpha_src = data;
   }
 
+  if (method != ALPHA_NO_COMPRESSION) {
+    ok = VP8LBitWriterInit(&tmp_bw, data_size >> 3);
+    ok = ok && EncodeLossless(alpha_src, width, height, effort_level,
+                              &tmp_bw, &result->stats);
+    if (ok) {
+      output = VP8LBitWriterFinish(&tmp_bw);
+      output_size = VP8LBitWriterNumBytes(&tmp_bw);
+      if (output_size > data_size) {
+        // compressed size is larger than source! Revert to uncompressed mode.
+        method = ALPHA_NO_COMPRESSION;
+        VP8LBitWriterWipeOut(&tmp_bw);
+      }
+    } else {
+      VP8LBitWriterWipeOut(&tmp_bw);
+      return 0;
+    }
+  }
+
   if (method == ALPHA_NO_COMPRESSION) {
-    ok = VP8BitWriterAppend(bw, alpha_src, width * height);
-    ok = ok && !bw->error_;
-  } else {
-    ok = EncodeLossless(alpha_src, width, height, effort_level, bw, stats);
-    VP8BitWriterFinish(bw);
+    output = alpha_src;
+    output_size = data_size;
+    ok = 1;
+  }
+
+  // Emit final result.
+  header = method | (filter << 2);
+  if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
+
+  VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size);
+  ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN);
+  ok = ok && VP8BitWriterAppend(&result->bw, output, output_size);
+
+  if (method != ALPHA_NO_COMPRESSION) {
+    VP8LBitWriterWipeOut(&tmp_bw);
   }
+  ok = ok && !result->bw.error_;
+  result->score = VP8BitWriterSize(&result->bw);
   return ok;
 }
 
 // -----------------------------------------------------------------------------
 
-// TODO(skal): move to dsp/ ?
-static void CopyPlane(const uint8_t* src, int src_stride,
-                      uint8_t* dst, int dst_stride, int width, int height) {
-  while (height-- > 0) {
-    memcpy(dst, src, width);
-    src += src_stride;
-    dst += dst_stride;
+static int GetNumColors(const uint8_t* data, int width, int height,
+                        int stride) {
+  int j;
+  int colors = 0;
+  uint8_t color[256] = { 0 };
+
+  for (j = 0; j < height; ++j) {
+    int i;
+    const uint8_t* const p = data + j * stride;
+    for (i = 0; i < width; ++i) {
+      color[p[i]] = 1;
+    }
+  }
+  for (j = 0; j < 256; ++j) {
+    if (color[j] > 0) ++colors;
   }
+  return colors;
+}
+
+#define FILTER_TRY_NONE (1 << WEBP_FILTER_NONE)
+#define FILTER_TRY_ALL ((1 << WEBP_FILTER_LAST) - 1)
+
+// Given the input 'filter' option, return an OR'd bit-set of filters to try.
+static uint32_t GetFilterMap(const uint8_t* alpha, int width, int height,
+                             int filter, int effort_level) {
+  uint32_t bit_map = 0U;
+  if (filter == WEBP_FILTER_FAST) {
+    // Quick estimate of the best candidate.
+    int try_filter_none = (effort_level > 3);
+    const int kMinColorsForFilterNone = 16;
+    const int kMaxColorsForFilterNone = 192;
+    const int num_colors = GetNumColors(alpha, width, height, width);
+    // For low number of colors, NONE yields better compression.
+    filter = (num_colors <= kMinColorsForFilterNone)
+        ? WEBP_FILTER_NONE
+        : WebPEstimateBestFilter(alpha, width, height, width);
+    bit_map |= 1 << filter;
+    // For large number of colors, try FILTER_NONE in addition to the best
+    // filter as well.
+    if (try_filter_none || num_colors > kMaxColorsForFilterNone) {
+      bit_map |= FILTER_TRY_NONE;
+    }
+  } else if (filter == WEBP_FILTER_NONE) {
+    bit_map = FILTER_TRY_NONE;
+  } else {  // WEBP_FILTER_BEST -> try all
+    bit_map = FILTER_TRY_ALL;
+  }
+  return bit_map;
+}
+
+static void InitFilterTrial(FilterTrial* const score) {
+  score->score = (size_t)~0U;
+  VP8BitWriterInit(&score->bw, 0);
+}
+
+static int ApplyFiltersAndEncode(const uint8_t* alpha, int width, int height,
+                                 size_t data_size, int method, int filter,
+                                 int reduce_levels, int effort_level,
+                                 uint8_t** const output,
+                                 size_t* const output_size,
+                                 WebPAuxStats* const stats) {
+  int ok = 1;
+  FilterTrial best;
+  uint32_t try_map =
+      GetFilterMap(alpha, width, height, filter, effort_level);
+  InitFilterTrial(&best);
+
+  if (try_map != FILTER_TRY_NONE) {
+    uint8_t* filtered_alpha =  (uint8_t*)WebPSafeMalloc(1ULL, data_size);
+    if (filtered_alpha == NULL) return 0;
+
+    for (filter = WEBP_FILTER_NONE; ok && try_map; ++filter, try_map >>= 1) {
+      if (try_map & 1) {
+        FilterTrial trial;
+        ok = EncodeAlphaInternal(alpha, width, height, method, filter,
+                                 reduce_levels, effort_level, filtered_alpha,
+                                 &trial);
+        if (ok && trial.score < best.score) {
+          VP8BitWriterWipeOut(&best.bw);
+          best = trial;
+        } else {
+          VP8BitWriterWipeOut(&trial.bw);
+        }
+      }
+    }
+    WebPSafeFree(filtered_alpha);
+  } else {
+    ok = EncodeAlphaInternal(alpha, width, height, method, WEBP_FILTER_NONE,
+                             reduce_levels, effort_level, NULL, &best);
+  }
+  if (ok) {
+    if (stats != NULL) {
+      stats->lossless_features = best.stats.lossless_features;
+      stats->histogram_bits = best.stats.histogram_bits;
+      stats->transform_bits = best.stats.transform_bits;
+      stats->cache_bits = best.stats.cache_bits;
+      stats->palette_size = best.stats.palette_size;
+      stats->lossless_size = best.stats.lossless_size;
+      stats->lossless_hdr_size = best.stats.lossless_hdr_size;
+      stats->lossless_data_size = best.stats.lossless_data_size;
+    }
+    *output_size = VP8BitWriterSize(&best.bw);
+    *output = VP8BitWriterBuf(&best.bw);
+  } else {
+    VP8BitWriterWipeOut(&best.bw);
+  }
+  return ok;
 }
 
 static int EncodeAlpha(VP8Encoder* const enc,
@@ -187,13 +304,18 @@ static int EncodeAlpha(VP8Encoder* const enc,
     return 0;
   }
 
-  quant_alpha = (uint8_t*)malloc(data_size);
+  if (method == ALPHA_NO_COMPRESSION) {
+    // Don't filter, as filtering will make no impact on compressed size.
+    filter = WEBP_FILTER_NONE;
+  }
+
+  quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size);
   if (quant_alpha == NULL) {
     return 0;
   }
 
   // Extract alpha data (width x height) from raw_data (stride x height).
-  CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
+  WebPCopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
 
   if (reduce_levels) {  // No Quantization required for 'quality = 100'.
     // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence
@@ -205,126 +327,99 @@ static int EncodeAlpha(VP8Encoder* const enc,
   }
 
   if (ok) {
-    VP8BitWriter bw;
-    int test_filter;
-    uint8_t* filtered_alpha = NULL;
-
-    // We always test WEBP_FILTER_NONE first.
-    ok = EncodeAlphaInternal(quant_alpha, width, height,
-                             method, WEBP_FILTER_NONE, reduce_levels,
-                             effort_level, NULL, &bw, pic->stats);
-    if (!ok) {
-      VP8BitWriterWipeOut(&bw);
-      goto End;
-    }
-
-    if (filter == WEBP_FILTER_FAST) {  // Quick estimate of a second candidate?
-      filter = EstimateBestFilter(quant_alpha, width, height, width);
-    }
-    // Stop?
-    if (filter == WEBP_FILTER_NONE) {
-      goto Ok;
-    }
-
-    filtered_alpha = (uint8_t*)malloc(data_size);
-    ok = (filtered_alpha != NULL);
-    if (!ok) {
-      goto End;
+    VP8FiltersInit();
+    ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method,
+                               filter, reduce_levels, effort_level, output,
+                               output_size, pic->stats);
+    if (pic->stats != NULL) {  // need stats?
+      pic->stats->coded_size += (int)(*output_size);
+      enc->sse_[3] = sse;
     }
-
-    // Try the other mode(s).
-    {
-      WebPAuxStats best_stats;
-      size_t best_score = VP8BitWriterSize(&bw);
-
-      memset(&best_stats, 0, sizeof(best_stats));  // prevent spurious warning
-      if (pic->stats != NULL) best_stats = *pic->stats;
-      for (test_filter = WEBP_FILTER_HORIZONTAL;
-           ok && (test_filter <= WEBP_FILTER_GRADIENT);
-           ++test_filter) {
-        VP8BitWriter tmp_bw;
-        if (filter != WEBP_FILTER_BEST && test_filter != filter) {
-          continue;
-        }
-        ok = EncodeAlphaInternal(quant_alpha, width, height,
-                                 method, test_filter, reduce_levels,
-                                 effort_level, filtered_alpha, &tmp_bw,
-                                 pic->stats);
-        if (ok) {
-          const size_t score = VP8BitWriterSize(&tmp_bw);
-          if (score < best_score) {
-            // swap bitwriter objects.
-            VP8BitWriter tmp = tmp_bw;
-            tmp_bw = bw;
-            bw = tmp;
-            best_score = score;
-            if (pic->stats != NULL) best_stats = *pic->stats;
-          }
-        } else {
-          VP8BitWriterWipeOut(&bw);
-        }
-        VP8BitWriterWipeOut(&tmp_bw);
-      }
-      if (pic->stats != NULL) *pic->stats = best_stats;
-    }
- Ok:
-    if (ok) {
-      *output_size = VP8BitWriterSize(&bw);
-      *output = VP8BitWriterBuf(&bw);
-      if (pic->stats != NULL) {         // need stats?
-        pic->stats->coded_size += (int)(*output_size);
-        enc->sse_[3] = sse;
-      }
-    }
-    free(filtered_alpha);
   }
- End:
-  free(quant_alpha);
+
+  WebPSafeFree(quant_alpha);
   return ok;
 }
 
-
 //------------------------------------------------------------------------------
 // Main calls
 
+static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
+  const WebPConfig* config = enc->config_;
+  uint8_t* alpha_data = NULL;
+  size_t alpha_size = 0;
+  const int effort_level = config->method;  // maps to [0..6]
+  const WEBP_FILTER_TYPE filter =
+      (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
+      (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+                                       WEBP_FILTER_BEST;
+  if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
+                   filter, effort_level, &alpha_data, &alpha_size)) {
+    return 0;
+  }
+  if (alpha_size != (uint32_t)alpha_size) {  // Sanity check.
+    WebPSafeFree(alpha_data);
+    return 0;
+  }
+  enc->alpha_data_size_ = (uint32_t)alpha_size;
+  enc->alpha_data_ = alpha_data;
+  (void)dummy;
+  return 1;
+}
+
 void VP8EncInitAlpha(VP8Encoder* const enc) {
+  WebPInitAlphaProcessing();
   enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
   enc->alpha_data_ = NULL;
   enc->alpha_data_size_ = 0;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    WebPGetWorkerInterface()->Init(worker);
+    worker->data1 = enc;
+    worker->data2 = NULL;
+    worker->hook = (WebPWorkerHook)CompressAlphaJob;
+  }
 }
 
-int VP8EncFinishAlpha(VP8Encoder* const enc) {
+int VP8EncStartAlpha(VP8Encoder* const enc) {
   if (enc->has_alpha_) {
-    const WebPConfig* config = enc->config_;
-    uint8_t* tmp_data = NULL;
-    size_t tmp_size = 0;
-    const int effort_level = config->method;  // maps to [0..6]
-    const WEBP_FILTER_TYPE filter =
-        (config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
-        (config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
-                                         WEBP_FILTER_BEST;
-
-    if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
-                     filter, effort_level, &tmp_data, &tmp_size)) {
-      return 0;
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      // Makes sure worker is good to go.
+      if (!WebPGetWorkerInterface()->Reset(worker)) {
+        return 0;
+      }
+      WebPGetWorkerInterface()->Launch(worker);
+      return 1;
+    } else {
+      return CompressAlphaJob(enc, NULL);   // just do the job right away
     }
-    if (tmp_size != (uint32_t)tmp_size) {  // Sanity check.
-      free(tmp_data);
-      return 0;
+  }
+  return 1;
+}
+
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    if (enc->thread_level_ > 0) {
+      WebPWorker* const worker = &enc->alpha_worker_;
+      if (!WebPGetWorkerInterface()->Sync(worker)) return 0;  // error
     }
-    enc->alpha_data_size_ = (uint32_t)tmp_size;
-    enc->alpha_data_ = tmp_data;
   }
   return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
 }
 
-void VP8EncDeleteAlpha(VP8Encoder* const enc) {
-  free(enc->alpha_data_);
+int VP8EncDeleteAlpha(VP8Encoder* const enc) {
+  int ok = 1;
+  if (enc->thread_level_ > 0) {
+    WebPWorker* const worker = &enc->alpha_worker_;
+    // finish anything left in flight
+    ok = WebPGetWorkerInterface()->Sync(worker);
+    // still need to end the worker, even if !ok
+    WebPGetWorkerInterface()->End(worker);
+  }
+  WebPSafeFree(enc->alpha_data_);
   enc->alpha_data_ = NULL;
   enc->alpha_data_size_ = 0;
   enc->has_alpha_ = 0;
+  return ok;
 }
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/analysis.c b/drivers/webp/enc/analysis.c
index 22cfb492e7..b55128fd48 100644
--- a/drivers/webp/enc/analysis.c
+++ b/drivers/webp/enc/analysis.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Macroblock analysis
@@ -17,16 +19,8 @@
 #include "./cost.h"
 #include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 #define MAX_ITERS_K_MEANS  6
 
-static int ClipAlpha(int alpha) {
-  return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
-}
-
 //------------------------------------------------------------------------------
 // Smooth the segment map by replacing isolated block by the majority of its
 // neighbours.
@@ -36,7 +30,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
   const int w = enc->mb_w_;
   const int h = enc->mb_h_;
   const int majority_cnt_3_x_3_grid = 5;
-  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp));
+  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc(w * h, sizeof(*tmp));
   assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec
 
   if (tmp == NULL) return;
@@ -57,6 +51,7 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
       for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
         if (cnt[n] >= majority_cnt_3_x_3_grid) {
           majority_seg = n;
+          break;
         }
       }
       tmp[x + y * w] = majority_seg;
@@ -68,54 +63,14 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
       mb->segment_ = tmp[x + y * w];
     }
   }
-  free(tmp);
+  WebPSafeFree(tmp);
 }
 
 //------------------------------------------------------------------------------
-// Finalize Segment probability based on the coding tree
-
-static int GetProba(int a, int b) {
-  int proba;
-  const int total = a + b;
-  if (total == 0) return 255;  // that's the default probability.
-  proba = (255 * a + total / 2) / total;
-  return proba;
-}
-
-static void SetSegmentProbas(VP8Encoder* const enc) {
-  int p[NUM_MB_SEGMENTS] = { 0 };
-  int n;
-
-  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
-    const VP8MBInfo* const mb = &enc->mb_info_[n];
-    p[mb->segment_]++;
-  }
-  if (enc->pic_->stats) {
-    for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
-      enc->pic_->stats->segment_size[n] = p[n];
-    }
-  }
-  if (enc->segment_hdr_.num_segments_ > 1) {
-    uint8_t* const probas = enc->proba_.segments_;
-    probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
-    probas[1] = GetProba(p[0], p[1]);
-    probas[2] = GetProba(p[2], p[3]);
-
-    enc->segment_hdr_.update_map_ =
-        (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
-    enc->segment_hdr_.size_ =
-      p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
-      p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
-      p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
-      p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
-  } else {
-    enc->segment_hdr_.update_map_ = 0;
-    enc->segment_hdr_.size_ = 0;
-  }
-}
+// set segment susceptibility alpha_ / beta_
 
 static WEBP_INLINE int clip(int v, int m, int M) {
-  return v < m ? m : v > M ? M : v;
+  return (v < m) ? m : (v > M) ? M : v;
 }
 
 static void SetSegmentAlphas(VP8Encoder* const enc,
@@ -142,28 +97,77 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
 }
 
 //------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+#define MAX_ALPHA 255                // 8b of precision for susceptibilities.
+#define ALPHA_SCALE (2 * MAX_ALPHA)  // scaling factor for alpha.
+#define DEFAULT_ALPHA (-1)
+#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
+
+static int FinalAlphaValue(int alpha) {
+  alpha = MAX_ALPHA - alpha;
+  return clip(alpha, 0, MAX_ALPHA);
+}
+
+static int GetAlpha(const VP8Histogram* const histo) {
+  // 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
+  // values which happen to be mostly noise. This leaves the maximum precision
+  // for handling the useful small values which contribute most.
+  const int max_value = histo->max_value;
+  const int last_non_zero = histo->last_non_zero;
+  const int alpha =
+      (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
+  return alpha;
+}
+
+static void InitHistogram(VP8Histogram* const histo) {
+  histo->max_value = 0;
+  histo->last_non_zero = 1;
+}
+
+static void MergeHistograms(const VP8Histogram* const in,
+                            VP8Histogram* const out) {
+  if (in->max_value > out->max_value) {
+    out->max_value = in->max_value;
+  }
+  if (in->last_non_zero > out->last_non_zero) {
+    out->last_non_zero = in->last_non_zero;
+  }
+}
+
+//------------------------------------------------------------------------------
 // Simplified k-Means, to assign Nb segments based on alpha-histogram
 
-static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
-  const int nb = enc->segment_hdr_.num_segments_;
+static void AssignSegments(VP8Encoder* const enc,
+                           const int alphas[MAX_ALPHA + 1]) {
+  // 'num_segments_' is previously validated and <= NUM_MB_SEGMENTS, but an
+  // explicit check is needed to avoid spurious warning about 'n + 1' exceeding
+  // array bounds of 'centers' with some compilers (noticed with gcc-4.9).
+  const int nb = (enc->segment_hdr_.num_segments_ < NUM_MB_SEGMENTS) ?
+                 enc->segment_hdr_.num_segments_ : NUM_MB_SEGMENTS;
   int centers[NUM_MB_SEGMENTS];
   int weighted_average = 0;
-  int map[256];
+  int map[MAX_ALPHA + 1];
   int a, n, k;
-  int min_a = 0, max_a = 255, range_a;
+  int min_a = 0, max_a = MAX_ALPHA, range_a;
   // 'int' type is ok for histo, and won't overflow
   int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
 
+  assert(nb >= 1);
+  assert(nb <= NUM_MB_SEGMENTS);
+
   // bracket the input
-  for (n = 0; n < 256 && alphas[n] == 0; ++n) {}
+  for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
   min_a = n;
-  for (n = 255; n > min_a && alphas[n] == 0; --n) {}
+  for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
   max_a = n;
   range_a = max_a - min_a;
 
   // Spread initial centers evenly
-  for (n = 1, k = 0; n < 2 * nb; n += 2) {
-    centers[k++] = min_a + (n * range_a) / (2 * nb);
+  for (k = 0, n = 1; k < nb; ++k, n += 2) {
+    assert(n < 2 * nb);
+    centers[k] = min_a + (n * range_a) / (2 * nb);
   }
 
   for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {     // few iters are enough
@@ -178,7 +182,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
     n = 0;    // track the nearest center for current 'a'
     for (a = min_a; a <= max_a; ++a) {
       if (alphas[a]) {
-        while (n < nb - 1 && abs(a - centers[n + 1]) < abs(a - centers[n])) {
+        while (n + 1 < nb && abs(a - centers[n + 1]) < abs(a - centers[n])) {
           n++;
         }
         map[a] = n;
@@ -210,7 +214,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
     VP8MBInfo* const mb = &enc->mb_info_[n];
     const int alpha = mb->alpha_;
     mb->segment_ = map[alpha];
-    mb->alpha_ = centers[map[alpha]];     // just for the record.
+    mb->alpha_ = centers[map[alpha]];  // for the record.
   }
 
   if (nb > 1) {
@@ -218,7 +222,6 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
     if (smooth) SmoothSegmentMap(enc);
   }
 
-  SetSegmentProbas(enc);                             // Assign final proba
   SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
 }
 
@@ -227,24 +230,30 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
 // susceptibility and set best modes for this macroblock.
 // Segment assignment is done later.
 
-// Number of modes to inspect for alpha_ evaluation. For high-quality settings,
-// we don't need to test all the possible modes during the analysis phase.
+// Number of modes to inspect for alpha_ evaluation. We don't need to test all
+// the possible modes during the analysis phase: we risk falling into a local
+// optimum, or be subject to boundary effect
 #define MAX_INTRA16_MODE 2
 #define MAX_INTRA4_MODE  2
 #define MAX_UV_MODE      2
 
 static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
-  const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4;
+  const int max_mode = MAX_INTRA16_MODE;
   int mode;
-  int best_alpha = -1;
+  int best_alpha = DEFAULT_ALPHA;
   int best_mode = 0;
 
   VP8MakeLuma16Preds(it);
   for (mode = 0; mode < max_mode; ++mode) {
-    const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF,
-                                          it->yuv_p_ + VP8I16ModeOffsets[mode],
-                                          0, 16);
-    if (alpha > best_alpha) {
+    VP8Histogram histo;
+    int alpha;
+
+    InitHistogram(&histo);
+    VP8CollectHistogram(it->yuv_in_ + Y_OFF_ENC,
+                        it->yuv_p_ + VP8I16ModeOffsets[mode],
+                        0, 16, &histo);
+    alpha = GetAlpha(&histo);
+    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
       best_alpha = alpha;
       best_mode = mode;
     }
@@ -256,46 +265,62 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
 static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
                                    int best_alpha) {
   uint8_t modes[16];
-  const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
-  int i4_alpha = 0;
+  const int max_mode = MAX_INTRA4_MODE;
+  int i4_alpha;
+  VP8Histogram total_histo;
+  int cur_histo = 0;
+  InitHistogram(&total_histo);
+
   VP8IteratorStartI4(it);
   do {
     int mode;
-    int best_mode_alpha = -1;
-    const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+    int best_mode_alpha = DEFAULT_ALPHA;
+    VP8Histogram histos[2];
+    const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
 
     VP8MakeIntra4Preds(it);
     for (mode = 0; mode < max_mode; ++mode) {
-      const int alpha = VP8CollectHistogram(src,
-                                            it->yuv_p_ + VP8I4ModeOffsets[mode],
-                                            0, 1);
-      if (alpha > best_mode_alpha) {
+      int alpha;
+
+      InitHistogram(&histos[cur_histo]);
+      VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
+                          0, 1, &histos[cur_histo]);
+      alpha = GetAlpha(&histos[cur_histo]);
+      if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) {
         best_mode_alpha = alpha;
         modes[it->i4_] = mode;
+        cur_histo ^= 1;   // keep track of best histo so far.
       }
     }
-    i4_alpha += best_mode_alpha;
+    // accumulate best histogram
+    MergeHistograms(&histos[cur_histo ^ 1], &total_histo);
     // Note: we reuse the original samples for predictors
-  } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
+  } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC));
 
-  if (i4_alpha > best_alpha) {
+  i4_alpha = GetAlpha(&total_histo);
+  if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) {
     VP8SetIntra4Mode(it, modes);
-    best_alpha = ClipAlpha(i4_alpha);
+    best_alpha = i4_alpha;
   }
   return best_alpha;
 }
 
 static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
-  int best_alpha = -1;
+  int best_alpha = DEFAULT_ALPHA;
   int best_mode = 0;
-  const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4;
+  const int max_mode = MAX_UV_MODE;
   int mode;
+
   VP8MakeChroma8Preds(it);
   for (mode = 0; mode < max_mode; ++mode) {
-    const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF,
-                                          it->yuv_p_ + VP8UVModeOffsets[mode],
-                                          16, 16 + 4 + 4);
-    if (alpha > best_alpha) {
+    VP8Histogram histo;
+    int alpha;
+    InitHistogram(&histo);
+    VP8CollectHistogram(it->yuv_in_ + U_OFF_ENC,
+                        it->yuv_p_ + VP8UVModeOffsets[mode],
+                        16, 16 + 4 + 4, &histo);
+    alpha = GetAlpha(&histo);
+    if (IS_BETTER_ALPHA(alpha, best_alpha)) {
       best_alpha = alpha;
       best_mode = mode;
     }
@@ -305,7 +330,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
 }
 
 static void MBAnalyze(VP8EncIterator* const it,
-                      int alphas[256], int* const uv_alpha) {
+                      int alphas[MAX_ALPHA + 1],
+                      int* const alpha, int* const uv_alpha) {
   const VP8Encoder* const enc = it->enc_;
   int best_alpha, best_uv_alpha;
 
@@ -314,7 +340,7 @@ static void MBAnalyze(VP8EncIterator* const it,
   VP8SetSegment(it, 0);      // default segment, spec-wise.
 
   best_alpha = MBAnalyzeBestIntra16Mode(it);
-  if (enc->method_ != 3) {
+  if (enc->method_ >= 5) {
     // We go and make a fast decision for intra4/intra16.
     // It's usually not a good and definitive pick, but helps seeding the stats
     // about level bit-cost.
@@ -324,10 +350,22 @@ static void MBAnalyze(VP8EncIterator* const it,
   best_uv_alpha = MBAnalyzeBestUVMode(it);
 
   // Final susceptibility mix
-  best_alpha = (best_alpha + best_uv_alpha + 1) / 2;
+  best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
+  best_alpha = FinalAlphaValue(best_alpha);
   alphas[best_alpha]++;
+  it->mb_->alpha_ = best_alpha;   // for later remapping.
+
+  // Accumulate for later complexity analysis.
+  *alpha += best_alpha;   // mixed susceptibility (not just luma)
   *uv_alpha += best_uv_alpha;
-  it->mb_->alpha_ = best_alpha;   // Informative only.
+}
+
+static void DefaultMBInfo(VP8MBInfo* const mb) {
+  mb->type_ = 1;     // I16x16
+  mb->uv_mode_ = 0;
+  mb->skip_ = 0;     // not skipped
+  mb->segment_ = 0;  // default segment
+  mb->alpha_ = 0;
 }
 
 //------------------------------------------------------------------------------
@@ -340,25 +378,124 @@ static void MBAnalyze(VP8EncIterator* const it,
 // and decide intra4/intra16, but that's usually almost always a bad choice at
 // this stage.
 
-int VP8EncAnalyze(VP8Encoder* const enc) {
-  int ok = 1;
-  int alphas[256] = { 0 };
-  VP8EncIterator it;
-
-  VP8IteratorInit(enc, &it);
+static void ResetAllMBInfo(VP8Encoder* const enc) {
+  int n;
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    DefaultMBInfo(&enc->mb_info_[n]);
+  }
+  // Default susceptibilities.
+  enc->dqm_[0].alpha_ = 0;
+  enc->dqm_[0].beta_ = 0;
+  // Note: we can't compute this alpha_ / uv_alpha_ -> set to default value.
+  enc->alpha_ = 0;
   enc->uv_alpha_ = 0;
-  do {
-    VP8IteratorImport(&it);
-    MBAnalyze(&it, alphas, &enc->uv_alpha_);
-    ok = VP8IteratorProgress(&it, 20);
-    // Let's pretend we have perfect lossless reconstruction.
-  } while (ok && VP8IteratorNext(&it, it.yuv_in_));
-  enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;
-  if (ok) AssignSegments(enc, alphas);
+  WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+}
+
+// struct used to collect job result
+typedef struct {
+  WebPWorker worker;
+  int alphas[MAX_ALPHA + 1];
+  int alpha, uv_alpha;
+  VP8EncIterator it;
+  int delta_progress;
+} SegmentJob;
 
+// main work call
+static int DoSegmentsJob(SegmentJob* const job, VP8EncIterator* const it) {
+  int ok = 1;
+  if (!VP8IteratorIsDone(it)) {
+    uint8_t tmp[32 + WEBP_ALIGN_CST];
+    uint8_t* const scratch = (uint8_t*)WEBP_ALIGN(tmp);
+    do {
+      // Let's pretend we have perfect lossless reconstruction.
+      VP8IteratorImport(it, scratch);
+      MBAnalyze(it, job->alphas, &job->alpha, &job->uv_alpha);
+      ok = VP8IteratorProgress(it, job->delta_progress);
+    } while (ok && VP8IteratorNext(it));
+  }
   return ok;
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
+static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) {
+  int i;
+  for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i];
+  dst->alpha += src->alpha;
+  dst->uv_alpha += src->uv_alpha;
+}
+
+// Initialize the job: worker hook, iterator row range and zeroed statistics.
+static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job,
+                           int start_row, int end_row) {
+  WebPGetWorkerInterface()->Init(&job->worker);
+  job->worker.data1 = job;
+  job->worker.data2 = &job->it;
+  job->worker.hook = (WebPWorkerHook)DoSegmentsJob;
+  VP8IteratorInit(enc, &job->it);
+  VP8IteratorSetRow(&job->it, start_row);
+  VP8IteratorSetCountDown(&job->it, (end_row - start_row) * enc->mb_w_);
+  memset(job->alphas, 0, sizeof(job->alphas));
+  job->alpha = 0;
+  job->uv_alpha = 0;
+  // Only one of the two jobs may report progress, since we don't
+  // expect the user's hook to be multi-thread safe.
+  job->delta_progress = (start_row == 0) ? 20 : 0;
+}
+
+// main entry point
+int VP8EncAnalyze(VP8Encoder* const enc) {
+  int ok = 1;
+  const int do_segments =
+      enc->config_->emulate_jpeg_size ||   // We need the complexity evaluation.
+      (enc->segment_hdr_.num_segments_ > 1) ||
+      (enc->method_ == 0);  // for method 0, we need preds_[] to be filled.
+  if (do_segments) {
+    const int last_row = enc->mb_h_;
+    // We give a little more than half of the work to the main thread.
+    const int split_row = (9 * last_row + 15) >> 4;
+    const int total_mb = last_row * enc->mb_w_;
+#ifdef WEBP_USE_THREAD
+    const int kMinSplitRow = 2;  // minimal rows needed for mt to be worth it
+    const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow);
+#else
+    const int do_mt = 0;
 #endif
+    const WebPWorkerInterface* const worker_interface =
+        WebPGetWorkerInterface();
+    SegmentJob main_job;
+    if (do_mt) {
+      SegmentJob side_job;
+      // Note the use of '&=' (not a short-circuiting '&&') below: the
+      // Reset()/Sync() calls must be made no matter what.
+      InitSegmentJob(enc, &main_job, 0, split_row);
+      InitSegmentJob(enc, &side_job, split_row, last_row);
+      // We don't need to call Reset() on main_job.worker, since we're calling
+      // Execute() on it.
+      ok &= worker_interface->Reset(&side_job.worker);
+      // launch the two jobs in parallel
+      if (ok) {
+        worker_interface->Launch(&side_job.worker);
+        worker_interface->Execute(&main_job.worker);
+        ok &= worker_interface->Sync(&side_job.worker);
+        ok &= worker_interface->Sync(&main_job.worker);
+      }
+      worker_interface->End(&side_job.worker);
+      if (ok) MergeJobs(&side_job, &main_job);  // merge results together
+    } else {
+      // Even for single-thread case, we use the generic Worker tools.
+      InitSegmentJob(enc, &main_job, 0, last_row);
+      worker_interface->Execute(&main_job.worker);
+      ok &= worker_interface->Sync(&main_job.worker);
+    }
+    worker_interface->End(&main_job.worker);
+    if (ok) {
+      enc->alpha_ = main_job.alpha / total_mb;
+      enc->uv_alpha_ = main_job.uv_alpha / total_mb;
+      AssignSegments(enc, main_job.alphas);
+    }
+  } else {   // Use only one default segment.
+    ResetAllMBInfo(enc);
+  }
+  return ok;
+}
+
diff --git a/drivers/webp/enc/backward_references.c b/drivers/webp/enc/backward_references.c
index b8c8ece806..049125e521 100644
--- a/drivers/webp/enc/backward_references.c
+++ b/drivers/webp/enc/backward_references.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
@@ -10,7 +12,6 @@
 
 #include <assert.h>
 #include <math.h>
-#include <stdio.h>
 
 #include "./backward_references.h"
 #include "./histogram.h"
@@ -20,9 +21,9 @@
 
 #define VALUES_IN_BYTE 256
 
-#define HASH_BITS 18
-#define HASH_SIZE (1 << HASH_BITS)
-#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)
+#define MIN_BLOCK_SIZE 256  // minimum block size for backward references
+
+#define MAX_ENTROPY    (1e30f)
 
 // 1M window (4M bytes) minus 120 special codes for short distances.
 #define WINDOW_SIZE ((1 << 20) - 120)
@@ -31,14 +32,6 @@
 #define MIN_LENGTH 2
 #define MAX_LENGTH 4096
 
-typedef struct {
-  // Stores the most recently added position with the given hash value.
-  int32_t hash_to_first_index_[HASH_SIZE];
-  // chain_[pos] stores the previous position with the same hash value
-  // for every pixel in the image.
-  int32_t* chain_;
-} HashChain;
-
 // -----------------------------------------------------------------------------
 
 static const uint8_t plane_to_code_lut[128] = {
@@ -65,145 +58,275 @@ static int DistanceToPlaneCode(int xsize, int dist) {
 
 static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
                                        const uint32_t* const array2,
-                                       const int max_limit) {
+                                       int best_len_match,
+                                       int max_limit) {
+#if !defined(__x86_64__)
+  // TODO(vrabaud): Compare on other architectures.
   int match_len = 0;
+  // Before 'expensive' linear match, check if the two arrays match at the
+  // current best length index.
+  if (array1[best_len_match] != array2[best_len_match]) return 0;
   while (match_len < max_limit && array1[match_len] == array2[match_len]) {
     ++match_len;
   }
   return match_len;
+#else
+  const uint32_t* array1_32 = array1;
+  const uint32_t* array2_32 = array2;
+  // max value is aligned to (uint64_t*) array1
+  const uint32_t* const array1_32_max = array1 + (max_limit & ~1);
+
+  // Before 'expensive' linear match, check if the two arrays match at the
+  // current best length index.
+  if (array1[best_len_match] != array2[best_len_match]) return 0;
+
+  // TODO(vrabaud): add __predict_true on bound checking?
+  while (array1_32 < array1_32_max) {
+    if (*(uint64_t*)array1_32 == *(uint64_t*)array2_32) {
+      array1_32 += 2;
+      array2_32 += 2;
+    } else {
+      // If the first uint32_t values are equal, then the second ones must
+      // differ (the 64-bit compare failed): the match is one element longer.
+      return (array1_32 - array1) + (*array1_32 == *array2_32);
+    }
+  }
+
+  // Deal with the potential last uint32_t.
+  if ((max_limit & 1) && (*array1_32 != *array2_32)) return max_limit - 1;
+  return max_limit;
+#endif
 }
 
 // -----------------------------------------------------------------------------
 //  VP8LBackwardRefs
 
-void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs) {
-  if (refs != NULL) {
-    refs->refs = NULL;
-    refs->size = 0;
-    refs->max_size = 0;
+struct PixOrCopyBlock {
+  PixOrCopyBlock* next_;   // next block (or NULL)
+  PixOrCopy* start_;       // data start
+  int size_;               // currently used size
+};
+
+static void ClearBackwardRefs(VP8LBackwardRefs* const refs) {
+  assert(refs != NULL);
+  if (refs->tail_ != NULL) {
+    *refs->tail_ = refs->free_blocks_;  // recycle all blocks at once
   }
+  refs->free_blocks_ = refs->refs_;
+  refs->tail_ = &refs->refs_;
+  refs->last_block_ = NULL;
+  refs->refs_ = NULL;
 }
 
-void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
-  if (refs != NULL) {
-    free(refs->refs);
-    VP8LInitBackwardRefs(refs);
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs) {
+  assert(refs != NULL);
+  ClearBackwardRefs(refs);
+  while (refs->free_blocks_ != NULL) {
+    PixOrCopyBlock* const next = refs->free_blocks_->next_;
+    WebPSafeFree(refs->free_blocks_);
+    refs->free_blocks_ = next;
   }
 }
 
-int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size) {
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size) {
   assert(refs != NULL);
-  refs->size = 0;
-  refs->max_size = 0;
-  refs->refs = (PixOrCopy*)WebPSafeMalloc((uint64_t)max_size,
-                                          sizeof(*refs->refs));
-  if (refs->refs == NULL) return 0;
-  refs->max_size = max_size;
+  memset(refs, 0, sizeof(*refs));
+  refs->tail_ = &refs->refs_;
+  refs->block_size_ =
+      (block_size < MIN_BLOCK_SIZE) ? MIN_BLOCK_SIZE : block_size;
+}
+
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs) {
+  VP8LRefsCursor c;
+  c.cur_block_ = refs->refs_;
+  if (refs->refs_ != NULL) {
+    c.cur_pos = c.cur_block_->start_;
+    c.last_pos_ = c.cur_pos + c.cur_block_->size_;
+  } else {
+    c.cur_pos = NULL;
+    c.last_pos_ = NULL;
+  }
+  return c;
+}
+
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c) {
+  PixOrCopyBlock* const b = c->cur_block_->next_;
+  c->cur_pos = (b == NULL) ? NULL : b->start_;
+  c->last_pos_ = (b == NULL) ? NULL : b->start_ + b->size_;
+  c->cur_block_ = b;
+}
+
+// Create a new block, either from the free list or allocated
+static PixOrCopyBlock* BackwardRefsNewBlock(VP8LBackwardRefs* const refs) {
+  PixOrCopyBlock* b = refs->free_blocks_;
+  if (b == NULL) {   // allocate new memory chunk
+    const size_t total_size =
+        sizeof(*b) + refs->block_size_ * sizeof(*b->start_);
+    b = (PixOrCopyBlock*)WebPSafeMalloc(1ULL, total_size);
+    if (b == NULL) {
+      refs->error_ |= 1;
+      return NULL;
+    }
+    b->start_ = (PixOrCopy*)((uint8_t*)b + sizeof(*b));  // not always aligned
+  } else {  // recycle from free-list
+    refs->free_blocks_ = b->next_;
+  }
+  *refs->tail_ = b;
+  refs->tail_ = &b->next_;
+  refs->last_block_ = b;
+  b->next_ = NULL;
+  b->size_ = 0;
+  return b;
+}
+
+static WEBP_INLINE void BackwardRefsCursorAdd(VP8LBackwardRefs* const refs,
+                                              const PixOrCopy v) {
+  PixOrCopyBlock* b = refs->last_block_;
+  if (b == NULL || b->size_ == refs->block_size_) {
+    b = BackwardRefsNewBlock(refs);
+    if (b == NULL) return;   // refs->error_ is set
+  }
+  b->start_[b->size_++] = v;
+}
+
+int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
+                         VP8LBackwardRefs* const dst) {
+  const PixOrCopyBlock* b = src->refs_;
+  ClearBackwardRefs(dst);
+  assert(src->block_size_ == dst->block_size_);
+  while (b != NULL) {
+    PixOrCopyBlock* const new_b = BackwardRefsNewBlock(dst);
+    if (new_b == NULL) return 0;   // dst->error_ is set
+    memcpy(new_b->start_, b->start_, b->size_ * sizeof(*b->start_));
+    new_b->size_ = b->size_;
+    b = b->next_;
+  }
   return 1;
 }
 
 // -----------------------------------------------------------------------------
 // Hash chains
 
-static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
-  uint64_t key = ((uint64_t)(argb[1]) << 32) | argb[0];
-  key = (key * HASH_MULTIPLIER) >> (64 - HASH_BITS);
-  return key;
-}
-
-static int HashChainInit(HashChain* const p, int size) {
+// initialize as empty
+static void HashChainReset(VP8LHashChain* const p) {
   int i;
-  p->chain_ = (int*)WebPSafeMalloc((uint64_t)size, sizeof(*p->chain_));
-  if (p->chain_ == NULL) {
-    return 0;
-  }
-  for (i = 0; i < size; ++i) {
+  assert(p != NULL);
+  for (i = 0; i < p->size_; ++i) {
     p->chain_[i] = -1;
   }
   for (i = 0; i < HASH_SIZE; ++i) {
     p->hash_to_first_index_[i] = -1;
   }
+}
+
+int VP8LHashChainInit(VP8LHashChain* const p, int size) {
+  assert(p->size_ == 0);
+  assert(p->chain_ == NULL);
+  assert(size > 0);
+  p->chain_ = (int*)WebPSafeMalloc(size, sizeof(*p->chain_));
+  if (p->chain_ == NULL) return 0;
+  p->size_ = size;
+  HashChainReset(p);
   return 1;
 }
 
-static void HashChainDelete(HashChain* const p) {
-  if (p != NULL) {
-    free(p->chain_);
-    free(p);
-  }
+void VP8LHashChainClear(VP8LHashChain* const p) {
+  assert(p != NULL);
+  WebPSafeFree(p->chain_);
+  p->size_ = 0;
+  p->chain_ = NULL;
+}
+
+// -----------------------------------------------------------------------------
+
+#define HASH_MULTIPLIER_HI (0xc6a4a793U)
+#define HASH_MULTIPLIER_LO (0x5bd1e996U)
+
+static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) {
+  uint32_t key;
+  key  = argb[1] * HASH_MULTIPLIER_HI;
+  key += argb[0] * HASH_MULTIPLIER_LO;
+  key = key >> (32 - HASH_BITS);
+  return key;
 }
 
 // Insertion of two pixels at a time.
-static void HashChainInsert(HashChain* const p,
+static void HashChainInsert(VP8LHashChain* const p,
                             const uint32_t* const argb, int pos) {
-  const uint64_t hash_code = GetPixPairHash64(argb);
+  const uint32_t hash_code = GetPixPairHash64(argb);
   p->chain_[pos] = p->hash_to_first_index_[hash_code];
   p->hash_to_first_index_[hash_code] = pos;
 }
 
-static int HashChainFindCopy(const HashChain* const p,
-                             int quality, int index, int xsize,
-                             const uint32_t* const argb, int maxlen,
+// Returns the maximum number of hash chain lookups to do for a
+// given compression quality. Return value in range [6, 86].
+static int GetMaxItersForQuality(int quality, int low_effort) {
+  return (low_effort ? 6 : 8) + (quality * quality) / 128;
+}
+
+static int GetWindowSizeForHashChain(int quality, int xsize) {
+  const int max_window_size = (quality > 75) ? WINDOW_SIZE
+                            : (quality > 50) ? (xsize << 8)
+                            : (quality > 25) ? (xsize << 6)
+                            : (xsize << 4);
+  assert(xsize > 0);
+  return (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE : max_window_size;
+}
+
+static WEBP_INLINE int MaxFindCopyLength(int len) {
+  return (len < MAX_LENGTH) ? len : MAX_LENGTH;
+}
+
+static void HashChainFindOffset(const VP8LHashChain* const p, int base_position,
+                                const uint32_t* const argb, int len,
+                                int window_size, int* const distance_ptr) {
+  const uint32_t* const argb_start = argb + base_position;
+  const int min_pos =
+      (base_position > window_size) ? base_position - window_size : 0;
+  int pos;
+  assert(len <= MAX_LENGTH);
+  for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
+       pos >= min_pos;
+       pos = p->chain_[pos]) {
+    const int curr_length =
+        FindMatchLength(argb + pos, argb_start, len - 1, len);
+    if (curr_length == len) break;
+  }
+  *distance_ptr = base_position - pos;
+}
+
+static int HashChainFindCopy(const VP8LHashChain* const p,
+                             int base_position,
+                             const uint32_t* const argb, int max_len,
+                             int window_size, int iter_max,
                              int* const distance_ptr,
                              int* const length_ptr) {
-  const uint64_t hash_code = GetPixPairHash64(&argb[index]);
-  int prev_length = 0;
-  int64_t best_val = 0;
+  const uint32_t* const argb_start = argb + base_position;
+  int iter = iter_max;
   int best_length = 0;
   int best_distance = 0;
-  const uint32_t* const argb_start = argb + index;
-  const int iter_min_mult = (quality < 50) ? 2 : (quality < 75) ? 4 : 8;
-  const int iter_min = -quality * iter_min_mult;
-  int iter_cnt = 10 + (quality >> 1);
-  const int min_pos = (index > WINDOW_SIZE) ? index - WINDOW_SIZE : 0;
+  const int min_pos =
+      (base_position > window_size) ? base_position - window_size : 0;
   int pos;
-
-  assert(xsize > 0);
-  for (pos = p->hash_to_first_index_[hash_code];
+  int length_max = 256;
+  if (max_len < length_max) {
+    length_max = max_len;
+  }
+  for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
        pos >= min_pos;
        pos = p->chain_[pos]) {
-    int64_t val;
     int curr_length;
-    if (iter_cnt < 0) {
-      if (iter_cnt < iter_min || best_val >= 0xff0000) {
-        break;
-      }
-    }
-    --iter_cnt;
-    if (best_length != 0 &&
-        argb[pos + best_length - 1] != argb_start[best_length - 1]) {
-      continue;
-    }
-    curr_length = FindMatchLength(argb + pos, argb_start, maxlen);
-    if (curr_length < prev_length) {
-      continue;
+    int distance;
+    if (--iter < 0) {
+      break;
     }
-    val = 65536 * curr_length;
-    // Favoring 2d locality here gives savings for certain images.
-    if (index - pos < 9 * xsize) {
-      const int y = (index - pos) / xsize;
-      int x = (index - pos) % xsize;
-      if (x > xsize / 2) {
-        x = xsize - x;
-      }
-      if (x <= 7 && x >= -8) {
-        val -= y * y + x * x;
-      } else {
-        val -= 9 * 9 + 9 * 9;
-      }
-    } else {
-      val -= 9 * 9 + 9 * 9;
-    }
-    if (best_val < val) {
-      prev_length = curr_length;
-      best_val = val;
+
+    curr_length = FindMatchLength(argb + pos, argb_start, best_length, max_len);
+    if (best_length < curr_length) {
+      distance = base_position - pos;
       best_length = curr_length;
-      best_distance = index - pos;
-      if (curr_length >= MAX_LENGTH) {
-        break;
-      }
-      if ((best_distance == 1 || best_distance == xsize) &&
-          best_length >= 128) {
+      best_distance = distance;
+      if (curr_length >= length_max) {
         break;
       }
     }
@@ -213,140 +336,153 @@ static int HashChainFindCopy(const HashChain* const p,
   return (best_length >= MIN_LENGTH);
 }
 
-static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
-  int size = refs->size;
-  while (length >= MAX_LENGTH) {
-    refs->refs[size++] = PixOrCopyCreateCopy(1, MAX_LENGTH);
-    length -= MAX_LENGTH;
-  }
-  if (length > 0) {
-    refs->refs[size++] = PixOrCopyCreateCopy(1, length);
+static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache,
+                                         VP8LColorCache* const hashers,
+                                         VP8LBackwardRefs* const refs) {
+  PixOrCopy v;
+  if (use_color_cache) {
+    const uint32_t key = VP8LColorCacheGetIndex(hashers, pixel);
+    if (VP8LColorCacheLookup(hashers, key) == pixel) {
+      v = PixOrCopyCreateCacheIdx(key);
+    } else {
+      v = PixOrCopyCreateLiteral(pixel);
+      VP8LColorCacheSet(hashers, key, pixel);
+    }
+  } else {
+    v = PixOrCopyCreateLiteral(pixel);
   }
-  refs->size = size;
+  BackwardRefsCursorAdd(refs, v);
 }
 
-static void BackwardReferencesRle(int xsize, int ysize,
-                                  const uint32_t* const argb,
-                                  VP8LBackwardRefs* const refs) {
+static int BackwardReferencesRle(int xsize, int ysize,
+                                 const uint32_t* const argb,
+                                 int cache_bits, VP8LBackwardRefs* const refs) {
   const int pix_count = xsize * ysize;
-  int match_len = 0;
-  int i;
-  refs->size = 0;
-  PushBackCopy(refs, match_len);    // i=0 case
-  refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[0]);
-  for (i = 1; i < pix_count; ++i) {
-    if (argb[i] == argb[i - 1]) {
-      ++match_len;
+  int i, k;
+  const int use_color_cache = (cache_bits > 0);
+  VP8LColorCache hashers;
+
+  if (use_color_cache && !VP8LColorCacheInit(&hashers, cache_bits)) {
+    return 0;
+  }
+  ClearBackwardRefs(refs);
+  // Add first pixel as literal.
+  AddSingleLiteral(argb[0], use_color_cache, &hashers, refs);
+  i = 1;
+  while (i < pix_count) {
+    const int max_len = MaxFindCopyLength(pix_count - i);
+    const int kMinLength = 4;
+    const int rle_len = FindMatchLength(argb + i, argb + i - 1, 0, max_len);
+    const int prev_row_len = (i < xsize) ? 0 :
+        FindMatchLength(argb + i, argb + i - xsize, 0, max_len);
+    if (rle_len >= prev_row_len && rle_len >= kMinLength) {
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(1, rle_len));
+      // We don't need to update the color cache here since it is always the
+      // same pixel being copied, and that does not change the color cache
+      // state.
+      i += rle_len;
+    } else if (prev_row_len >= kMinLength) {
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(xsize, prev_row_len));
+      if (use_color_cache) {
+        for (k = 0; k < prev_row_len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      i += prev_row_len;
     } else {
-      PushBackCopy(refs, match_len);
-      match_len = 0;
-      refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[i]);
+      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+      i++;
     }
   }
-  PushBackCopy(refs, match_len);
+  if (use_color_cache) VP8LColorCacheClear(&hashers);
+  return !refs->error_;
 }
 
-static int BackwardReferencesHashChain(int xsize, int ysize,
-                                       const uint32_t* const argb,
-                                       int cache_bits, int quality,
-                                       VP8LBackwardRefs* const refs) {
+static int BackwardReferencesLz77(int xsize, int ysize,
+                                  const uint32_t* const argb, int cache_bits,
+                                  int quality, int low_effort,
+                                  VP8LHashChain* const hash_chain,
+                                  VP8LBackwardRefs* const refs) {
   int i;
   int ok = 0;
   int cc_init = 0;
   const int use_color_cache = (cache_bits > 0);
   const int pix_count = xsize * ysize;
-  HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
   VP8LColorCache hashers;
+  int iter_max = GetMaxItersForQuality(quality, low_effort);
+  const int window_size = GetWindowSizeForHashChain(quality, xsize);
+  int min_matches = 32;
 
-  if (hash_chain == NULL) return 0;
   if (use_color_cache) {
     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
     if (!cc_init) goto Error;
   }
-
-  if (!HashChainInit(hash_chain, pix_count)) goto Error;
-
-  refs->size = 0;
-  for (i = 0; i < pix_count; ) {
+  ClearBackwardRefs(refs);
+  HashChainReset(hash_chain);
+  for (i = 0; i < pix_count - 2; ) {
     // Alternative#1: Code the pixels starting at 'i' using backward reference.
     int offset = 0;
     int len = 0;
-    if (i < pix_count - 1) {  // FindCopy(i,..) reads pixels at [i] and [i + 1].
-      int maxlen = pix_count - i;
-      if (maxlen > MAX_LENGTH) {
-        maxlen = MAX_LENGTH;
-      }
-      HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
-                        &offset, &len);
-    }
-    if (len >= MIN_LENGTH) {
-      // Alternative#2: Insert the pixel at 'i' as literal, and code the
-      // pixels starting at 'i + 1' using backward reference.
+    const int max_len = MaxFindCopyLength(pix_count - i);
+    HashChainFindCopy(hash_chain, i, argb, max_len, window_size,
+                      iter_max, &offset, &len);
+    if (len > MIN_LENGTH || (len == MIN_LENGTH && offset <= 512)) {
       int offset2 = 0;
       int len2 = 0;
       int k;
+      min_matches = 8;
       HashChainInsert(hash_chain, &argb[i], i);
-      if (i < pix_count - 2) {  // FindCopy(i+1,..) reads [i + 1] and [i + 2].
-        int maxlen = pix_count - (i + 1);
-        if (maxlen > MAX_LENGTH) {
-          maxlen = MAX_LENGTH;
-        }
-        HashChainFindCopy(hash_chain, quality,
-                          i + 1, xsize, argb, maxlen, &offset2, &len2);
+      if ((len < (max_len >> 2)) && !low_effort) {
+        // Evaluate Alternative#2: Insert the pixel at 'i' as literal, and code
+        // the pixels starting at 'i + 1' using backward reference.
+        HashChainFindCopy(hash_chain, i + 1, argb, max_len - 1,
+                          window_size, iter_max, &offset2,
+                          &len2);
         if (len2 > len + 1) {
-          const uint32_t pixel = argb[i];
-          // Alternative#2 is a better match. So push pixel at 'i' as literal.
-          if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
-            const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
-            refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
-          } else {
-            refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
-          }
-          ++refs->size;
-          if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+          AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
           i++;  // Backward reference to be done for next pixel.
           len = len2;
           offset = offset2;
         }
       }
-      if (len >= MAX_LENGTH) {
-        len = MAX_LENGTH - 1;
-      }
-      refs->refs[refs->size++] = PixOrCopyCreateCopy(offset, len);
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
       if (use_color_cache) {
         for (k = 0; k < len; ++k) {
           VP8LColorCacheInsert(&hashers, argb[i + k]);
         }
       }
       // Add to the hash_chain (but cannot add the last pixel).
-      {
+      if (offset >= 3 && offset != xsize) {
         const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
-        for (k = 1; k < last; ++k) {
+        for (k = 2; k < last - 8; k += 2) {
+          HashChainInsert(hash_chain, &argb[i + k], i + k);
+        }
+        for (; k < last; ++k) {
           HashChainInsert(hash_chain, &argb[i + k], i + k);
         }
       }
       i += len;
     } else {
-      const uint32_t pixel = argb[i];
-      if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
-        // push pixel as a PixOrCopyCreateCacheIdx pixel
-        const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
-        refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
-      } else {
-        refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
-      }
-      ++refs->size;
-      if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
-      if (i + 1 < pix_count) {
+      AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+      HashChainInsert(hash_chain, &argb[i], i);
+      ++i;
+      --min_matches;
+      if (min_matches <= 0) {
+        AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
         HashChainInsert(hash_chain, &argb[i], i);
+        ++i;
       }
-      ++i;
     }
   }
-  ok = 1;
-Error:
+  while (i < pix_count) {
+    // Handle the last pixel(s).
+    AddSingleLiteral(argb[i], use_color_cache, &hashers, refs);
+    ++i;
+  }
+
+  ok = !refs->error_;
+ Error:
   if (cc_init) VP8LColorCacheClear(&hashers);
-  HashChainDelete(hash_chain);
   return ok;
 }
 
@@ -355,18 +491,19 @@ Error:
 typedef struct {
   double alpha_[VALUES_IN_BYTE];
   double red_[VALUES_IN_BYTE];
-  double literal_[PIX_OR_COPY_CODES_MAX];
   double blue_[VALUES_IN_BYTE];
   double distance_[NUM_DISTANCE_CODES];
+  double* literal_;
 } CostModel;
 
 static int BackwardReferencesTraceBackwards(
-    int xsize, int ysize, int recursive_cost_model,
-    const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs);
+    int xsize, int ysize, const uint32_t* const argb, int quality,
+    int cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs* const refs);
 
 static void ConvertPopulationCountTableToBitEstimates(
-    int num_symbols, const int population_counts[], double output[]) {
-  int sum = 0;
+    int num_symbols, const uint32_t population_counts[], double output[]) {
+  uint32_t sum = 0;
   int nonzeros = 0;
   int i;
   for (i = 0; i < num_symbols; ++i) {
@@ -385,42 +522,29 @@ static void ConvertPopulationCountTableToBitEstimates(
   }
 }
 
-static int CostModelBuild(CostModel* const m, int xsize, int ysize,
-                          int recursion_level, const uint32_t* const argb,
-                          int cache_bits) {
+static int CostModelBuild(CostModel* const m, int cache_bits,
+                          VP8LBackwardRefs* const refs) {
   int ok = 0;
-  VP8LHistogram histo;
-  VP8LBackwardRefs refs;
-  const int quality = 100;
+  VP8LHistogram* const histo = VP8LAllocateHistogram(cache_bits);
+  if (histo == NULL) goto Error;
 
-  if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error;
+  VP8LHistogramCreate(histo, refs, cache_bits);
 
-  if (recursion_level > 0) {
-    if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
-                                          argb, cache_bits, &refs)) {
-      goto Error;
-    }
-  } else {
-    if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
-                                     &refs)) {
-      goto Error;
-    }
-  }
-  VP8LHistogramCreate(&histo, &refs, cache_bits);
   ConvertPopulationCountTableToBitEstimates(
-      VP8LHistogramNumCodes(&histo), histo.literal_, m->literal_);
+      VP8LHistogramNumCodes(histo->palette_code_bits_),
+      histo->literal_, m->literal_);
   ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo.red_, m->red_);
+      VALUES_IN_BYTE, histo->red_, m->red_);
   ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo.blue_, m->blue_);
+      VALUES_IN_BYTE, histo->blue_, m->blue_);
   ConvertPopulationCountTableToBitEstimates(
-      VALUES_IN_BYTE, histo.alpha_, m->alpha_);
+      VALUES_IN_BYTE, histo->alpha_, m->alpha_);
   ConvertPopulationCountTableToBitEstimates(
-      NUM_DISTANCE_CODES, histo.distance_, m->distance_);
+      NUM_DISTANCE_CODES, histo->distance_, m->distance_);
   ok = 1;
 
  Error:
-  VP8LClearBackwardRefs(&refs);
+  VP8LFreeHistogram(histo);
   return ok;
 }
 
@@ -438,203 +562,211 @@ static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
 
 static WEBP_INLINE double GetLengthCost(const CostModel* const m,
                                         uint32_t length) {
-  int code, extra_bits_count, extra_bits_value;
-  PrefixEncode(length, &code, &extra_bits_count, &extra_bits_value);
-  return m->literal_[VALUES_IN_BYTE + code] + extra_bits_count;
+  int code, extra_bits;
+  VP8LPrefixEncodeBits(length, &code, &extra_bits);
+  return m->literal_[VALUES_IN_BYTE + code] + extra_bits;
 }
 
 static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
                                           uint32_t distance) {
-  int code, extra_bits_count, extra_bits_value;
-  PrefixEncode(distance, &code, &extra_bits_count, &extra_bits_value);
-  return m->distance_[code] + extra_bits_count;
+  int code, extra_bits;
+  VP8LPrefixEncodeBits(distance, &code, &extra_bits);
+  return m->distance_[code] + extra_bits;
+}
+
+static void AddSingleLiteralWithCostModel(
+    const uint32_t* const argb, VP8LHashChain* const hash_chain,
+    VP8LColorCache* const hashers, const CostModel* const cost_model, int idx,
+    int is_last, int use_color_cache, double prev_cost, float* const cost,
+    uint16_t* const dist_array) {
+  double cost_val = prev_cost;
+  const uint32_t color = argb[0];
+  if (!is_last) {
+    HashChainInsert(hash_chain, argb, idx);
+  }
+  if (use_color_cache && VP8LColorCacheContains(hashers, color)) {
+    const double mul0 = 0.68;
+    const int ix = VP8LColorCacheGetIndex(hashers, color);
+    cost_val += GetCacheCost(cost_model, ix) * mul0;
+  } else {
+    const double mul1 = 0.82;
+    if (use_color_cache) VP8LColorCacheInsert(hashers, color);
+    cost_val += GetLiteralCost(cost_model, color) * mul1;
+  }
+  if (cost[idx] > cost_val) {
+    cost[idx] = (float)cost_val;
+    dist_array[idx] = 1;  // only one is inserted.
+  }
 }
 
 static int BackwardReferencesHashChainDistanceOnly(
-    int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
-    int cache_bits, uint32_t* const dist_array) {
+    int xsize, int ysize, const uint32_t* const argb,
+    int quality, int cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs* const refs, uint16_t* const dist_array) {
   int i;
   int ok = 0;
   int cc_init = 0;
-  const int quality = 100;
   const int pix_count = xsize * ysize;
   const int use_color_cache = (cache_bits > 0);
-  double* const cost =
-      (double*)WebPSafeMalloc((uint64_t)pix_count, sizeof(*cost));
-  CostModel* cost_model = (CostModel*)malloc(sizeof(*cost_model));
-  HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+  float* const cost =
+      (float*)WebPSafeMalloc(pix_count, sizeof(*cost));
+  const size_t literal_array_size = sizeof(double) *
+      (NUM_LITERAL_CODES + NUM_LENGTH_CODES +
+       ((cache_bits > 0) ? (1 << cache_bits) : 0));
+  const size_t cost_model_size = sizeof(CostModel) + literal_array_size;
+  CostModel* const cost_model =
+      (CostModel*)WebPSafeMalloc(1ULL, cost_model_size);
   VP8LColorCache hashers;
-  const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
-  const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
-
-  if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error;
+  const int skip_length = 32 + quality;
+  const int skip_min_distance_code = 2;
+  int iter_max = GetMaxItersForQuality(quality, 0);
+  const int window_size = GetWindowSizeForHashChain(quality, xsize);
 
-  if (!HashChainInit(hash_chain, pix_count)) goto Error;
+  if (cost == NULL || cost_model == NULL) goto Error;
 
+  cost_model->literal_ = (double*)(cost_model + 1);
   if (use_color_cache) {
     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
     if (!cc_init) goto Error;
   }
 
-  if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
-                      cache_bits)) {
+  if (!CostModelBuild(cost_model, cache_bits, refs)) {
     goto Error;
   }
 
-  for (i = 0; i < pix_count; ++i) cost[i] = 1e100;
+  for (i = 0; i < pix_count; ++i) cost[i] = 1e38f;
 
   // We loop one pixel at a time, but store all currently best points to
   // non-processed locations from this point.
   dist_array[0] = 0;
-  for (i = 0; i < pix_count; ++i) {
-    double prev_cost = 0.0;
-    int shortmax;
-    if (i > 0) {
-      prev_cost = cost[i - 1];
-    }
-    for (shortmax = 0; shortmax < 2; ++shortmax) {
-      int offset = 0;
-      int len = 0;
-      if (i < pix_count - 1) {  // FindCopy reads pixels at [i] and [i + 1].
-        int maxlen = shortmax ? 2 : MAX_LENGTH;
-        if (maxlen > pix_count - i) {
-          maxlen = pix_count - i;
+  HashChainReset(hash_chain);
+  // Add first pixel as literal.
+  AddSingleLiteralWithCostModel(argb + 0, hash_chain, &hashers, cost_model, 0,
+                                0, use_color_cache, 0.0, cost, dist_array);
+  for (i = 1; i < pix_count - 1; ++i) {
+    int offset = 0;
+    int len = 0;
+    double prev_cost = cost[i - 1];
+    const int max_len = MaxFindCopyLength(pix_count - i);
+    HashChainFindCopy(hash_chain, i, argb, max_len, window_size,
+                      iter_max, &offset, &len);
+    if (len >= MIN_LENGTH) {
+      const int code = DistanceToPlaneCode(xsize, offset);
+      const double distance_cost =
+          prev_cost + GetDistanceCost(cost_model, code);
+      int k;
+      for (k = 1; k < len; ++k) {
+        const double cost_val = distance_cost + GetLengthCost(cost_model, k);
+        if (cost[i + k] > cost_val) {
+          cost[i + k] = (float)cost_val;
+          dist_array[i + k] = k + 1;
         }
-        HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
-                          &offset, &len);
       }
-      if (len >= MIN_LENGTH) {
-        const int code = DistanceToPlaneCode(xsize, offset);
-        const double distance_cost =
-            prev_cost + GetDistanceCost(cost_model, code);
-        int k;
-        for (k = 1; k < len; ++k) {
-          const double cost_val =
-              distance_cost + GetLengthCost(cost_model, k);
-          if (cost[i + k] > cost_val) {
-            cost[i + k] = cost_val;
-            dist_array[i + k] = k + 1;
+      // This if is for speedup only. It roughly doubles the speed, and
+      // makes compression worse by .1 %.
+      if (len >= skip_length && code <= skip_min_distance_code) {
+        // Long copy for short distances, let's skip the middle
+        // lookups for better copies.
+        // 1) insert the hashes.
+        if (use_color_cache) {
+          for (k = 0; k < len; ++k) {
+            VP8LColorCacheInsert(&hashers, argb[i + k]);
           }
         }
-        // This if is for speedup only. It roughly doubles the speed, and
-        // makes compression worse by .1 %.
-        if (len >= 128 && code < 2) {
-          // Long copy for short distances, let's skip the middle
-          // lookups for better copies.
-          // 1) insert the hashes.
-          if (use_color_cache) {
-            for (k = 0; k < len; ++k) {
-              VP8LColorCacheInsert(&hashers, argb[i + k]);
-            }
-          }
-          // 2) Add to the hash_chain (but cannot add the last pixel)
-          {
-            const int last = (len < pix_count - 1 - i) ? len
-                                                       : pix_count - 1 - i;
-            for (k = 0; k < last; ++k) {
-              HashChainInsert(hash_chain, &argb[i + k], i + k);
-            }
+        // 2) Add to the hash_chain (but cannot add the last pixel)
+        {
+          const int last = (len + i < pix_count - 1) ? len + i
+                                                     : pix_count - 1;
+          for (k = i; k < last; ++k) {
+            HashChainInsert(hash_chain, &argb[k], k);
           }
-          // 3) jump.
-          i += len - 1;  // for loop does ++i, thus -1 here.
-          goto next_symbol;
         }
+        // 3) jump.
+        i += len - 1;  // for loop does ++i, thus -1 here.
+        goto next_symbol;
       }
-    }
-    if (i < pix_count - 1) {
-      HashChainInsert(hash_chain, &argb[i], i);
-    }
-    {
-      // inserting a literal pixel
-      double cost_val = prev_cost;
-      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
-        const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
-        cost_val += GetCacheCost(cost_model, ix) * mul0;
-      } else {
-        cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
-      }
-      if (cost[i] > cost_val) {
-        cost[i] = cost_val;
-        dist_array[i] = 1;  // only one is inserted.
+      if (len != MIN_LENGTH) {
+        int code_min_length;
+        double cost_total;
+        HashChainFindOffset(hash_chain, i, argb, MIN_LENGTH, window_size,
+                            &offset);
+        code_min_length = DistanceToPlaneCode(xsize, offset);
+        cost_total = prev_cost +
+            GetDistanceCost(cost_model, code_min_length) +
+            GetLengthCost(cost_model, 1);
+        if (cost[i + 1] > cost_total) {
+          cost[i + 1] = (float)cost_total;
+          dist_array[i + 1] = 2;
+        }
       }
-      if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
     }
+    AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i,
+                                  0, use_color_cache, prev_cost, cost,
+                                  dist_array);
  next_symbol: ;
   }
-  // Last pixel still to do, it can only be a single step if not reached
-  // through cheaper means already.
-  ok = 1;
-Error:
+  // Handle the last pixel.
+  if (i == (pix_count - 1)) {
+    AddSingleLiteralWithCostModel(argb + i, hash_chain, &hashers, cost_model, i,
+                                  1, use_color_cache, cost[pix_count - 2], cost,
+                                  dist_array);
+  }
+  ok = !refs->error_;
+ Error:
   if (cc_init) VP8LColorCacheClear(&hashers);
-  HashChainDelete(hash_chain);
-  free(cost_model);
-  free(cost);
+  WebPSafeFree(cost_model);
+  WebPSafeFree(cost);
   return ok;
 }
 
-static int TraceBackwards(const uint32_t* const dist_array,
-                          int dist_array_size,
-                          uint32_t** const chosen_path,
-                          int* const chosen_path_size) {
-  int i;
-  // Count how many.
-  int count = 0;
-  for (i = dist_array_size - 1; i >= 0; ) {
-    int k = dist_array[i];
-    assert(k >= 1);
-    ++count;
-    i -= k;
-  }
-  // Allocate.
-  *chosen_path_size = count;
-  *chosen_path =
-      (uint32_t*)WebPSafeMalloc((uint64_t)count, sizeof(**chosen_path));
-  if (*chosen_path == NULL) return 0;
-
-  // Write in reverse order.
-  for (i = dist_array_size - 1; i >= 0; ) {
-    int k = dist_array[i];
-    assert(k >= 1);
-    (*chosen_path)[--count] = k;
-    i -= k;
+// We pack the path at the end of *dist_array and return
+// a pointer to this part of the array. Example:
+// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
+static void TraceBackwards(uint16_t* const dist_array,
+                           int dist_array_size,
+                           uint16_t** const chosen_path,
+                           int* const chosen_path_size) {
+  uint16_t* path = dist_array + dist_array_size;
+  uint16_t* cur = dist_array + dist_array_size - 1;
+  while (cur >= dist_array) {
+    const int k = *cur;
+    --path;
+    *path = k;
+    cur -= k;
   }
-  return 1;
+  *chosen_path = path;
+  *chosen_path_size = (int)(dist_array + dist_array_size - path);
 }
 
 static int BackwardReferencesHashChainFollowChosenPath(
-    int xsize, int ysize, const uint32_t* const argb, int cache_bits,
-    const uint32_t* const chosen_path, int chosen_path_size,
+    int xsize, int ysize, const uint32_t* const argb,
+    int quality, int cache_bits,
+    const uint16_t* const chosen_path, int chosen_path_size,
+    VP8LHashChain* const hash_chain,
     VP8LBackwardRefs* const refs) {
-  const int quality = 100;
   const int pix_count = xsize * ysize;
   const int use_color_cache = (cache_bits > 0);
-  int size = 0;
-  int i = 0;
-  int k;
   int ix;
+  int i = 0;
   int ok = 0;
   int cc_init = 0;
-  HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+  const int window_size = GetWindowSizeForHashChain(quality, xsize);
   VP8LColorCache hashers;
 
-  if (hash_chain == NULL || !HashChainInit(hash_chain, pix_count)) {
-    goto Error;
-  }
   if (use_color_cache) {
     cc_init = VP8LColorCacheInit(&hashers, cache_bits);
     if (!cc_init) goto Error;
   }
 
-  refs->size = 0;
-  for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
+  ClearBackwardRefs(refs);
+  HashChainReset(hash_chain);
+  for (ix = 0; ix < chosen_path_size; ++ix) {
     int offset = 0;
-    int len = 0;
-    int maxlen = chosen_path[ix];
-    if (maxlen != 1) {
-      HashChainFindCopy(hash_chain, quality,
-                        i, xsize, argb, maxlen, &offset, &len);
-      assert(len == maxlen);
-      refs->refs[size] = PixOrCopyCreateCopy(offset, len);
+    const int len = chosen_path[ix];
+    if (len != 1) {
+      int k;
+      HashChainFindOffset(hash_chain, i, argb, len, window_size, &offset);
+      BackwardRefsCursorAdd(refs, PixOrCopyCreateCopy(offset, len));
       if (use_color_cache) {
         for (k = 0; k < len; ++k) {
           VP8LColorCacheInsert(&hashers, argb[i + k]);
@@ -648,227 +780,330 @@ static int BackwardReferencesHashChainFollowChosenPath(
       }
       i += len;
     } else {
+      PixOrCopy v;
       if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
         // push pixel as a color cache index
         const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
-        refs->refs[size] = PixOrCopyCreateCacheIdx(idx);
+        v = PixOrCopyCreateCacheIdx(idx);
       } else {
-        refs->refs[size] = PixOrCopyCreateLiteral(argb[i]);
+        if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+        v = PixOrCopyCreateLiteral(argb[i]);
       }
-      if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+      BackwardRefsCursorAdd(refs, v);
       if (i + 1 < pix_count) {
         HashChainInsert(hash_chain, &argb[i], i);
       }
       ++i;
     }
   }
-  assert(size <= refs->max_size);
-  refs->size = size;
-  ok = 1;
-Error:
+  ok = !refs->error_;
+ Error:
   if (cc_init) VP8LColorCacheClear(&hashers);
-  HashChainDelete(hash_chain);
   return ok;
 }
 
 // Returns 1 on success.
 static int BackwardReferencesTraceBackwards(int xsize, int ysize,
-                                            int recursive_cost_model,
                                             const uint32_t* const argb,
-                                            int cache_bits,
+                                            int quality, int cache_bits,
+                                            VP8LHashChain* const hash_chain,
                                             VP8LBackwardRefs* const refs) {
   int ok = 0;
   const int dist_array_size = xsize * ysize;
-  uint32_t* chosen_path = NULL;
+  uint16_t* chosen_path = NULL;
   int chosen_path_size = 0;
-  uint32_t* dist_array =
-      (uint32_t*)WebPSafeMalloc((uint64_t)dist_array_size, sizeof(*dist_array));
+  uint16_t* dist_array =
+      (uint16_t*)WebPSafeMalloc(dist_array_size, sizeof(*dist_array));
 
   if (dist_array == NULL) goto Error;
 
   if (!BackwardReferencesHashChainDistanceOnly(
-      xsize, ysize, recursive_cost_model, argb, cache_bits, dist_array)) {
-    goto Error;
-  }
-  if (!TraceBackwards(dist_array, dist_array_size,
-                      &chosen_path, &chosen_path_size)) {
+      xsize, ysize, argb, quality, cache_bits, hash_chain,
+      refs, dist_array)) {
     goto Error;
   }
-  free(dist_array);   // no need to retain this memory any longer
-  dist_array = NULL;
+  TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
   if (!BackwardReferencesHashChainFollowChosenPath(
-      xsize, ysize, argb, cache_bits, chosen_path, chosen_path_size, refs)) {
+      xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
+      hash_chain, refs)) {
     goto Error;
   }
   ok = 1;
  Error:
-  free(chosen_path);
-  free(dist_array);
+  WebPSafeFree(dist_array);
   return ok;
 }
 
 static void BackwardReferences2DLocality(int xsize,
-                                         VP8LBackwardRefs* const refs) {
-  int i;
-  for (i = 0; i < refs->size; ++i) {
-    if (PixOrCopyIsCopy(&refs->refs[i])) {
-      const int dist = refs->refs[i].argb_or_distance;
+                                         const VP8LBackwardRefs* const refs) {
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  while (VP8LRefsCursorOk(&c)) {
+    if (PixOrCopyIsCopy(c.cur_pos)) {
+      const int dist = c.cur_pos->argb_or_distance;
       const int transformed_dist = DistanceToPlaneCode(xsize, dist);
-      refs->refs[i].argb_or_distance = transformed_dist;
+      c.cur_pos->argb_or_distance = transformed_dist;
     }
+    VP8LRefsCursorNext(&c);
   }
 }
 
-int VP8LGetBackwardReferences(int width, int height,
-                              const uint32_t* const argb,
-                              int quality, int cache_bits, int use_2d_locality,
-                              VP8LBackwardRefs* const best) {
-  int ok = 0;
-  int lz77_is_useful;
-  VP8LBackwardRefs refs_rle, refs_lz77;
-  const int num_pix = width * height;
-
-  VP8LBackwardRefsAlloc(&refs_rle, num_pix);
-  VP8LBackwardRefsAlloc(&refs_lz77, num_pix);
-  VP8LInitBackwardRefs(best);
-  if (refs_rle.refs == NULL || refs_lz77.refs == NULL) {
- Error1:
-    VP8LClearBackwardRefs(&refs_rle);
-    VP8LClearBackwardRefs(&refs_lz77);
-    goto End;
-  }
-
-  if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
-                                   &refs_lz77)) {
-    goto End;
-  }
-  // Backward Reference using RLE only.
-  BackwardReferencesRle(width, height, argb, &refs_rle);
+// Returns entropy for the given cache bits.
+static double ComputeCacheEntropy(const uint32_t* argb,
+                                  const VP8LBackwardRefs* const refs,
+                                  int cache_bits) {
+  const int use_color_cache = (cache_bits > 0);
+  int cc_init = 0;
+  double entropy = MAX_ENTROPY;
+  const double kSmallPenaltyForLargeCache = 4.0;
+  VP8LColorCache hashers;
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  VP8LHistogram* histo = VP8LAllocateHistogram(cache_bits);
+  if (histo == NULL) goto Error;
 
-  {
-    double bit_cost_lz77, bit_cost_rle;
-    VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo));
-    if (histo == NULL) goto Error1;
-    // Evaluate lz77 coding
-    VP8LHistogramCreate(histo, &refs_lz77, cache_bits);
-    bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
-    // Evaluate RLE coding
-    VP8LHistogramCreate(histo, &refs_rle, cache_bits);
-    bit_cost_rle = VP8LHistogramEstimateBits(histo);
-    // Decide if LZ77 is useful.
-    lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
-    free(histo);
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
   }
-
-  // Choose appropriate backward reference.
-  if (lz77_is_useful) {
-    // TraceBackwards is costly. Run it for higher qualities.
-    const int try_lz77_trace_backwards = (quality >= 75);
-    *best = refs_lz77;   // default guess: lz77 is better
-    VP8LClearBackwardRefs(&refs_rle);
-    if (try_lz77_trace_backwards) {
-      const int recursion_level = (num_pix < 320 * 200) ? 1 : 0;
-      VP8LBackwardRefs refs_trace;
-      if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) {
-        goto End;
-      }
-      if (BackwardReferencesTraceBackwards(
-          width, height, recursion_level, argb, cache_bits, &refs_trace)) {
-        VP8LClearBackwardRefs(&refs_lz77);
-        *best = refs_trace;
-      }
+  if (!use_color_cache) {
+    while (VP8LRefsCursorOk(&c)) {
+      VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
+      VP8LRefsCursorNext(&c);
     }
   } else {
-    VP8LClearBackwardRefs(&refs_lz77);
-    *best = refs_rle;
+    while (VP8LRefsCursorOk(&c)) {
+      const PixOrCopy* const v = c.cur_pos;
+      if (PixOrCopyIsLiteral(v)) {
+        const uint32_t pix = *argb++;
+        const uint32_t key = VP8LColorCacheGetIndex(&hashers, pix);
+        if (VP8LColorCacheLookup(&hashers, key) == pix) {
+          ++histo->literal_[NUM_LITERAL_CODES + NUM_LENGTH_CODES + key];
+        } else {
+          VP8LColorCacheSet(&hashers, key, pix);
+          ++histo->blue_[pix & 0xff];
+          ++histo->literal_[(pix >> 8) & 0xff];
+          ++histo->red_[(pix >> 16) & 0xff];
+          ++histo->alpha_[pix >> 24];
+        }
+      } else {
+        int len = PixOrCopyLength(v);
+        int code, extra_bits;
+        VP8LPrefixEncodeBits(len, &code, &extra_bits);
+        ++histo->literal_[NUM_LITERAL_CODES + code];
+        VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
+        ++histo->distance_[code];
+        do {
+          VP8LColorCacheInsert(&hashers, *argb++);
+        } while(--len != 0);
+      }
+      VP8LRefsCursorNext(&c);
+    }
   }
+  entropy = VP8LHistogramEstimateBits(histo) +
+      kSmallPenaltyForLargeCache * cache_bits;
+ Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  VP8LFreeHistogram(histo);
+  return entropy;
+}
 
-  if (use_2d_locality) BackwardReferences2DLocality(width, best);
-
-  ok = 1;
-
- End:
-  if (!ok) {
-    VP8LClearBackwardRefs(best);
+// Evaluate optimal cache bits for the local color cache.
+// The input *best_cache_bits sets the maximum cache bits to use (passing 0
+// implies disabling the local color cache). The local color cache is also
+// disabled for the lower (<= 25) quality.
+// Returns 0 in case of memory error.
+static int CalculateBestCacheSize(const uint32_t* const argb,
+                                  int xsize, int ysize, int quality,
+                                  VP8LHashChain* const hash_chain,
+                                  VP8LBackwardRefs* const refs,
+                                  int* const lz77_computed,
+                                  int* const best_cache_bits) {
+  int eval_low = 1;
+  int eval_high = 1;
+  double entropy_low = MAX_ENTROPY;
+  double entropy_high = MAX_ENTROPY;
+  const double cost_mul = 5e-4;
+  int cache_bits_low = 0;
+  int cache_bits_high = (quality <= 25) ? 0 : *best_cache_bits;
+
+  assert(cache_bits_high <= MAX_COLOR_CACHE_BITS);
+
+  *lz77_computed = 0;
+  if (cache_bits_high == 0) {
+    *best_cache_bits = 0;
+    // Local color cache is disabled.
+    return 1;
   }
-  return ok;
+  if (!BackwardReferencesLz77(xsize, ysize, argb, cache_bits_low, quality, 0,
+                              hash_chain, refs)) {
+    return 0;
+  }
+  // Do a binary search to find the optimal entropy for cache_bits.
+  while (eval_low || eval_high) {
+    if (eval_low) {
+      entropy_low = ComputeCacheEntropy(argb, refs, cache_bits_low);
+      entropy_low += entropy_low * cache_bits_low * cost_mul;
+      eval_low = 0;
+    }
+    if (eval_high) {
+      entropy_high = ComputeCacheEntropy(argb, refs, cache_bits_high);
+      entropy_high += entropy_high * cache_bits_high * cost_mul;
+      eval_high = 0;
+    }
+    if (entropy_high < entropy_low) {
+      const int prev_cache_bits_low = cache_bits_low;
+      *best_cache_bits = cache_bits_high;
+      cache_bits_low = (cache_bits_low + cache_bits_high) / 2;
+      if (cache_bits_low != prev_cache_bits_low) eval_low = 1;
+    } else {
+      *best_cache_bits = cache_bits_low;
+      cache_bits_high = (cache_bits_low + cache_bits_high) / 2;
+      if (cache_bits_high != cache_bits_low) eval_high = 1;
+    }
+  }
+  *lz77_computed = 1;
+  return 1;
 }
 
-// Returns 1 on success.
-static int ComputeCacheHistogram(const uint32_t* const argb,
-                                 int xsize, int ysize,
-                                 const VP8LBackwardRefs* const refs,
-                                 int cache_bits,
-                                 VP8LHistogram* const histo) {
+// Update (in-place) backward references for specified cache_bits.
+static int BackwardRefsWithLocalCache(const uint32_t* const argb,
+                                      int cache_bits,
+                                      VP8LBackwardRefs* const refs) {
   int pixel_index = 0;
-  int i;
-  uint32_t k;
   VP8LColorCache hashers;
-  const int use_color_cache = (cache_bits > 0);
-  int cc_init = 0;
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  if (!VP8LColorCacheInit(&hashers, cache_bits)) return 0;
 
-  if (use_color_cache) {
-    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
-    if (!cc_init) return 0;
-  }
-
-  for (i = 0; i < refs->size; ++i) {
-    const PixOrCopy* const v = &refs->refs[i];
+  while (VP8LRefsCursorOk(&c)) {
+    PixOrCopy* const v = c.cur_pos;
     if (PixOrCopyIsLiteral(v)) {
-      if (use_color_cache &&
-          VP8LColorCacheContains(&hashers, argb[pixel_index])) {
-        // push pixel as a cache index
-        const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
-        const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
-        VP8LHistogramAddSinglePixOrCopy(histo, &token);
+      const uint32_t argb_literal = v->argb_or_distance;
+      if (VP8LColorCacheContains(&hashers, argb_literal)) {
+        const int ix = VP8LColorCacheGetIndex(&hashers, argb_literal);
+        *v = PixOrCopyCreateCacheIdx(ix);
       } else {
-        VP8LHistogramAddSinglePixOrCopy(histo, v);
+        VP8LColorCacheInsert(&hashers, argb_literal);
       }
+      ++pixel_index;
     } else {
-      VP8LHistogramAddSinglePixOrCopy(histo, v);
-    }
-    if (use_color_cache) {
-      for (k = 0; k < PixOrCopyLength(v); ++k) {
-        VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
+      // refs was created without local cache, so it can not have cache indexes.
+      int k;
+      assert(PixOrCopyIsCopy(v));
+      for (k = 0; k < v->len; ++k) {
+        VP8LColorCacheInsert(&hashers, argb[pixel_index++]);
       }
     }
-    pixel_index += PixOrCopyLength(v);
+    VP8LRefsCursorNext(&c);
   }
-  assert(pixel_index == xsize * ysize);
-  (void)xsize;  // xsize is not used in non-debug compilations otherwise.
-  (void)ysize;  // ysize is not used in non-debug compilations otherwise.
-  if (cc_init) VP8LColorCacheClear(&hashers);
+  VP8LColorCacheClear(&hashers);
   return 1;
 }
 
-// Returns how many bits are to be used for a color cache.
-int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
-                                      int xsize, int ysize,
-                                      int* const best_cache_bits) {
-  int ok = 0;
-  int cache_bits;
-  double lowest_entropy = 1e99;
-  VP8LBackwardRefs refs;
-  static const double kSmallPenaltyForLargeCache = 4.0;
-  static const int quality = 30;
-  if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize) ||
-      !BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, &refs)) {
+static VP8LBackwardRefs* GetBackwardReferencesLowEffort(
+    int width, int height, const uint32_t* const argb, int quality,
+    int* const cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs refs_array[2]) {
+  VP8LBackwardRefs* refs_lz77 = &refs_array[0];
+  *cache_bits = 0;
+  if (!BackwardReferencesLz77(width, height, argb, 0, quality,
+                              1 /* Low effort. */, hash_chain, refs_lz77)) {
+    return NULL;
+  }
+  BackwardReferences2DLocality(width, refs_lz77);
+  return refs_lz77;
+}
+
+static VP8LBackwardRefs* GetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int* const cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs refs_array[2]) {
+  int lz77_is_useful;
+  int lz77_computed;
+  double bit_cost_lz77, bit_cost_rle;
+  VP8LBackwardRefs* best = NULL;
+  VP8LBackwardRefs* refs_lz77 = &refs_array[0];
+  VP8LBackwardRefs* refs_rle = &refs_array[1];
+  VP8LHistogram* histo = NULL;
+
+  if (!CalculateBestCacheSize(argb, width, height, quality, hash_chain,
+                              refs_lz77, &lz77_computed, cache_bits)) {
     goto Error;
   }
-  for (cache_bits = 0; cache_bits <= MAX_COLOR_CACHE_BITS; ++cache_bits) {
-    double cur_entropy;
-    VP8LHistogram histo;
-    VP8LHistogramInit(&histo, cache_bits);
-    ComputeCacheHistogram(argb, xsize, ysize, &refs, cache_bits, &histo);
-    cur_entropy = VP8LHistogramEstimateBits(&histo) +
-        kSmallPenaltyForLargeCache * cache_bits;
-    if (cache_bits == 0 || cur_entropy < lowest_entropy) {
-      *best_cache_bits = cache_bits;
-      lowest_entropy = cur_entropy;
+
+  if (lz77_computed) {
+    // Transform refs_lz77 for the optimized cache_bits.
+    if (*cache_bits > 0) {
+      if (!BackwardRefsWithLocalCache(argb, *cache_bits, refs_lz77)) {
+        goto Error;
+      }
+    }
+  } else {
+    if (!BackwardReferencesLz77(width, height, argb, *cache_bits, quality,
+                                0 /* Low effort. */, hash_chain, refs_lz77)) {
+      goto Error;
     }
   }
-  ok = 1;
+
+  if (!BackwardReferencesRle(width, height, argb, *cache_bits, refs_rle)) {
+    goto Error;
+  }
+
+  histo = VP8LAllocateHistogram(*cache_bits);
+  if (histo == NULL) goto Error;
+
+  {
+    // Evaluate LZ77 coding.
+    VP8LHistogramCreate(histo, refs_lz77, *cache_bits);
+    bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
+    // Evaluate RLE coding.
+    VP8LHistogramCreate(histo, refs_rle, *cache_bits);
+    bit_cost_rle = VP8LHistogramEstimateBits(histo);
+    // Decide if LZ77 is useful.
+    lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
+  }
+
+  // Choose appropriate backward reference.
+  if (lz77_is_useful) {
+    // TraceBackwards is costly. Don't execute it at lower quality.
+    const int try_lz77_trace_backwards = (quality >= 25);
+    best = refs_lz77;   // default guess: lz77 is better
+    if (try_lz77_trace_backwards) {
+      VP8LBackwardRefs* const refs_trace = refs_rle;
+      if (!VP8LBackwardRefsCopy(refs_lz77, refs_trace)) {
+        best = NULL;
+        goto Error;
+      }
+      if (BackwardReferencesTraceBackwards(width, height, argb, quality,
+                                           *cache_bits, hash_chain,
+                                           refs_trace)) {
+        double bit_cost_trace;
+        // Evaluate LZ77 coding.
+        VP8LHistogramCreate(histo, refs_trace, *cache_bits);
+        bit_cost_trace = VP8LHistogramEstimateBits(histo);
+        if (bit_cost_trace < bit_cost_lz77) {
+          best = refs_trace;
+        }
+      }
+    }
+  } else {
+    best = refs_rle;
+  }
+
+  BackwardReferences2DLocality(width, best);
+
  Error:
-  VP8LClearBackwardRefs(&refs);
-  return ok;
+  VP8LFreeHistogram(histo);
+  return best;
+}
+
+VP8LBackwardRefs* VP8LGetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs refs_array[2]) {
+  if (low_effort) {
+    return GetBackwardReferencesLowEffort(width, height, argb, quality,
+                                          cache_bits, hash_chain, refs_array);
+  } else {
+    return GetBackwardReferences(width, height, argb, quality, cache_bits,
+                                 hash_chain, refs_array);
+  }
 }
diff --git a/drivers/webp/enc/backward_references.h b/drivers/webp/enc/backward_references.h
index 8006a56ba1..e410b06f7d 100644
--- a/drivers/webp/enc/backward_references.h
+++ b/drivers/webp/enc/backward_references.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
@@ -13,82 +15,15 @@
 
 #include <assert.h>
 #include <stdlib.h>
-#include "../types.h"
-#include "../format_constants.h"
+#include "webp/types.h"
+#include "webp/format_constants.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-// The spec allows 11, we use 9 bits to reduce memory consumption in encoding.
-// Having 9 instead of 11 only removes about 0.25 % of compression density.
-#define MAX_COLOR_CACHE_BITS 9
-
-// Max ever number of codes we'll use:
-#define PIX_OR_COPY_CODES_MAX \
-    (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS))
-
-// -----------------------------------------------------------------------------
-// PrefixEncode()
-
-// use GNU builtins where available.
-#if defined(__GNUC__) && \
-    ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
-static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
-  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
-}
-#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
-#include <intrin.h>
-#pragma intrinsic(_BitScanReverse)
-
-static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
-  unsigned long first_set_bit;
-  return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1;
-}
-#else
-static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
-  int log = 0;
-  uint32_t value = n;
-  int i;
-
-  if (value == 0) return -1;
-  for (i = 4; i >= 0; --i) {
-    const int shift = (1 << i);
-    const uint32_t x = value >> shift;
-    if (x != 0) {
-      value = x;
-      log += shift;
-    }
-  }
-  return log;
-}
-#endif
-
-static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
-  const int floor = BitsLog2Floor(n);
-  if (n == (n & ~(n - 1)))  // zero or a power of two.
-    return floor;
-  else
-    return floor + 1;
-}
-
-// Splitting of distance and length codes into prefixes and
-// extra bits. The prefixes are encoded with an entropy code
-// while the extra bits are stored just as normal bits.
-static WEBP_INLINE void PrefixEncode(int distance, int* const code,
-                                     int* const extra_bits_count,
-                                     int* const extra_bits_value) {
-  // Collect the two most significant bits where the highest bit is 1.
-  const int highest_bit = BitsLog2Floor(--distance);
-  // & 0x3f is to make behavior well defined when highest_bit
-  // does not exist or is the least significant bit.
-  const int second_highest_bit =
-      (distance >> ((highest_bit - 1) & 0x3f)) & 1;
-  *extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0;
-  *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
-  *code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit)
-                            : (highest_bit == 0) ? 1 : 0;
-}
+// The maximum allowed limit is 11.
+#define MAX_COLOR_CACHE_BITS 10
 
 // -----------------------------------------------------------------------------
 // PixOrCopy
@@ -173,39 +108,94 @@ static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
 }
 
 // -----------------------------------------------------------------------------
-// VP8LBackwardRefs
+// VP8LHashChain
+
+#define HASH_BITS 18
+#define HASH_SIZE (1 << HASH_BITS)
+
+typedef struct VP8LHashChain VP8LHashChain;
+struct VP8LHashChain {
+  // Stores the most recently added position with the given hash value.
+  int32_t hash_to_first_index_[HASH_SIZE];
+  // chain_[pos] stores the previous position with the same hash value
+  // for every pixel in the image.
+  int32_t* chain_;
+  // This is the maximum size of the hash_chain that can be constructed.
+  // Typically this is the pixel count (width x height) for a given image.
+  int size_;
+};
 
-typedef struct {
-  PixOrCopy* refs;
-  int size;      // currently used
-  int max_size;  // maximum capacity
-} VP8LBackwardRefs;
+// Must be called first, to set size.
+int VP8LHashChainInit(VP8LHashChain* const p, int size);
+void VP8LHashChainClear(VP8LHashChain* const p);  // release memory
 
-// Initialize the object. Must be called first. 'refs' can be NULL.
-void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs);
+// -----------------------------------------------------------------------------
+// VP8LBackwardRefs (block-based backward-references storage)
+
+// maximum number of reference blocks the image will be segmented into
+#define MAX_REFS_BLOCK_PER_IMAGE 16
+
+typedef struct PixOrCopyBlock PixOrCopyBlock;   // forward declaration
+typedef struct VP8LBackwardRefs VP8LBackwardRefs;
+
+// Container for blocks chain
+struct VP8LBackwardRefs {
+  int block_size_;               // common block-size
+  int error_;                    // set to true if some memory error occurred
+  PixOrCopyBlock* refs_;         // list of currently used blocks
+  PixOrCopyBlock** tail_;        // for list recycling
+  PixOrCopyBlock* free_blocks_;  // free-list
+  PixOrCopyBlock* last_block_;   // used for adding new refs (internal)
+};
 
-// Release memory and re-initialize the object. 'refs' can be NULL.
-void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
+// Initialize the object. 'block_size' is the common block size to store
+// references (typically, width * height / MAX_REFS_BLOCK_PER_IMAGE).
+void VP8LBackwardRefsInit(VP8LBackwardRefs* const refs, int block_size);
+// Release memory for backward references.
+void VP8LBackwardRefsClear(VP8LBackwardRefs* const refs);
+// Copies the 'src' backward refs to the 'dst'. Returns 0 in case of error.
+int VP8LBackwardRefsCopy(const VP8LBackwardRefs* const src,
+                         VP8LBackwardRefs* const dst);
 
-// Allocate 'max_size' references. Returns false in case of memory error.
-int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size);
+// Cursor for iterating on references content
+typedef struct {
+  // public:
+  PixOrCopy* cur_pos;           // current position
+  // private:
+  PixOrCopyBlock* cur_block_;   // current block in the refs list
+  const PixOrCopy* last_pos_;   // sentinel for switching to next block
+} VP8LRefsCursor;
+
+// Returns a cursor positioned at the beginning of the references list.
+VP8LRefsCursor VP8LRefsCursorInit(const VP8LBackwardRefs* const refs);
+// Returns true if cursor is pointing at a valid position.
+static WEBP_INLINE int VP8LRefsCursorOk(const VP8LRefsCursor* const c) {
+  return (c->cur_pos != NULL);
+}
+// Move to next block of references. Internal, not to be called directly.
+void VP8LRefsCursorNextBlock(VP8LRefsCursor* const c);
+// Move to next position, or NULL. Should not be called if !VP8LRefsCursorOk().
+static WEBP_INLINE void VP8LRefsCursorNext(VP8LRefsCursor* const c) {
+  assert(c != NULL);
+  assert(VP8LRefsCursorOk(c));
+  if (++c->cur_pos == c->last_pos_) VP8LRefsCursorNextBlock(c);
+}
 
 // -----------------------------------------------------------------------------
 // Main entry points
 
 // Evaluates best possible backward references for specified quality.
-// Further optimize for 2D locality if use_2d_locality flag is set.
-int VP8LGetBackwardReferences(int width, int height,
-                              const uint32_t* const argb,
-                              int quality, int cache_bits, int use_2d_locality,
-                              VP8LBackwardRefs* const best);
-
-// Produce an estimate for a good color cache size for the image.
-int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
-                                      int xsize, int ysize,
-                                      int* const best_cache_bits);
-
-#if defined(__cplusplus) || defined(c_plusplus)
+// The input cache_bits to 'VP8LGetBackwardReferences' sets the maximum cache
+// bits to use (passing 0 implies disabling the local color cache).
+// The optimal cache bits is evaluated and set for the *cache_bits parameter.
+// Returns a pointer to the better of the two backward refs (refs[0] or
+// refs[1]), or NULL in case of error.
+VP8LBackwardRefs* VP8LGetBackwardReferences(
+    int width, int height, const uint32_t* const argb, int quality,
+    int low_effort, int* const cache_bits, VP8LHashChain* const hash_chain,
+    VP8LBackwardRefs refs[2]);
+
+#ifdef __cplusplus
 }
 #endif
 
diff --git a/drivers/webp/enc/config.c b/drivers/webp/enc/config.c
index 4136f6c227..8fd2276cb5 100644
--- a/drivers/webp/enc/config.c
+++ b/drivers/webp/enc/config.c
@@ -1,19 +1,17 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Coding tools configuration
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "../encode.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "webp/encode.h"
 
 //------------------------------------------------------------------------------
 // WebPConfig
@@ -31,9 +29,9 @@ int WebPConfigInitInternal(WebPConfig* config,
   config->target_PSNR = 0.;
   config->method = 4;
   config->sns_strength = 50;
-  config->filter_strength = 20;   // default: light filtering
+  config->filter_strength = 60;   // mid-filtering
   config->filter_sharpness = 0;
-  config->filter_type = 0;        // default: simple
+  config->filter_type = 1;        // default: strong (so U/V is filtered too)
   config->partitions = 0;
   config->segments = 4;
   config->pass = 1;
@@ -45,7 +43,15 @@ int WebPConfigInitInternal(WebPConfig* config,
   config->alpha_filtering = 1;
   config->alpha_quality = 100;
   config->lossless = 0;
+  config->exact = 0;
   config->image_hint = WEBP_HINT_DEFAULT;
+  config->emulate_jpeg_size = 0;
+  config->thread_level = 0;
+  config->low_memory = 0;
+  config->near_lossless = 100;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  config->delta_palettization = 0;
+#endif // WEBP_EXPERIMENTAL_FEATURES
 
   // TODO(skal): tune.
   switch (preset) {
@@ -53,11 +59,13 @@ int WebPConfigInitInternal(WebPConfig* config,
       config->sns_strength = 80;
       config->filter_sharpness = 4;
       config->filter_strength = 35;
+      config->preprocessing &= ~2;   // no dithering
       break;
     case WEBP_PRESET_PHOTO:
       config->sns_strength = 80;
       config->filter_sharpness = 3;
       config->filter_strength = 30;
+      config->preprocessing |= 2;
       break;
     case WEBP_PRESET_DRAWING:
       config->sns_strength = 25;
@@ -67,10 +75,12 @@ int WebPConfigInitInternal(WebPConfig* config,
     case WEBP_PRESET_ICON:
       config->sns_strength = 0;
       config->filter_strength = 0;   // disable filtering to retain sharpness
+      config->preprocessing &= ~2;   // no dithering
       break;
     case WEBP_PRESET_TEXT:
       config->sns_strength = 0;
       config->filter_strength = 0;   // disable filtering to retain sharpness
+      config->preprocessing &= ~2;   // no dithering
       config->segments = 2;
       break;
     case WEBP_PRESET_DEFAULT:
@@ -106,7 +116,7 @@ int WebPValidateConfig(const WebPConfig* config) {
     return 0;
   if (config->show_compressed < 0 || config->show_compressed > 1)
     return 0;
-  if (config->preprocessing < 0 || config->preprocessing > 1)
+  if (config->preprocessing < 0 || config->preprocessing > 7)
     return 0;
   if (config->partitions < 0 || config->partitions > 3)
     return 0;
@@ -120,13 +130,44 @@ int WebPValidateConfig(const WebPConfig* config) {
     return 0;
   if (config->lossless < 0 || config->lossless > 1)
     return 0;
+  if (config->near_lossless < 0 || config->near_lossless > 100)
+    return 0;
   if (config->image_hint >= WEBP_HINT_LAST)
     return 0;
+  if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
+    return 0;
+  if (config->thread_level < 0 || config->thread_level > 1)
+    return 0;
+  if (config->low_memory < 0 || config->low_memory > 1)
+    return 0;
+  if (config->exact < 0 || config->exact > 1)
+    return 0;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  if (config->delta_palettization < 0 || config->delta_palettization > 1)
+    return 0;
+#endif  // WEBP_EXPERIMENTAL_FEATURES
   return 1;
 }
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+#define MAX_LEVEL 9
+
+// Mapping between -z level and -m / -q parameter settings.
+static const struct {
+  uint8_t method_;
+  uint8_t quality_;
+} kLosslessPresets[MAX_LEVEL + 1] = {
+  { 0,  0 }, { 1, 20 }, { 2, 25 }, { 3, 30 }, { 3, 50 },
+  { 4, 50 }, { 4, 75 }, { 4, 90 }, { 5, 90 }, { 6, 100 }
+};
+
+int WebPConfigLosslessPreset(WebPConfig* config, int level) {
+  if (config == NULL || level < 0 || level > MAX_LEVEL) return 0;
+  config->lossless = 1;
+  config->method = kLosslessPresets[level].method_;
+  config->quality = kLosslessPresets[level].quality_;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/enc/cost.c b/drivers/webp/enc/cost.c
index 92e0cc713c..ae7fe01388 100644
--- a/drivers/webp/enc/cost.c
+++ b/drivers/webp/enc/cost.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Cost tables for level and modes
@@ -11,42 +13,6 @@
 
 #include "./cost.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-//------------------------------------------------------------------------------
-// Boolean-cost cost table
-
-const uint16_t VP8EntropyCost[256] = {
-  1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
-  1178, 1152, 1110, 1076, 1061, 1024, 1024,  992,  968,  951,
-   939,  911,  896,  878,  871,  854,  838,  820,  811,  794,
-   786,  768,  768,  752,  740,  732,  720,  709,  704,  690,
-   683,  672,  666,  655,  647,  640,  631,  622,  615,  607,
-   598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
-   534,  528,  522,  512,  512,  504,  500,  494,  488,  483,
-   477,  473,  467,  461,  458,  452,  448,  443,  438,  434,
-   427,  424,  419,  415,  410,  406,  403,  399,  394,  390,
-   384,  384,  377,  374,  370,  366,  362,  359,  355,  351,
-   347,  342,  342,  336,  333,  330,  326,  323,  320,  316,
-   312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
-   280,  277,  274,  272,  268,  266,  262,  256,  256,  256,
-   251,  248,  245,  242,  240,  237,  234,  232,  228,  226,
-   223,  221,  218,  216,  214,  211,  208,  205,  203,  201,
-   198,  196,  192,  191,  188,  187,  183,  181,  179,  176,
-   175,  171,  171,  168,  165,  163,  160,  159,  156,  154,
-   152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
-   131,  128,  128,  125,  123,  121,  119,  117,  115,  113,
-   111,  110,  107,  105,  103,  102,  100,   98,   96,   94,
-    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,
-    74,   73,   71,   69,   67,   66,   64,   63,   61,   59,
-    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,
-    41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
-    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,
-    10,    9,    7,    6,    4,    3
-};
-
 //------------------------------------------------------------------------------
 // Level cost tables
 
@@ -73,267 +39,6 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
   {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153}
 };
 
-// fixed costs for coding levels, deduce from the coding tree.
-// This is only the part that doesn't depend on the probability state.
-const uint16_t VP8LevelFixedCosts[2048] = {
-     0,  256,  256,  256,  256,  432,  618,  630,
-   731,  640,  640,  828,  901,  948, 1021, 1101,
-  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
-  1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
-  1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
-  1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
-  1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
-  1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
-  1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
-  1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
-  1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
-  1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
-  1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
-  2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
-  2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
-  2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
-  2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
-  2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
-  2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
-  2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
-  2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
-  2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
-  2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
-  2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
-  2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
-  2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
-  2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
-  2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
-  2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
-  2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
-  2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
-  3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
-  3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
-  3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
-  3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
-  3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
-  3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
-  3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
-  3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
-  3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
-  3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
-  2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
-  2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
-  3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
-  3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
-  3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
-  3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
-  3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
-  3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
-  3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
-  3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
-  3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
-  3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
-  3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
-  3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
-  3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
-  3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
-  3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
-  3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
-  3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
-  3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
-  3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
-  4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
-  4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
-  4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
-  4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
-  4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
-  4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
-  4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
-  4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
-  4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
-  4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
-  4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
-  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
-  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
-  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
-  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
-  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
-  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
-  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
-  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
-  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
-  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
-  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
-  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
-  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
-  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
-  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
-  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
-  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
-  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
-  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
-  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
-  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
-  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
-  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
-  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
-  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
-  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
-  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
-  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
-  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
-  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
-  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
-  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
-  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
-  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
-  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
-  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
-  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
-  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
-  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
-  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
-  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
-  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
-  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
-  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
-  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
-  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
-  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
-  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
-  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
-  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
-  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
-  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
-  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
-  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
-  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
-  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
-  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
-  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
-  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
-  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
-  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
-  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
-  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
-  6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
-  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
-  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
-  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
-  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
-  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
-  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
-  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
-  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
-  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
-  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
-  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
-  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
-  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
-  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
-  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
-  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
-  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
-  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
-  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
-  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
-  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
-  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
-  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
-  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
-  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
-  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
-  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
-  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
-  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
-  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
-  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
-  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
-  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
-  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
-  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
-  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
-  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
-  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
-  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
-  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
-  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
-  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
-  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
-  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
-  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
-  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
-  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
-  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
-  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
-  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
-  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
-  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
-  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
-  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
-  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
-  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
-  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
-  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
-  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
-  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
-  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
-  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
-  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
-  6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
-  5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
-  5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
-  5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
-  5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
-  5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
-  5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
-  5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
-  5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
-  5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
-  5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
-  5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
-  6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
-  6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
-  6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
-  6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
-  6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
-  6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
-  6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
-  6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
-  6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
-  6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
-  6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
-  6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
-  6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
-  6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
-  6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
-  6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
-  6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
-  6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
-  6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
-  7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
-  7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
-  6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
-  6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
-  6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
-  6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
-  6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
-  6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
-  6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
-  6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
-  6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
-  6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
-  7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
-  7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
-  7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
-  7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
-  7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
-  7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
-  7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
-  7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
-  7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
-  7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
-  7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
-  7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
-  7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
-};
-
 static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
   int pattern = VP8LevelCodes[level - 1][0];
   int bits = VP8LevelCodes[level - 1][1];
@@ -352,19 +57,21 @@ static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
 //------------------------------------------------------------------------------
 // Pre-calc level costs once for all
 
-void VP8CalculateLevelCosts(VP8Proba* const proba) {
+void VP8CalculateLevelCosts(VP8EncProba* const proba) {
   int ctype, band, ctx;
 
   if (!proba->dirty_) return;  // nothing to do.
 
   for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
+    int n;
     for (band = 0; band < NUM_BANDS; ++band) {
-      for(ctx = 0; ctx < NUM_CTX; ++ctx) {
+      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
         const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
         uint16_t* const table = proba->level_cost_[ctype][band][ctx];
-        const int cost_base = VP8BitCost(1, p[1]);
+        const int cost0 = (ctx > 0) ? VP8BitCost(1, p[0]) : 0;
+        const int cost_base = VP8BitCost(1, p[1]) + cost0;
         int v;
-        table[0] = VP8BitCost(0, p[1]);
+        table[0] = VP8BitCost(0, p[1]) + cost0;
         for (v = 1; v <= MAX_VARIABLE_LEVEL; ++v) {
           table[v] = cost_base + VariableLevelCost(v, p);
         }
@@ -372,6 +79,12 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
         // actually constant.
       }
     }
+    for (n = 0; n < 16; ++n) {    // replicate bands. We don't need to sentinel.
+      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+        proba->remapped_costs_[ctype][n][ctx] =
+            proba->level_cost_[ctype][VP8EncBands[n]][ctx];
+      }
+    }
   }
   proba->dirty_ = 0;
 }
@@ -385,110 +98,257 @@ const uint16_t VP8FixedCostsUV[4] = { 302, 984, 439, 642 };
 // note: these values include the fixed VP8BitCost(1, 145) mode selection cost.
 const uint16_t VP8FixedCostsI16[4] = { 663, 919, 872, 919 };
 const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
-  { {  251, 1362, 1934, 2085, 2314, 2230, 1839, 1988, 2437, 2348 },
-    {  403,  680, 1507, 1519, 2060, 2005, 1992, 1914, 1924, 1733 },
-    {  353, 1121,  973, 1895, 2060, 1787, 1671, 1516, 2012, 1868 },
-    {  770,  852, 1581,  632, 1393, 1780, 1823, 1936, 1074, 1218 },
-    {  510, 1270, 1467, 1319,  847, 1279, 1792, 2094, 1080, 1353 },
-    {  488, 1322,  918, 1573, 1300,  883, 1814, 1752, 1756, 1502 },
-    {  425,  992, 1820, 1514, 1843, 2440,  937, 1771, 1924, 1129 },
-    {  363, 1248, 1257, 1970, 2194, 2385, 1569,  953, 1951, 1601 },
-    {  723, 1257, 1631,  964,  963, 1508, 1697, 1824,  671, 1418 },
-    {  635, 1038, 1573,  930, 1673, 1413, 1410, 1687, 1410,  749 } },
-  { {  451,  613, 1345, 1702, 1870, 1716, 1728, 1766, 2190, 2310 },
-    {  678,  453, 1171, 1443, 1925, 1831, 2045, 1781, 1887, 1602 },
-    {  711,  666,  674, 1718, 1910, 1493, 1775, 1193, 2325, 2325 },
-    {  883,  854, 1583,  542, 1800, 1878, 1664, 2149, 1207, 1087 },
-    {  669,  994, 1248, 1122,  949, 1179, 1376, 1729, 1070, 1244 },
-    {  715, 1026,  715, 1350, 1430,  930, 1717, 1296, 1479, 1479 },
-    {  544,  841, 1656, 1450, 2094, 3883, 1010, 1759, 2076,  809 },
-    {  610,  855,  957, 1553, 2067, 1561, 1704,  824, 2066, 1226 },
-    {  833,  960, 1416,  819, 1277, 1619, 1501, 1617,  757, 1182 },
-    {  711,  964, 1252,  879, 1441, 1828, 1508, 1636, 1594,  734 } },
-  { {  605,  764,  734, 1713, 1747, 1192, 1819, 1353, 1877, 2392 },
-    {  866,  641,  586, 1622, 2072, 1431, 1888, 1346, 2189, 1764 },
-    {  901,  851,  456, 2165, 2281, 1405, 1739, 1193, 2183, 2443 },
-    {  770, 1045,  952, 1078, 1342, 1191, 1436, 1063, 1303,  995 },
-    {  901, 1086,  727, 1170,  884, 1105, 1267, 1401, 1739, 1337 },
-    {  951, 1162,  595, 1488, 1388,  703, 1790, 1366, 2057, 1724 },
-    {  534,  986, 1273, 1987, 3273, 1485, 1024, 1399, 1583,  866 },
-    {  699, 1182,  695, 1978, 1726, 1986, 1326,  714, 1750, 1672 },
-    {  951, 1217, 1209,  920, 1062, 1441, 1548,  999,  952,  932 },
-    {  733, 1284,  784, 1256, 1557, 1098, 1257, 1357, 1414,  908 } },
-  { {  316, 1075, 1653, 1220, 2145, 2051, 1730, 2131, 1884, 1790 },
-    {  745,  516, 1404,  894, 1599, 2375, 2013, 2105, 1475, 1381 },
-    {  516,  729, 1088, 1319, 1637, 3426, 1636, 1275, 1531, 1453 },
-    {  894,  943, 2138,  468, 1704, 2259, 2069, 1763, 1266, 1158 },
-    {  605, 1025, 1235,  871, 1170, 1767, 1493, 1500, 1104, 1258 },
-    {  739,  826, 1207, 1151, 1412,  846, 1305, 2726, 1014, 1569 },
-    {  558,  825, 1820, 1398, 3344, 1556, 1218, 1550, 1228,  878 },
-    {  429,  951, 1089, 1816, 3861, 3861, 1556,  969, 1568, 1828 },
-    {  883,  961, 1752,  769, 1468, 1810, 2081, 2346,  613, 1298 },
-    {  803,  895, 1372,  641, 1303, 1708, 1686, 1700, 1306, 1033 } },
-  { {  439, 1267, 1270, 1579,  963, 1193, 1723, 1729, 1198, 1993 },
-    {  705,  725, 1029, 1153, 1176, 1103, 1821, 1567, 1259, 1574 },
-    {  723,  859,  802, 1253,  972, 1202, 1407, 1665, 1520, 1674 },
-    {  894,  960, 1254,  887, 1052, 1607, 1344, 1349,  865, 1150 },
-    {  833, 1312, 1337, 1205,  572, 1288, 1414, 1529, 1088, 1430 },
-    {  842, 1279, 1068, 1861,  862,  688, 1861, 1630, 1039, 1381 },
-    {  766,  938, 1279, 1546, 3338, 1550, 1031, 1542, 1288,  640 },
-    {  715, 1090,  835, 1609, 1100, 1100, 1603, 1019, 1102, 1617 },
-    {  894, 1813, 1500, 1188,  789, 1194, 1491, 1919,  617, 1333 },
-    {  610, 1076, 1644, 1281, 1283,  975, 1179, 1688, 1434,  889 } },
-  { {  544,  971, 1146, 1849, 1221,  740, 1857, 1621, 1683, 2430 },
-    {  723,  705,  961, 1371, 1426,  821, 2081, 2079, 1839, 1380 },
-    {  783,  857,  703, 2145, 1419,  814, 1791, 1310, 1609, 2206 },
-    {  997, 1000, 1153,  792, 1229, 1162, 1810, 1418,  942,  979 },
-    {  901, 1226,  883, 1289,  793,  715, 1904, 1649, 1319, 3108 },
-    {  979, 1478,  782, 2216, 1454,  455, 3092, 1591, 1997, 1664 },
-    {  663, 1110, 1504, 1114, 1522, 3311,  676, 1522, 1530, 1024 },
-    {  605, 1138, 1153, 1314, 1569, 1315, 1157,  804, 1574, 1320 },
-    {  770, 1216, 1218, 1227,  869, 1384, 1232, 1375,  834, 1239 },
-    {  775, 1007,  843, 1216, 1225, 1074, 2527, 1479, 1149,  975 } },
-  { {  477,  817, 1309, 1439, 1708, 1454, 1159, 1241, 1945, 1672 },
-    {  577,  796, 1112, 1271, 1618, 1458, 1087, 1345, 1831, 1265 },
-    {  663,  776,  753, 1940, 1690, 1690, 1227, 1097, 3149, 1361 },
-    {  766, 1299, 1744, 1161, 1565, 1106, 1045, 1230, 1232,  707 },
-    {  915, 1026, 1404, 1182, 1184,  851, 1428, 2425, 1043,  789 },
-    {  883, 1456,  790, 1082, 1086,  985, 1083, 1484, 1238, 1160 },
-    {  507, 1345, 2261, 1995, 1847, 3636,  653, 1761, 2287,  933 },
-    {  553, 1193, 1470, 2057, 2059, 2059,  833,  779, 2058, 1263 },
-    {  766, 1275, 1515, 1039,  957, 1554, 1286, 1540, 1289,  705 },
-    {  499, 1378, 1496, 1385, 1850, 1850, 1044, 2465, 1515,  720 } },
-  { {  553,  930,  978, 2077, 1968, 1481, 1457,  761, 1957, 2362 },
-    {  694,  864,  905, 1720, 1670, 1621, 1429,  718, 2125, 1477 },
-    {  699,  968,  658, 3190, 2024, 1479, 1865,  750, 2060, 2320 },
-    {  733, 1308, 1296, 1062, 1576, 1322, 1062, 1112, 1172,  816 },
-    {  920,  927, 1052,  939,  947, 1156, 1152, 1073, 3056, 1268 },
-    {  723, 1534,  711, 1547, 1294,  892, 1553,  928, 1815, 1561 },
-    {  663, 1366, 1583, 2111, 1712, 3501,  522, 1155, 2130, 1133 },
-    {  614, 1731, 1188, 2343, 1944, 3733, 1287,  487, 3546, 1758 },
-    {  770, 1585, 1312,  826,  884, 2673, 1185, 1006, 1195, 1195 },
-    {  758, 1333, 1273, 1023, 1621, 1162, 1351,  833, 1479,  862 } },
-  { {  376, 1193, 1446, 1149, 1545, 1577, 1870, 1789, 1175, 1823 },
-    {  803,  633, 1136, 1058, 1350, 1323, 1598, 2247, 1072, 1252 },
-    {  614, 1048,  943,  981, 1152, 1869, 1461, 1020, 1618, 1618 },
-    { 1107, 1085, 1282,  592, 1779, 1933, 1648, 2403,  691, 1246 },
-    {  851, 1309, 1223, 1243,  895, 1593, 1792, 2317,  627, 1076 },
-    {  770, 1216, 1030, 1125,  921,  981, 1629, 1131, 1049, 1646 },
-    {  626, 1469, 1456, 1081, 1489, 3278,  981, 1232, 1498,  733 },
-    {  617, 1201,  812, 1220, 1476, 1476, 1478,  970, 1228, 1488 },
-    { 1179, 1393, 1540,  999, 1243, 1503, 1916, 1925,  414, 1614 },
-    {  943, 1088, 1490,  682, 1112, 1372, 1756, 1505,  966,  966 } },
-  { {  322, 1142, 1589, 1396, 2144, 1859, 1359, 1925, 2084, 1518 },
-    {  617,  625, 1241, 1234, 2121, 1615, 1524, 1858, 1720, 1004 },
-    {  553,  851,  786, 1299, 1452, 1560, 1372, 1561, 1967, 1713 },
-    {  770,  977, 1396,  568, 1893, 1639, 1540, 2108, 1430, 1013 },
-    {  684, 1120, 1375,  982,  930, 2719, 1638, 1643,  933,  993 },
-    {  553, 1103,  996, 1356, 1361, 1005, 1507, 1761, 1184, 1268 },
-    {  419, 1247, 1537, 1554, 1817, 3606, 1026, 1666, 1829,  923 },
-    {  439, 1139, 1101, 1257, 3710, 1922, 1205, 1040, 1931, 1529 },
-    {  979,  935, 1269,  847, 1202, 1286, 1530, 1535,  827, 1036 },
-    {  516, 1378, 1569, 1110, 1798, 1798, 1198, 2199, 1543,  712 } },
+  { {   40, 1151, 1723, 1874, 2103, 2019, 1628, 1777, 2226, 2137 },
+    {  192,  469, 1296, 1308, 1849, 1794, 1781, 1703, 1713, 1522 },
+    {  142,  910,  762, 1684, 1849, 1576, 1460, 1305, 1801, 1657 },
+    {  559,  641, 1370,  421, 1182, 1569, 1612, 1725,  863, 1007 },
+    {  299, 1059, 1256, 1108,  636, 1068, 1581, 1883,  869, 1142 },
+    {  277, 1111,  707, 1362, 1089,  672, 1603, 1541, 1545, 1291 },
+    {  214,  781, 1609, 1303, 1632, 2229,  726, 1560, 1713,  918 },
+    {  152, 1037, 1046, 1759, 1983, 2174, 1358,  742, 1740, 1390 },
+    {  512, 1046, 1420,  753,  752, 1297, 1486, 1613,  460, 1207 },
+    {  424,  827, 1362,  719, 1462, 1202, 1199, 1476, 1199,  538 } },
+  { {  240,  402, 1134, 1491, 1659, 1505, 1517, 1555, 1979, 2099 },
+    {  467,  242,  960, 1232, 1714, 1620, 1834, 1570, 1676, 1391 },
+    {  500,  455,  463, 1507, 1699, 1282, 1564,  982, 2114, 2114 },
+    {  672,  643, 1372,  331, 1589, 1667, 1453, 1938,  996,  876 },
+    {  458,  783, 1037,  911,  738,  968, 1165, 1518,  859, 1033 },
+    {  504,  815,  504, 1139, 1219,  719, 1506, 1085, 1268, 1268 },
+    {  333,  630, 1445, 1239, 1883, 3672,  799, 1548, 1865,  598 },
+    {  399,  644,  746, 1342, 1856, 1350, 1493,  613, 1855, 1015 },
+    {  622,  749, 1205,  608, 1066, 1408, 1290, 1406,  546,  971 },
+    {  500,  753, 1041,  668, 1230, 1617, 1297, 1425, 1383,  523 } },
+  { {  394,  553,  523, 1502, 1536,  981, 1608, 1142, 1666, 2181 },
+    {  655,  430,  375, 1411, 1861, 1220, 1677, 1135, 1978, 1553 },
+    {  690,  640,  245, 1954, 2070, 1194, 1528,  982, 1972, 2232 },
+    {  559,  834,  741,  867, 1131,  980, 1225,  852, 1092,  784 },
+    {  690,  875,  516,  959,  673,  894, 1056, 1190, 1528, 1126 },
+    {  740,  951,  384, 1277, 1177,  492, 1579, 1155, 1846, 1513 },
+    {  323,  775, 1062, 1776, 3062, 1274,  813, 1188, 1372,  655 },
+    {  488,  971,  484, 1767, 1515, 1775, 1115,  503, 1539, 1461 },
+    {  740, 1006,  998,  709,  851, 1230, 1337,  788,  741,  721 },
+    {  522, 1073,  573, 1045, 1346,  887, 1046, 1146, 1203,  697 } },
+  { {  105,  864, 1442, 1009, 1934, 1840, 1519, 1920, 1673, 1579 },
+    {  534,  305, 1193,  683, 1388, 2164, 1802, 1894, 1264, 1170 },
+    {  305,  518,  877, 1108, 1426, 3215, 1425, 1064, 1320, 1242 },
+    {  683,  732, 1927,  257, 1493, 2048, 1858, 1552, 1055,  947 },
+    {  394,  814, 1024,  660,  959, 1556, 1282, 1289,  893, 1047 },
+    {  528,  615,  996,  940, 1201,  635, 1094, 2515,  803, 1358 },
+    {  347,  614, 1609, 1187, 3133, 1345, 1007, 1339, 1017,  667 },
+    {  218,  740,  878, 1605, 3650, 3650, 1345,  758, 1357, 1617 },
+    {  672,  750, 1541,  558, 1257, 1599, 1870, 2135,  402, 1087 },
+    {  592,  684, 1161,  430, 1092, 1497, 1475, 1489, 1095,  822 } },
+  { {  228, 1056, 1059, 1368,  752,  982, 1512, 1518,  987, 1782 },
+    {  494,  514,  818,  942,  965,  892, 1610, 1356, 1048, 1363 },
+    {  512,  648,  591, 1042,  761,  991, 1196, 1454, 1309, 1463 },
+    {  683,  749, 1043,  676,  841, 1396, 1133, 1138,  654,  939 },
+    {  622, 1101, 1126,  994,  361, 1077, 1203, 1318,  877, 1219 },
+    {  631, 1068,  857, 1650,  651,  477, 1650, 1419,  828, 1170 },
+    {  555,  727, 1068, 1335, 3127, 1339,  820, 1331, 1077,  429 },
+    {  504,  879,  624, 1398,  889,  889, 1392,  808,  891, 1406 },
+    {  683, 1602, 1289,  977,  578,  983, 1280, 1708,  406, 1122 },
+    {  399,  865, 1433, 1070, 1072,  764,  968, 1477, 1223,  678 } },
+  { {  333,  760,  935, 1638, 1010,  529, 1646, 1410, 1472, 2219 },
+    {  512,  494,  750, 1160, 1215,  610, 1870, 1868, 1628, 1169 },
+    {  572,  646,  492, 1934, 1208,  603, 1580, 1099, 1398, 1995 },
+    {  786,  789,  942,  581, 1018,  951, 1599, 1207,  731,  768 },
+    {  690, 1015,  672, 1078,  582,  504, 1693, 1438, 1108, 2897 },
+    {  768, 1267,  571, 2005, 1243,  244, 2881, 1380, 1786, 1453 },
+    {  452,  899, 1293,  903, 1311, 3100,  465, 1311, 1319,  813 },
+    {  394,  927,  942, 1103, 1358, 1104,  946,  593, 1363, 1109 },
+    {  559, 1005, 1007, 1016,  658, 1173, 1021, 1164,  623, 1028 },
+    {  564,  796,  632, 1005, 1014,  863, 2316, 1268,  938,  764 } },
+  { {  266,  606, 1098, 1228, 1497, 1243,  948, 1030, 1734, 1461 },
+    {  366,  585,  901, 1060, 1407, 1247,  876, 1134, 1620, 1054 },
+    {  452,  565,  542, 1729, 1479, 1479, 1016,  886, 2938, 1150 },
+    {  555, 1088, 1533,  950, 1354,  895,  834, 1019, 1021,  496 },
+    {  704,  815, 1193,  971,  973,  640, 1217, 2214,  832,  578 },
+    {  672, 1245,  579,  871,  875,  774,  872, 1273, 1027,  949 },
+    {  296, 1134, 2050, 1784, 1636, 3425,  442, 1550, 2076,  722 },
+    {  342,  982, 1259, 1846, 1848, 1848,  622,  568, 1847, 1052 },
+    {  555, 1064, 1304,  828,  746, 1343, 1075, 1329, 1078,  494 },
+    {  288, 1167, 1285, 1174, 1639, 1639,  833, 2254, 1304,  509 } },
+  { {  342,  719,  767, 1866, 1757, 1270, 1246,  550, 1746, 2151 },
+    {  483,  653,  694, 1509, 1459, 1410, 1218,  507, 1914, 1266 },
+    {  488,  757,  447, 2979, 1813, 1268, 1654,  539, 1849, 2109 },
+    {  522, 1097, 1085,  851, 1365, 1111,  851,  901,  961,  605 },
+    {  709,  716,  841,  728,  736,  945,  941,  862, 2845, 1057 },
+    {  512, 1323,  500, 1336, 1083,  681, 1342,  717, 1604, 1350 },
+    {  452, 1155, 1372, 1900, 1501, 3290,  311,  944, 1919,  922 },
+    {  403, 1520,  977, 2132, 1733, 3522, 1076,  276, 3335, 1547 },
+    {  559, 1374, 1101,  615,  673, 2462,  974,  795,  984,  984 },
+    {  547, 1122, 1062,  812, 1410,  951, 1140,  622, 1268,  651 } },
+  { {  165,  982, 1235,  938, 1334, 1366, 1659, 1578,  964, 1612 },
+    {  592,  422,  925,  847, 1139, 1112, 1387, 2036,  861, 1041 },
+    {  403,  837,  732,  770,  941, 1658, 1250,  809, 1407, 1407 },
+    {  896,  874, 1071,  381, 1568, 1722, 1437, 2192,  480, 1035 },
+    {  640, 1098, 1012, 1032,  684, 1382, 1581, 2106,  416,  865 },
+    {  559, 1005,  819,  914,  710,  770, 1418,  920,  838, 1435 },
+    {  415, 1258, 1245,  870, 1278, 3067,  770, 1021, 1287,  522 },
+    {  406,  990,  601, 1009, 1265, 1265, 1267,  759, 1017, 1277 },
+    {  968, 1182, 1329,  788, 1032, 1292, 1705, 1714,  203, 1403 },
+    {  732,  877, 1279,  471,  901, 1161, 1545, 1294,  755,  755 } },
+  { {  111,  931, 1378, 1185, 1933, 1648, 1148, 1714, 1873, 1307 },
+    {  406,  414, 1030, 1023, 1910, 1404, 1313, 1647, 1509,  793 },
+    {  342,  640,  575, 1088, 1241, 1349, 1161, 1350, 1756, 1502 },
+    {  559,  766, 1185,  357, 1682, 1428, 1329, 1897, 1219,  802 },
+    {  473,  909, 1164,  771,  719, 2508, 1427, 1432,  722,  782 },
+    {  342,  892,  785, 1145, 1150,  794, 1296, 1550,  973, 1057 },
+    {  208, 1036, 1326, 1343, 1606, 3395,  815, 1455, 1618,  712 },
+    {  228,  928,  890, 1046, 3499, 1711,  994,  829, 1720, 1318 },
+    {  768,  724, 1058,  636,  991, 1075, 1319, 1324,  616,  825 },
+    {  305, 1167, 1358,  899, 1587, 1587,  987, 1988, 1332,  501 } }
 };
 
 //------------------------------------------------------------------------------
+// helper functions for residuals struct VP8Residual.
+
+void VP8InitResidual(int first, int coeff_type,
+                     VP8Encoder* const enc, VP8Residual* const res) {
+  res->coeff_type = coeff_type;
+  res->prob  = enc->proba_.coeffs_[coeff_type];  // 'coeff_type' slice of enc->proba_
+  res->stats = enc->proba_.stats_[coeff_type];   // (same slicing for stats and costs)
+  res->costs = enc->proba_.remapped_costs_[coeff_type];
+  res->first = first;  // res->last and res->coeffs are not written by this function
+}
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
+  const int x = (it->i4_ & 3), y = (it->i4_ >> 2);  // low 2 bits / remaining bits of i4_
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int R = 0;
+  int ctx;
+
+  VP8InitResidual(0, 3, enc, &res);  // first = 0, coeff_type = 3
+  ctx = it->top_nz_[x] + it->left_nz_[y];  // context = top entry + left entry
+  VP8SetResidualCoeffs(levels, &res);
+  R += VP8GetResidualCost(ctx, &res);
+  return R;  // cost of this one block; 'it' is only read here
+}
+
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int x, y;
+  int R = 0;  // running total: one DC block plus sixteen AC blocks
+
+  VP8IteratorNzToBytes(it);   // re-import the non-zero context
+
+  // DC
+  VP8InitResidual(0, 1, enc, &res);  // first = 0, coeff_type = 1
+  VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+  R += VP8GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
+
+  // AC
+  VP8InitResidual(1, 0, enc, &res);  // first = 1, coeff_type = 0
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      R += VP8GetResidualCost(ctx, &res);
+      it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);  // 0/1, read by later blocks
+    }
+  }
+  return R;  // note: it->top_nz_[0..3] and it->left_nz_[0..3] were overwritten
+}
+
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int ch, x, y;
+  int R = 0;  // running total over the eight blocks visited below
+
+  VP8IteratorNzToBytes(it);  // re-import the non-zero context
+
+  VP8InitResidual(0, 2, enc, &res);  // first = 0, coeff_type = 2
+  for (ch = 0; ch <= 2; ch += 2) {  // ch = 0, 2: two 2x2 groups (presumably U then V)
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        R += VP8GetResidualCost(ctx, &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
+      }
+    }
+  }
+  return R;  // note: it->top_nz_[4..7] and it->left_nz_[4..7] were overwritten
+}
+
+
+//------------------------------------------------------------------------------
+// Recording of token probabilities.
+
+// Accumulates 'bit' into the packed counter *stats and returns 'bit' unchanged.
+static int Record(int bit, proba_t* const stats) {
+  proba_t p = *stats;
+  if (p >= 0xffff0000u) {               // an overflow is inbound.
+    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
+  }
+  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
+  p += 0x00010000u + bit;  // callers in this file pass bit as 0 or 1
+  *stats = p;
+  return bit;  // lets callers branch on the recorded bit directly
+}
+
+// We keep the table-free variant around for reference, in case.
+#define USE_LEVEL_CODE_TABLE
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
+// Simulate block coding, but only record statistics. Returns 0 when
+// res->last < 0, else 1. Note: no need to record the fixed probas.
+int VP8RecordCoeffs(int ctx, const VP8Residual* const res) {
+  int n = res->first;
+  // should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  proba_t* s = res->stats[n][ctx];
+  if (res->last  < 0) {
+    Record(0, s + 0);  // the only event recorded in this case
+    return 0;
+  }
+  while (n <= res->last) {
+    int v;
+    Record(1, s + 0);  // order of record doesn't matter
+    while ((v = res->coeffs[n++]) == 0) {  // assumes coeffs[last] != 0 - TODO confirm
+      Record(0, s + 1);
+      s = res->stats[VP8EncBands[n]][0];  // n already advanced: next position, ctx 0
+    }
+    Record(1, s + 1);
+    if (!Record(2u < (unsigned int)(v + 1), s + 2)) {  // v = -1 or 1
+      s = res->stats[VP8EncBands[n]][1];
+    } else {
+      v = abs(v);  // only the magnitude is recorded from here on
+#if !defined(USE_LEVEL_CODE_TABLE)
+      if (!Record(v > 4, s + 3)) {
+        if (Record(v != 2, s + 4))
+          Record(v == 4, s + 5);
+      } else if (!Record(v > 10, s + 6)) {
+        Record(v > 6, s + 7);
+      } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
+        Record((v >= 3 + (8 << 1)), s + 9);
+      } else {
+        Record((v >= 3 + (8 << 3)), s + 10);
+      }
+#else
+      if (v > MAX_VARIABLE_LEVEL) {  // clamp so v - 1 stays a valid table row
+        v = MAX_VARIABLE_LEVEL;
+      }
+
+      {
+        const int bits = VP8LevelCodes[v - 1][1];
+        int pattern = VP8LevelCodes[v - 1][0];
+        int i;
+        for (i = 0; (pattern >>= 1) != 0; ++i) {  // walk pattern bits 1 and up
+          const int mask = 2 << i;
+          if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
+        }
+      }
 #endif
+      s = res->stats[VP8EncBands[n]][2];
+    }
+  }
+  if (n < 16) Record(0, s + 0);  // nothing is recorded once n has reached 16
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/enc/cost.h b/drivers/webp/enc/cost.h
index 09b75b699d..20960d6d74 100644
--- a/drivers/webp/enc/cost.h
+++ b/drivers/webp/enc/cost.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Cost tables for level and modes.
@@ -12,14 +14,32 @@
 #ifndef WEBP_ENC_COST_H_
 #define WEBP_ENC_COST_H_
 
+#include <assert.h>
+#include <stdlib.h>
 #include "./vp8enci.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-extern const uint16_t VP8LevelFixedCosts[2048];   // approximate cost per level
-extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
+// On-the-fly info about the current set of residuals. Handy to avoid
+// passing zillions of params.
+typedef struct VP8Residual VP8Residual;
+struct VP8Residual {
+  int first;              // position where coding starts; set by VP8InitResidual()
+  int last;               // last coded position; VP8RecordCoeffs() treats < 0 as none
+  const int16_t* coeffs;  // coefficient values, indexed by position
+
+  int coeff_type;         // selects the per-type tables below
+  ProbaArray*   prob;     // VP8InitResidual(): enc->proba_.coeffs_[coeff_type]
+  StatsArray*   stats;    // VP8InitResidual(): enc->proba_.stats_[coeff_type]
+  CostArrayPtr  costs;    // VP8InitResidual(): enc->proba_.remapped_costs_[coeff_type]
+};
+
+void VP8InitResidual(int first, int coeff_type,
+                     VP8Encoder* const enc, VP8Residual* const res);
+
+int VP8RecordCoeffs(int ctx, const VP8Residual* const res);
 
 // Cost of coding one event with probability 'proba'.
 static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
@@ -28,7 +48,7 @@ static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
 
 // Level cost calculations
 extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
-void VP8CalculateLevelCosts(VP8Proba* const proba);
+void VP8CalculateLevelCosts(VP8EncProba* const proba);
 static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
   return VP8LevelFixedCosts[level]
        + table[(level > MAX_VARIABLE_LEVEL) ? MAX_VARIABLE_LEVEL : level];
@@ -41,7 +61,7 @@ extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/enc/delta_palettization.c b/drivers/webp/enc/delta_palettization.c
new file mode 100644
index 0000000000..8bd3a3d233
--- /dev/null
+++ b/drivers/webp/enc/delta_palettization.c
@@ -0,0 +1,455 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Mislav Bradac (mislavm@google.com)
+//
+
+#include "./delta_palettization.h"
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "webp/types.h"
+#include "../dsp/lossless.h"
+
+#define MK_COL(r, g, b) (((r) << 16) + ((g) << 8) + (b))
+
+// Format allows palette up to 256 entries, but more palette entries produce
+// bigger entropy. In the future it will probably be useful to add more entries
+// that are far from the origin of the palette or choose remaining entries
+// dynamically.
+#define DELTA_PALETTE_SIZE 226
+
+// Palette used for delta_palettization. Entries are roughly sorted by distance
+// of their signed equivalents from the origin.
+static const uint32_t kDeltaPalette[DELTA_PALETTE_SIZE] = {
+  MK_COL(0u, 0u, 0u),
+  MK_COL(255u, 255u, 255u),
+  MK_COL(1u, 1u, 1u),
+  MK_COL(254u, 254u, 254u),
+  MK_COL(2u, 2u, 2u),
+  MK_COL(4u, 4u, 4u),
+  MK_COL(252u, 252u, 252u),
+  MK_COL(250u, 0u, 0u),
+  MK_COL(0u, 250u, 0u),
+  MK_COL(0u, 0u, 250u),
+  MK_COL(6u, 0u, 0u),
+  MK_COL(0u, 6u, 0u),
+  MK_COL(0u, 0u, 6u),
+  MK_COL(0u, 0u, 248u),
+  MK_COL(0u, 0u, 8u),
+  MK_COL(0u, 248u, 0u),
+  MK_COL(0u, 248u, 248u),
+  MK_COL(0u, 248u, 8u),
+  MK_COL(0u, 8u, 0u),
+  MK_COL(0u, 8u, 248u),
+  MK_COL(0u, 8u, 8u),
+  MK_COL(8u, 8u, 8u),
+  MK_COL(248u, 0u, 0u),
+  MK_COL(248u, 0u, 248u),
+  MK_COL(248u, 0u, 8u),
+  MK_COL(248u, 248u, 0u),
+  MK_COL(248u, 8u, 0u),
+  MK_COL(8u, 0u, 0u),
+  MK_COL(8u, 0u, 248u),
+  MK_COL(8u, 0u, 8u),
+  MK_COL(8u, 248u, 0u),
+  MK_COL(8u, 8u, 0u),
+  MK_COL(23u, 23u, 23u),
+  MK_COL(13u, 13u, 13u),
+  MK_COL(232u, 232u, 232u),
+  MK_COL(244u, 244u, 244u),
+  MK_COL(245u, 245u, 250u),
+  MK_COL(50u, 50u, 50u),
+  MK_COL(204u, 204u, 204u),
+  MK_COL(236u, 236u, 236u),
+  MK_COL(16u, 16u, 16u),
+  MK_COL(240u, 16u, 16u),
+  MK_COL(16u, 240u, 16u),
+  MK_COL(240u, 240u, 16u),
+  MK_COL(16u, 16u, 240u),
+  MK_COL(240u, 16u, 240u),
+  MK_COL(16u, 240u, 240u),
+  MK_COL(240u, 240u, 240u),
+  MK_COL(0u, 0u, 232u),
+  MK_COL(0u, 232u, 0u),
+  MK_COL(232u, 0u, 0u),
+  MK_COL(0u, 0u, 24u),
+  MK_COL(0u, 24u, 0u),
+  MK_COL(24u, 0u, 0u),
+  MK_COL(32u, 32u, 32u),
+  MK_COL(224u, 32u, 32u),
+  MK_COL(32u, 224u, 32u),
+  MK_COL(224u, 224u, 32u),
+  MK_COL(32u, 32u, 224u),
+  MK_COL(224u, 32u, 224u),
+  MK_COL(32u, 224u, 224u),
+  MK_COL(224u, 224u, 224u),
+  MK_COL(0u, 0u, 176u),
+  MK_COL(0u, 0u, 80u),
+  MK_COL(0u, 176u, 0u),
+  MK_COL(0u, 176u, 176u),
+  MK_COL(0u, 176u, 80u),
+  MK_COL(0u, 80u, 0u),
+  MK_COL(0u, 80u, 176u),
+  MK_COL(0u, 80u, 80u),
+  MK_COL(176u, 0u, 0u),
+  MK_COL(176u, 0u, 176u),
+  MK_COL(176u, 0u, 80u),
+  MK_COL(176u, 176u, 0u),
+  MK_COL(176u, 80u, 0u),
+  MK_COL(80u, 0u, 0u),
+  MK_COL(80u, 0u, 176u),
+  MK_COL(80u, 0u, 80u),
+  MK_COL(80u, 176u, 0u),
+  MK_COL(80u, 80u, 0u),
+  MK_COL(0u, 0u, 152u),
+  MK_COL(0u, 0u, 104u),
+  MK_COL(0u, 152u, 0u),
+  MK_COL(0u, 152u, 152u),
+  MK_COL(0u, 152u, 104u),
+  MK_COL(0u, 104u, 0u),
+  MK_COL(0u, 104u, 152u),
+  MK_COL(0u, 104u, 104u),
+  MK_COL(152u, 0u, 0u),
+  MK_COL(152u, 0u, 152u),
+  MK_COL(152u, 0u, 104u),
+  MK_COL(152u, 152u, 0u),
+  MK_COL(152u, 104u, 0u),
+  MK_COL(104u, 0u, 0u),
+  MK_COL(104u, 0u, 152u),
+  MK_COL(104u, 0u, 104u),
+  MK_COL(104u, 152u, 0u),
+  MK_COL(104u, 104u, 0u),
+  MK_COL(216u, 216u, 216u),
+  MK_COL(216u, 216u, 40u),
+  MK_COL(216u, 216u, 176u),
+  MK_COL(216u, 216u, 80u),
+  MK_COL(216u, 40u, 216u),
+  MK_COL(216u, 40u, 40u),
+  MK_COL(216u, 40u, 176u),
+  MK_COL(216u, 40u, 80u),
+  MK_COL(216u, 176u, 216u),
+  MK_COL(216u, 176u, 40u),
+  MK_COL(216u, 176u, 176u),
+  MK_COL(216u, 176u, 80u),
+  MK_COL(216u, 80u, 216u),
+  MK_COL(216u, 80u, 40u),
+  MK_COL(216u, 80u, 176u),
+  MK_COL(216u, 80u, 80u),
+  MK_COL(40u, 216u, 216u),
+  MK_COL(40u, 216u, 40u),
+  MK_COL(40u, 216u, 176u),
+  MK_COL(40u, 216u, 80u),
+  MK_COL(40u, 40u, 216u),
+  MK_COL(40u, 40u, 40u),
+  MK_COL(40u, 40u, 176u),
+  MK_COL(40u, 40u, 80u),
+  MK_COL(40u, 176u, 216u),
+  MK_COL(40u, 176u, 40u),
+  MK_COL(40u, 176u, 176u),
+  MK_COL(40u, 176u, 80u),
+  MK_COL(40u, 80u, 216u),
+  MK_COL(40u, 80u, 40u),
+  MK_COL(40u, 80u, 176u),
+  MK_COL(40u, 80u, 80u),
+  MK_COL(80u, 216u, 216u),
+  MK_COL(80u, 216u, 40u),
+  MK_COL(80u, 216u, 176u),
+  MK_COL(80u, 216u, 80u),
+  MK_COL(80u, 40u, 216u),
+  MK_COL(80u, 40u, 40u),
+  MK_COL(80u, 40u, 176u),
+  MK_COL(80u, 40u, 80u),
+  MK_COL(80u, 176u, 216u),
+  MK_COL(80u, 176u, 40u),
+  MK_COL(80u, 176u, 176u),
+  MK_COL(80u, 176u, 80u),
+  MK_COL(80u, 80u, 216u),
+  MK_COL(80u, 80u, 40u),
+  MK_COL(80u, 80u, 176u),
+  MK_COL(80u, 80u, 80u),
+  MK_COL(0u, 0u, 192u),
+  MK_COL(0u, 0u, 64u),
+  MK_COL(0u, 0u, 128u),
+  MK_COL(0u, 192u, 0u),
+  MK_COL(0u, 192u, 192u),
+  MK_COL(0u, 192u, 64u),
+  MK_COL(0u, 192u, 128u),
+  MK_COL(0u, 64u, 0u),
+  MK_COL(0u, 64u, 192u),
+  MK_COL(0u, 64u, 64u),
+  MK_COL(0u, 64u, 128u),
+  MK_COL(0u, 128u, 0u),
+  MK_COL(0u, 128u, 192u),
+  MK_COL(0u, 128u, 64u),
+  MK_COL(0u, 128u, 128u),
+  MK_COL(176u, 216u, 216u),
+  MK_COL(176u, 216u, 40u),
+  MK_COL(176u, 216u, 176u),
+  MK_COL(176u, 216u, 80u),
+  MK_COL(176u, 40u, 216u),
+  MK_COL(176u, 40u, 40u),
+  MK_COL(176u, 40u, 176u),
+  MK_COL(176u, 40u, 80u),
+  MK_COL(176u, 176u, 216u),
+  MK_COL(176u, 176u, 40u),
+  MK_COL(176u, 176u, 176u),
+  MK_COL(176u, 176u, 80u),
+  MK_COL(176u, 80u, 216u),
+  MK_COL(176u, 80u, 40u),
+  MK_COL(176u, 80u, 176u),
+  MK_COL(176u, 80u, 80u),
+  MK_COL(192u, 0u, 0u),
+  MK_COL(192u, 0u, 192u),
+  MK_COL(192u, 0u, 64u),
+  MK_COL(192u, 0u, 128u),
+  MK_COL(192u, 192u, 0u),
+  MK_COL(192u, 192u, 192u),
+  MK_COL(192u, 192u, 64u),
+  MK_COL(192u, 192u, 128u),
+  MK_COL(192u, 64u, 0u),
+  MK_COL(192u, 64u, 192u),
+  MK_COL(192u, 64u, 64u),
+  MK_COL(192u, 64u, 128u),
+  MK_COL(192u, 128u, 0u),
+  MK_COL(192u, 128u, 192u),
+  MK_COL(192u, 128u, 64u),
+  MK_COL(192u, 128u, 128u),
+  MK_COL(64u, 0u, 0u),
+  MK_COL(64u, 0u, 192u),
+  MK_COL(64u, 0u, 64u),
+  MK_COL(64u, 0u, 128u),
+  MK_COL(64u, 192u, 0u),
+  MK_COL(64u, 192u, 192u),
+  MK_COL(64u, 192u, 64u),
+  MK_COL(64u, 192u, 128u),
+  MK_COL(64u, 64u, 0u),
+  MK_COL(64u, 64u, 192u),
+  MK_COL(64u, 64u, 64u),
+  MK_COL(64u, 64u, 128u),
+  MK_COL(64u, 128u, 0u),
+  MK_COL(64u, 128u, 192u),
+  MK_COL(64u, 128u, 64u),
+  MK_COL(64u, 128u, 128u),
+  MK_COL(128u, 0u, 0u),
+  MK_COL(128u, 0u, 192u),
+  MK_COL(128u, 0u, 64u),
+  MK_COL(128u, 0u, 128u),
+  MK_COL(128u, 192u, 0u),
+  MK_COL(128u, 192u, 192u),
+  MK_COL(128u, 192u, 64u),
+  MK_COL(128u, 192u, 128u),
+  MK_COL(128u, 64u, 0u),
+  MK_COL(128u, 64u, 192u),
+  MK_COL(128u, 64u, 64u),
+  MK_COL(128u, 64u, 128u),
+  MK_COL(128u, 128u, 0u),
+  MK_COL(128u, 128u, 192u),
+  MK_COL(128u, 128u, 64u),
+  MK_COL(128u, 128u, 128u),
+};
+
+#undef MK_COL
+
+//------------------------------------------------------------------------------
+// TODO(skal): move the functions to dsp/lossless.c when the correct
+// granularity is found. For now, we'll just copy-paste some useful bits
+// here instead.
+
+// In-place per-byte sum (*a += b); each of the four components wraps mod 256.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+  const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
+  const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
+  *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);  // drop carries
+}
+
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {
+  if (a < 256) {
+    return a;
+  }
+  // Here a >= 256. A small negative int converted to uint32_t has top byte
+  // 0xff, so ~a >> 24 is 0; a positive value below 2^24 gives 0xff, i.e. 255.
+  return ~a >> 24;
+}
+
+// Delta palettization functions.
+static WEBP_INLINE int Square(int x) {
+  return x * x;  // no overflow guard; CalcDist() passes byte and intensity deltas
+}
+
+static WEBP_INLINE uint32_t Intensity(uint32_t a) {
+  return  // weighted channel sum; the top (alpha) byte is ignored
+      30 * ((a >> 16) & 0xff) +  // red
+      59 * ((a >>  8) & 0xff) +  // green
+      11 * ((a >>  0) & 0xff);   // blue; the weights sum to 100
+}
+
+static uint32_t CalcDist(uint32_t predicted_value, uint32_t actual_value,
+                         uint32_t palette_entry) {
+  int i;
+  uint32_t distance = 0;
+  AddPixelsEq(&predicted_value, palette_entry);  // local copy: prediction + delta
+  for (i = 0; i < 32; i += 8) {  // all four bytes, the top (alpha) byte included
+    const int32_t av = (actual_value >> i) & 0xff;
+    const int32_t pv = (predicted_value >> i) & 0xff;
+    distance += Square(pv - av);
+  }
+  // We sum square of intensity difference with factor 10, but because Intensity
+  // returns 100 times real intensity we need to multiply differences of colors
+  // by 1000.
+  distance *= 1000u;
+  distance += Square(Intensity(predicted_value)  // unsigned difference, converted
+                     - Intensity(actual_value)); // to Square()'s int parameter
+  return distance;  // smaller means the palette entry fits better
+}
+
+static uint32_t Predict(int x, int y, uint32_t* image) {
+  const uint32_t t = (y == 0) ? ARGB_BLACK : image[x];  // NOTE(review): top? confirm
+  const uint32_t l = (x == 0) ? ARGB_BLACK : image[x - 1];  // left neighbour
+  const uint32_t p =  // per-byte (t + l) / 2, rounding down
+      (((((t >> 24) & 0xff) + ((l >> 24) & 0xff)) / 2) << 24) +
+      (((((t >> 16) & 0xff) + ((l >> 16) & 0xff)) / 2) << 16) +
+      (((((t >>  8) & 0xff) + ((l >>  8) & 0xff)) / 2) <<  8) +
+      (((((t >>  0) & 0xff) + ((l >>  0) & 0xff)) / 2) <<  0);
+  if (x == 0 && y == 0) return ARGB_BLACK;  // no neighbour at all
+  if (x == 0) return t;  // first column: only t is available
+  if (y == 0) return l;  // first row: only l is available
+  return p;
+}
+
+static WEBP_INLINE int AddSubtractComponentFullWithCoefficient(
+    int a, int b, int c) {
+  return Clip255(a + ((b - c) >> 2));  // a + (b - c) >> 2, clamped to [0, 255]
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFullWithCoefficient(
+    uint32_t c0, uint32_t c1, uint32_t c2) {
+  const int a = AddSubtractComponentFullWithCoefficient(  // bits 24..31
+      c0 >> 24, c1 >> 24, c2 >> 24);
+  const int r = AddSubtractComponentFullWithCoefficient((c0 >> 16) & 0xff,
+                                                       (c1 >> 16) & 0xff,
+                                                       (c2 >> 16) & 0xff);
+  const int g = AddSubtractComponentFullWithCoefficient((c0 >> 8) & 0xff,
+                                                       (c1 >> 8) & 0xff,
+                                                       (c2 >> 8) & 0xff);
+  const int b = AddSubtractComponentFullWithCoefficient(  // bits 0..7
+      c0 & 0xff, c1 & 0xff, c2 & 0xff);
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;  // repack the four bytes
+}
+
+//------------------------------------------------------------------------------
+
+// Returns the index of the palette entry that minimizes CalcDist() between
+// 'src' (the actual pixel) and 'predicted_value' combined with that entry.
+// Ties keep the lowest index. palette[0] is read unconditionally, so
+// 'palette_size' must be at least 1. Nothing besides locals is written here.
+static int FindBestPaletteEntry(uint32_t src, uint32_t predicted_value,
+                                const uint32_t palette[], int palette_size) {
+  int i;
+  int idx = 0;  // best candidate so far; entry 0 is the starting point
+  uint32_t best_distance = CalcDist(predicted_value, src, palette[0]);
+  for (i = 1; i < palette_size; ++i) {
+    const uint32_t distance = CalcDist(predicted_value, src, palette[i]);
+    if (distance < best_distance) {  // strict '<': earlier entries win ties
+      best_distance = distance;
+      idx = i;
+    }
+  }
+  return idx;
+}
+
+static void ApplyBestPaletteEntry(int x, int y,
+                                  uint32_t new_value, uint32_t palette_value,
+                                  uint32_t* src, int src_stride,
+                                  uint32_t* new_image) {
+  AddPixelsEq(&new_value, palette_value);  // new_value is a by-value copy
+  if (x > 0) {  // adjust the source pixel to the left, in place
+    src[x - 1] = ClampedAddSubtractFullWithCoefficient(src[x - 1],
+                                                       new_value, src[x]);
+  }
+  if (y > 0) {  // adjust the source pixel one stride back, in place
+    src[x - src_stride] =
+        ClampedAddSubtractFullWithCoefficient(src[x - src_stride],
+                                              new_value, src[x]);
+  }
+  new_image[x] = new_value;  // stored in the caller-provided row buffer
+}
+
+//------------------------------------------------------------------------------
+// Main entry point
+
+static WebPEncodingError ApplyDeltaPalette(uint32_t* src, uint32_t* dst,
+                                           uint32_t src_stride,
+                                           uint32_t dst_stride,
+                                           const uint32_t* palette,
+                                           int palette_size,
+                                           int width, int height,
+                                           int num_passes) {
+  int x, y;
+  WebPEncodingError err = VP8_ENC_OK;  // only changed on allocation failure
+  uint32_t* new_image = (uint32_t*)WebPSafeMalloc(width, sizeof(*new_image));
+  uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row));
+  if (new_image == NULL || tmp_row == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;  // both buffers still go through WebPSafeFree() below
+  }
+
+  while (num_passes--) {  // each pass rescans the picture from its first row
+    uint32_t* cur_src = src;
+    uint32_t* cur_dst = dst;
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const uint32_t predicted_value = Predict(x, y, new_image);
+        tmp_row[x] = FindBestPaletteEntry(cur_src[x], predicted_value,
+                                          palette, palette_size);
+        ApplyBestPaletteEntry(x, y, predicted_value, palette[tmp_row[x]],
+                              cur_src, src_stride, new_image);
+      }
+      for (x = 0; x < width; ++x) {
+        cur_dst[x] = palette[tmp_row[x]];  // dst receives the chosen entry
+      }
+      cur_src += src_stride;  // strides are counted in uint32_t pixels
+      cur_dst += dst_stride;
+    }
+  }
+ Error:
+  WebPSafeFree(new_image);
+  WebPSafeFree(tmp_row);
+  return err;
+}
+
+// Sets enc->palette_[] from kDeltaPalette (last entry derived from src[0]) and
+// runs two ApplyDeltaPalette() passes from pic->argb into enc->argb_.
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) {
+  const WebPPicture* const pic = enc->pic_;
+  uint32_t* src = pic->argb;  // passed as the non-const 'src' argument below
+  uint32_t* dst = enc->argb_;
+  const int width = pic->width;
+  const int height = pic->height;
+
+  WebPEncodingError err = VP8_ENC_OK;
+  memcpy(enc->palette_, kDeltaPalette, sizeof(kDeltaPalette));
+  enc->palette_[DELTA_PALETTE_SIZE - 1] = src[0] - 0xff000000u;
+  enc->palette_size_ = DELTA_PALETTE_SIZE;
+  err = ApplyDeltaPalette(src, dst, pic->argb_stride, enc->current_width_,
+                          enc->palette_, enc->palette_size_,
+                          width, height, 2);
+  if (err != VP8_ENC_OK) goto Error;  // currently same as falling through
+
+ Error:
+  return err;
+}
+
+#else  // !WEBP_EXPERIMENTAL_FEATURES
+
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc) {
+  (void)enc;  // unused in builds without WEBP_EXPERIMENTAL_FEATURES
+  return VP8_ENC_ERROR_INVALID_CONFIGURATION;
+}
+
+#endif  // WEBP_EXPERIMENTAL_FEATURES
diff --git a/drivers/webp/enc/delta_palettization.h b/drivers/webp/enc/delta_palettization.h
new file mode 100644
index 0000000000..54195d452c
--- /dev/null
+++ b/drivers/webp/enc/delta_palettization.h
@@ -0,0 +1,25 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Author: Mislav Bradac (mislavm@google.com)
+//
+
+#ifndef WEBP_ENC_DELTA_PALETTIZATION_H_
+#define WEBP_ENC_DELTA_PALETTIZATION_H_
+
+#include "webp/encode.h"
+#include "../enc/vp8li.h"
+
+// Replaces enc->argb_[] input by a palettizable approximation of it,
+// and generates optimal enc->palette_[].
+// Documented as able to revert the enc->use_palette_ / enc->use_predict_
+// flags when the saving is poor (NOTE(review): confirm in the .c file).
+WebPEncodingError WebPSearchOptimalDeltaPalette(VP8LEncoder* const enc);
+
+#endif  // WEBP_ENC_DELTA_PALETTIZATION_H_
diff --git a/drivers/webp/enc/filter.c b/drivers/webp/enc/filter.c
index 7fb78a3949..1a4dd947fb 100644
--- a/drivers/webp/enc/filter.c
+++ b/drivers/webp/enc/filter.c
@@ -1,194 +1,68 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Selecting filter level
 //
 // Author: somnath@google.com (Somnath Banerjee)
 
+#include <assert.h>
 #include "./vp8enci.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-// NOTE: clip1, tables and InitTables are repeated entries of dsp.c
-static uint8_t abs0[255 + 255 + 1];     // abs(i)
-static uint8_t abs1[255 + 255 + 1];     // abs(i)>>1
-static int8_t sclip1[1020 + 1020 + 1];  // clips [-1020, 1020] to [-128, 127]
-static int8_t sclip2[112 + 112 + 1];    // clips [-112, 112] to [-16, 15]
-static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
-
-static int tables_ok = 0;
-
-static void InitTables(void) {
-  if (!tables_ok) {
-    int i;
-    for (i = -255; i <= 255; ++i) {
-      abs0[255 + i] = (i < 0) ? -i : i;
-      abs1[255 + i] = abs0[255 + i] >> 1;
-    }
-    for (i = -1020; i <= 1020; ++i) {
-      sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
-    }
-    for (i = -112; i <= 112; ++i) {
-      sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
-    }
-    for (i = -255; i <= 255 + 255; ++i) {
-      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
-    }
-    tables_ok = 1;
-  }
-}
-
-//------------------------------------------------------------------------------
-// Edge filtering functions
-
-// 4 pixels in, 2 pixels out
-static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1];
-  const int a1 = sclip2[112 + ((a + 4) >> 3)];
-  const int a2 = sclip2[112 + ((a + 3) >> 3)];
-  p[-step] = clip1[255 + p0 + a2];
-  p[    0] = clip1[255 + q0 - a1];
-}
-
-// 4 pixels in, 4 pixels out
-static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  const int a = 3 * (q0 - p0);
-  const int a1 = sclip2[112 + ((a + 4) >> 3)];
-  const int a2 = sclip2[112 + ((a + 3) >> 3)];
-  const int a3 = (a1 + 1) >> 1;
-  p[-2*step] = clip1[255 + p1 + a3];
-  p[-  step] = clip1[255 + p0 + a2];
-  p[      0] = clip1[255 + q0 - a1];
-  p[   step] = clip1[255 + q1 - a3];
-}
-
-// high edge-variance
-static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh);
-}
-
-static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
-  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
-  return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh;
-}
-
-static WEBP_INLINE int needs_filter2(const uint8_t* p,
-                                     int step, int t, int it) {
-  const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
-  const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step];
-  if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t)
-    return 0;
-  return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it &&
-         abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it &&
-         abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it;
+#include "../dsp/dsp.h"
+
+// This table gives, for a given sharpness, the filtering strength to be
+// used (at least) in order to filter a given edge step delta.
+// This is constructed by brute force inspection: for all delta, we iterate
+// over all possible filtering strength / thresh until needs_filter() returns
+// true.
+#define MAX_DELTA_SIZE 64
+static const uint8_t kLevelsFromDelta[8][MAX_DELTA_SIZE] = {
+  { 0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
+  { 0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 14, 15, 17, 18,
+    20, 21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42,
+    44, 45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 14, 16, 17, 19,
+    20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43,
+    44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 13, 15, 16, 18, 19,
+    21, 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43,
+    45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  3,  5,  6,  7,  8,  9, 11, 12, 14, 15, 17, 18, 20,
+    21, 23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44,
+    45, 47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  4,  5,  7,  8,  9, 11, 12, 13, 15, 16, 17, 19, 20,
+    22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44,
+    46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  4,  5,  7,  8,  9, 11, 12, 13, 15, 16, 18, 19, 21,
+    22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45,
+    46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 },
+  {  0,  1,  2,  4,  5,  7,  8,  9, 11, 12, 14, 15, 17, 18, 20, 21,
+    23, 24, 26, 27, 29, 30, 32, 33, 35, 36, 38, 39, 41, 42, 44, 45,
+    47, 48, 50, 51, 53, 54, 56, 57, 59, 60, 62, 63, 63, 63, 63, 63,
+    63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }
+};
+
+int VP8FilterStrengthFromDelta(int sharpness, int delta) {
+  const int pos = (delta < MAX_DELTA_SIZE) ? delta : MAX_DELTA_SIZE - 1;
+  assert(sharpness >= 0 && sharpness <= 7);  // row index; no-op under NDEBUG
+  return kLevelsFromDelta[sharpness][pos];  // NOTE: delta < 0 is not guarded
 }
 
 //------------------------------------------------------------------------------
-// Simple In-loop filtering (Paragraph 15.2)
-
-static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
-  int i;
-  for (i = 0; i < 16; ++i) {
-    if (needs_filter(p + i, stride, thresh)) {
-      do_filter2(p + i, stride);
-    }
-  }
-}
-
-static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
-  int i;
-  for (i = 0; i < 16; ++i) {
-    if (needs_filter(p + i * stride, 1, thresh)) {
-      do_filter2(p + i * stride, 1);
-    }
-  }
-}
-
-static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
-  int k;
-  for (k = 3; k > 0; --k) {
-    p += 4 * stride;
-    SimpleVFilter16(p, stride, thresh);
-  }
-}
-
-static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
-  int k;
-  for (k = 3; k > 0; --k) {
-    p += 4;
-    SimpleHFilter16(p, stride, thresh);
-  }
-}
-
-//------------------------------------------------------------------------------
-// Complex In-loop filtering (Paragraph 15.3)
-
-static WEBP_INLINE void FilterLoop24(uint8_t* p,
-                                     int hstride, int vstride, int size,
-                                     int thresh, int ithresh, int hev_thresh) {
-  while (size-- > 0) {
-    if (needs_filter2(p, hstride, thresh, ithresh)) {
-      if (hev(p, hstride, hev_thresh)) {
-        do_filter2(p, hstride);
-      } else {
-        do_filter4(p, hstride);
-      }
-    }
-    p += vstride;
-  }
-}
-
-// on three inner edges
-static void VFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
-  int k;
-  for (k = 3; k > 0; --k) {
-    p += 4 * stride;
-    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
-  }
-}
-
-static void HFilter16i(uint8_t* p, int stride,
-                       int thresh, int ithresh, int hev_thresh) {
-  int k;
-  for (k = 3; k > 0; --k) {
-    p += 4;
-    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
-  }
-}
-
-static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
-  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
-  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
-}
-
-static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
-                      int thresh, int ithresh, int hev_thresh) {
-  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
-  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
-}
-
-//------------------------------------------------------------------------------
-
-void (*VP8EncVFilter16i)(uint8_t*, int, int, int, int) = VFilter16i;
-void (*VP8EncHFilter16i)(uint8_t*, int, int, int, int) = HFilter16i;
-void (*VP8EncVFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8i;
-void (*VP8EncHFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i;
-
-void (*VP8EncSimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i;
-void (*VP8EncSimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i;
-
-//------------------------------------------------------------------------------
 // Paragraph 15.4: compute the inner-edge filtering strength
 
 static int GetILevel(int sharpness, int level) {
@@ -211,22 +85,22 @@ static void DoFilter(const VP8EncIterator* const it, int level) {
   const int ilevel = GetILevel(enc->config_->filter_sharpness, level);
   const int limit = 2 * level + ilevel;
 
-  uint8_t* const y_dst = it->yuv_out2_ + Y_OFF;
-  uint8_t* const u_dst = it->yuv_out2_ + U_OFF;
-  uint8_t* const v_dst = it->yuv_out2_ + V_OFF;
+  uint8_t* const y_dst = it->yuv_out2_ + Y_OFF_ENC;
+  uint8_t* const u_dst = it->yuv_out2_ + U_OFF_ENC;
+  uint8_t* const v_dst = it->yuv_out2_ + V_OFF_ENC;
 
   // copy current block to yuv_out2_
-  memcpy(y_dst, it->yuv_out_, YUV_SIZE * sizeof(uint8_t));
+  memcpy(y_dst, it->yuv_out_, YUV_SIZE_ENC * sizeof(uint8_t));
 
   if (enc->filter_hdr_.simple_ == 1) {   // simple
-    VP8EncSimpleHFilter16i(y_dst, BPS, limit);
-    VP8EncSimpleVFilter16i(y_dst, BPS, limit);
+    VP8SimpleHFilter16i(y_dst, BPS, limit);
+    VP8SimpleVFilter16i(y_dst, BPS, limit);
   } else {    // complex
     const int hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
-    VP8EncHFilter16i(y_dst, BPS, limit, ilevel, hev_thresh);
-    VP8EncHFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh);
-    VP8EncVFilter16i(y_dst, BPS, limit, ilevel, hev_thresh);
-    VP8EncVFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh);
+    VP8HFilter16i(y_dst, BPS, limit, ilevel, hev_thresh);
+    VP8HFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh);
+    VP8VFilter16i(y_dst, BPS, limit, ilevel, hev_thresh);
+    VP8VFilter8i(u_dst, v_dst, BPS, limit, ilevel, hev_thresh);
   }
 }
 
@@ -321,13 +195,16 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
   // compute SSIM in a 10 x 10 window
   for (x = 3; x < 13; x++) {
     for (y = 3; y < 13; y++) {
-      VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s);
+      VP8SSIMAccumulate(yuv1 + Y_OFF_ENC, BPS, yuv2 + Y_OFF_ENC, BPS,
+                        x, y, 16, 16, &s);
     }
   }
   for (x = 1; x < 7; x++) {
     for (y = 1; y < 7; y++) {
-      VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s);
-      VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s);
+      VP8SSIMAccumulate(yuv1 + U_OFF_ENC, BPS, yuv2 + U_OFF_ENC, BPS,
+                        x, y, 8, 8, &s);
+      VP8SSIMAccumulate(yuv1 + V_OFF_ENC, BPS, yuv2 + V_OFF_ENC, BPS,
+                        x, y, 8, 8, &s);
     }
   }
   return VP8SSIMGet(&s);
@@ -338,28 +215,28 @@ static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
 // loop filter strength
 
 void VP8InitFilter(VP8EncIterator* const it) {
-  int s, i;
-  if (!it->lf_stats_) return;
-
-  InitTables();
-  for (s = 0; s < NUM_MB_SEGMENTS; s++) {
-    for (i = 0; i < MAX_LF_LEVELS; i++) {
-      (*it->lf_stats_)[s][i] = 0;
+  if (it->lf_stats_ != NULL) {
+    int s, i;
+    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
+      for (i = 0; i < MAX_LF_LEVELS; i++) {
+        (*it->lf_stats_)[s][i] = 0;
+      }
     }
   }
 }
 
 void VP8StoreFilterStats(VP8EncIterator* const it) {
   int d;
+  VP8Encoder* const enc = it->enc_;
   const int s = it->mb_->segment_;
-  const int level0 = it->enc_->dqm_[s].fstrength_;  // TODO: ref_lf_delta[]
+  const int level0 = enc->dqm_[s].fstrength_;  // TODO: ref_lf_delta[]
 
   // explore +/-quant range of values around level0
-  const int delta_min = -it->enc_->dqm_[s].quant_;
-  const int delta_max = it->enc_->dqm_[s].quant_;
+  const int delta_min = -enc->dqm_[s].quant_;
+  const int delta_max = enc->dqm_[s].quant_;
   const int step_size = (delta_max - delta_min >= 4) ? 4 : 1;
 
-  if (!it->lf_stats_) return;
+  if (it->lf_stats_ == NULL) return;
 
   // NOTE: Currently we are applying filter only across the sublock edges
   // There are two reasons for that.
@@ -383,27 +260,40 @@ void VP8StoreFilterStats(VP8EncIterator* const it) {
 }
 
 void VP8AdjustFilterStrength(VP8EncIterator* const it) {
-  int s;
   VP8Encoder* const enc = it->enc_;
-
-  if (!it->lf_stats_) {
-    return;
-  }
-  for (s = 0; s < NUM_MB_SEGMENTS; s++) {
-    int i, best_level = 0;
-    // Improvement over filter level 0 should be at least 1e-5 (relatively)
-    double best_v = 1.00001 * (*it->lf_stats_)[s][0];
-    for (i = 1; i < MAX_LF_LEVELS; i++) {
-      const double v = (*it->lf_stats_)[s][i];
-      if (v > best_v) {
-        best_v = v;
-        best_level = i;
+  if (it->lf_stats_ != NULL) {
+    int s;
+    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
+      int i, best_level = 0;
+      // Improvement over filter level 0 should be at least 1e-5 (relatively)
+      double best_v = 1.00001 * (*it->lf_stats_)[s][0];
+      for (i = 1; i < MAX_LF_LEVELS; i++) {
+        const double v = (*it->lf_stats_)[s][i];
+        if (v > best_v) {
+          best_v = v;
+          best_level = i;
+        }
       }
+      enc->dqm_[s].fstrength_ = best_level;
     }
-    enc->dqm_[s].fstrength_ = best_level;
+  } else if (enc->config_->filter_strength > 0) {
+    int max_level = 0;
+    int s;
+    for (s = 0; s < NUM_MB_SEGMENTS; s++) {
+      VP8SegmentInfo* const dqm = &enc->dqm_[s];
+      // this '>> 3' accounts for some inverse WHT scaling
+      const int delta = (dqm->max_edge_ * dqm->y2_.q_[1]) >> 3;
+      const int level =
+          VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, delta);
+      if (level > dqm->fstrength_) {
+        dqm->fstrength_ = level;
+      }
+      if (max_level < dqm->fstrength_) {
+        max_level = dqm->fstrength_;
+      }
+    }
+    enc->filter_hdr_.level_ = max_level;
   }
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+// -----------------------------------------------------------------------------
diff --git a/drivers/webp/enc/frame.c b/drivers/webp/enc/frame.c
index bdd360069b..65a98ada4d 100644
--- a/drivers/webp/enc/frame.c
+++ b/drivers/webp/enc/frame.c
@@ -1,61 +1,98 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   frame coding and analysis
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include <assert.h>
-#include <stdlib.h>
 #include <string.h>
 #include <math.h>
 
-#include "./vp8enci.h"
 #include "./cost.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "./vp8enci.h"
+#include "../dsp/dsp.h"
+#include "webp/format_constants.h"  // RIFF constants
 
 #define SEGMENT_VISU 0
 #define DEBUG_SEARCH 0    // useful to track search convergence
 
-// On-the-fly info about the current set of residuals. Handy to avoid
-// passing zillions of params.
-typedef struct {
-  int first;
-  int last;
-  const int16_t* coeffs;
-
-  int coeff_type;
-  ProbaArray* prob;
-  StatsArray* stats;
-  CostArray*  cost;
-} VP8Residual;
+//------------------------------------------------------------------------------
+// multi-pass convergence
+
+#define HEADER_SIZE_ESTIMATE (RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE +  \
+                              VP8_FRAME_HEADER_SIZE)
+#define DQ_LIMIT 0.4  // convergence is considered reached if dq < DQ_LIMIT
+// we allow 2k of extra head-room in PARTITION0 limit.
+#define PARTITION0_SIZE_LIMIT ((VP8_MAX_PARTITION0_SIZE - 2048ULL) << 11)
+
+typedef struct {  // struct for organizing convergence in either size or PSNR
+  int is_first;  // 1 until ComputeNextQ() has run once
+  float dq;  // step for q: starts at 10, later clamped to [-30, 30]
+  float q, last_q;  // current and previous quality setting
+  double value, last_value;   // PSNR or size
+  double target;  // target size, target PSNR, or 40. as fallback
+  int do_size_search;  // non-zero when the config's target_size is not 0
+} PassStats;
+
+static int InitPassStats(const VP8Encoder* const enc, PassStats* const s) {
+  const uint64_t target_size = (uint64_t)enc->config_->target_size;
+  const int do_size_search = (target_size != 0);  // size target wins over PSNR
+  const float target_PSNR = enc->config_->target_PSNR;
+
+  s->is_first = 1;
+  s->dq = 10.f;  // magnitude of the first step taken by ComputeNextQ()
+  s->q = s->last_q = enc->config_->quality;
+  s->target = do_size_search ? (double)target_size
+            : (target_PSNR > 0.) ? target_PSNR
+            : 40.;   // default, just in case
+  s->value = s->last_value = 0.;
+  s->do_size_search = do_size_search;
+  return do_size_search;  // same flag that was stored in s->do_size_search
+}
+
+static float Clamp(float v, float min, float max) {
+  return (v < min) ? min : (v > max) ? max : v;  // expects min <= max
+}
+
+static float ComputeNextQ(PassStats* const s) {
+  float dq;
+  if (s->is_first) {  // no history yet: fixed step, sign from value vs. target
+    dq = (s->value > s->target) ? -s->dq : s->dq;
+    s->is_first = 0;
+  } else if (s->value != s->last_value) {  // interpolate from last two samples
+    const double slope = (s->target - s->value) / (s->last_value - s->value);
+    dq = (float)(slope * (s->last_q - s->q));
+  } else {
+    dq = 0.;  // we're done?!
+  }
+  // Limit variable to avoid large swings.
+  s->dq = Clamp(dq, -30.f, 30.f);
+  s->last_q = s->q;  // remember the sample this step was derived from
+  s->last_value = s->value;
+  s->q = Clamp(s->q + s->dq, 0.f, 100.f);
+  return s->q;  // the updated quality, also stored in s->q
+}
 
 //------------------------------------------------------------------------------
 // Tables for level coding
 
-const uint8_t VP8EncBands[16 + 1] = {
-  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
-  0  // sentinel
-};
-
-static const uint8_t kCat3[] = { 173, 148, 140 };
-static const uint8_t kCat4[] = { 176, 155, 140, 135 };
-static const uint8_t kCat5[] = { 180, 157, 141, 134, 130 };
-static const uint8_t kCat6[] =
+const uint8_t VP8Cat3[] = { 173, 148, 140 };
+const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };
+const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };
+const uint8_t VP8Cat6[] =
     { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
 
 //------------------------------------------------------------------------------
 // Reset the statistics about: number of skips, token proba, level cost,...
 
 static void ResetStats(VP8Encoder* const enc) {
-  VP8Proba* const proba = &enc->proba_;
+  VP8EncProba* const proba = &enc->proba_;
   VP8CalculateLevelCosts(proba);
   proba->nb_skip_ = 0;
 }
@@ -71,7 +108,7 @@ static int CalcSkipProba(uint64_t nb, uint64_t total) {
 
 // Returns the bit-cost for coding the skip probability.
 static int FinalizeSkipProba(VP8Encoder* const enc) {
-  VP8Proba* const proba = &enc->proba_;
+  VP8EncProba* const proba = &enc->proba_;
   const int nb_mbs = enc->mb_w_ * enc->mb_h_;
   const int nb_events = proba->nb_skip_;
   int size;
@@ -86,82 +123,6 @@ static int FinalizeSkipProba(VP8Encoder* const enc) {
   return size;
 }
 
-//------------------------------------------------------------------------------
-// Recording of token probabilities.
-
-static void ResetTokenStats(VP8Encoder* const enc) {
-  VP8Proba* const proba = &enc->proba_;
-  memset(proba->stats_, 0, sizeof(proba->stats_));
-}
-
-// Record proba context used
-static int Record(int bit, proba_t* const stats) {
-  proba_t p = *stats;
-  if (p >= 0xffff0000u) {               // an overflow is inbound.
-    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
-  }
-  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
-  p += 0x00010000u + bit;
-  *stats = p;
-  return bit;
-}
-
-// We keep the table free variant around for reference, in case.
-#define USE_LEVEL_CODE_TABLE
-
-// Simulate block coding, but only record statistics.
-// Note: no need to record the fixed probas.
-static int RecordCoeffs(int ctx, const VP8Residual* const res) {
-  int n = res->first;
-  proba_t* s = res->stats[VP8EncBands[n]][ctx];
-  if (res->last  < 0) {
-    Record(0, s + 0);
-    return 0;
-  }
-  while (n <= res->last) {
-    int v;
-    Record(1, s + 0);
-    while ((v = res->coeffs[n++]) == 0) {
-      Record(0, s + 1);
-      s = res->stats[VP8EncBands[n]][0];
-    }
-    Record(1, s + 1);
-    if (!Record(2u < (unsigned int)(v + 1), s + 2)) {  // v = -1 or 1
-      s = res->stats[VP8EncBands[n]][1];
-    } else {
-      v = abs(v);
-#if !defined(USE_LEVEL_CODE_TABLE)
-      if (!Record(v > 4, s + 3)) {
-        if (Record(v != 2, s + 4))
-          Record(v == 4, s + 5);
-      } else if (!Record(v > 10, s + 6)) {
-        Record(v > 6, s + 7);
-      } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
-        Record((v >= 3 + (8 << 1)), s + 9);
-      } else {
-        Record((v >= 3 + (8 << 3)), s + 10);
-      }
-#else
-      if (v > MAX_VARIABLE_LEVEL)
-        v = MAX_VARIABLE_LEVEL;
-
-      {
-        const int bits = VP8LevelCodes[v - 1][1];
-        int pattern = VP8LevelCodes[v - 1][0];
-        int i;
-        for (i = 0; (pattern >>= 1) != 0; ++i) {
-          const int mask = 2 << i;
-          if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
-        }
-      }
-#endif
-      s = res->stats[VP8EncBands[n]][2];
-    }
-  }
-  if (n < 16) Record(0, s + 0);
-  return 1;
-}
-
 // Collect statistics and deduce probabilities for next coding pass.
 // Return the total bit-cost for coding the probability updates.
 static int CalcTokenProba(int nb, int total) {
@@ -174,8 +135,12 @@ static int BranchCost(int nb, int total, int proba) {
   return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
 }
 
-static int FinalizeTokenProbas(VP8Encoder* const enc) {
-  VP8Proba* const proba = &enc->proba_;
+static void ResetTokenStats(VP8Encoder* const enc) {
+  VP8EncProba* const proba = &enc->proba_;
+  memset(proba->stats_, 0, sizeof(proba->stats_));
+}
+
+static int FinalizeTokenProbas(VP8EncProba* const proba) {
   int has_changed = 0;
   int size = 0;
   int t, b, c, p;
@@ -212,129 +177,44 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) {
 }
 
 //------------------------------------------------------------------------------
-// helper functions for residuals struct VP8Residual.
-
-static void InitResidual(int first, int coeff_type,
-                         VP8Encoder* const enc, VP8Residual* const res) {
-  res->coeff_type = coeff_type;
-  res->prob  = enc->proba_.coeffs_[coeff_type];
-  res->stats = enc->proba_.stats_[coeff_type];
-  res->cost  = enc->proba_.level_cost_[coeff_type];
-  res->first = first;
-}
+// Finalize Segment probability based on the coding tree
 
-static void SetResidualCoeffs(const int16_t* const coeffs,
-                              VP8Residual* const res) {
-  int n;
-  res->last = -1;
-  for (n = 15; n >= res->first; --n) {
-    if (coeffs[n]) {
-      res->last = n;
-      break;
-    }
-  }
-  res->coeffs = coeffs;
+static int GetProba(int a, int b) {
+  const int total = a + b;
+  return (total == 0) ? 255     // that's the default probability.
+                      : (255 * a + total / 2) / total;  // rounded proba
 }
 
-//------------------------------------------------------------------------------
-// Mode costs
-
-static int GetResidualCost(int ctx, const VP8Residual* const res) {
-  int n = res->first;
-  int p0 = res->prob[VP8EncBands[n]][ctx][0];
-  const uint16_t* t = res->cost[VP8EncBands[n]][ctx];
-  int cost;
+static void SetSegmentProbas(VP8Encoder* const enc) {
+  int p[NUM_MB_SEGMENTS] = { 0 };
+  int n;
 
-  if (res->last < 0) {
-    return VP8BitCost(0, p0);
-  }
-  cost = 0;
-  while (n <= res->last) {
-    const int v = res->coeffs[n];
-    const int b = VP8EncBands[n + 1];
-    ++n;
-    if (v == 0) {
-      // short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0):
-      cost += t[0];
-      t = res->cost[b][0];
-      continue;
-    }
-    cost += VP8BitCost(1, p0);
-    if (2u >= (unsigned int)(v + 1)) {   // v = -1 or 1
-      // short-case for "VP8LevelCost(t, 1)" (256 is VP8LevelFixedCosts[1]):
-      cost += 256 + t[1];
-      p0 = res->prob[b][1][0];
-      t = res->cost[b][1];
-    } else {
-      cost += VP8LevelCost(t, abs(v));
-      p0 = res->prob[b][2][0];
-      t = res->cost[b][2];
-    }
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    const VP8MBInfo* const mb = &enc->mb_info_[n];
+    p[mb->segment_]++;
   }
-  if (n < 16) cost += VP8BitCost(0, p0);
-  return cost;
-}
-
-int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
-  const int x = (it->i4_ & 3), y = (it->i4_ >> 2);
-  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
-  int R = 0;
-  int ctx;
-
-  InitResidual(0, 3, enc, &res);
-  ctx = it->top_nz_[x] + it->left_nz_[y];
-  SetResidualCoeffs(levels, &res);
-  R += GetResidualCost(ctx, &res);
-  return R;
-}
-
-int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
-  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
-  int x, y;
-  int R = 0;
-
-  VP8IteratorNzToBytes(it);   // re-import the non-zero context
-
-  // DC
-  InitResidual(0, 1, enc, &res);
-  SetResidualCoeffs(rd->y_dc_levels, &res);
-  R += GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
-
-  // AC
-  InitResidual(1, 0, enc, &res);
-  for (y = 0; y < 4; ++y) {
-    for (x = 0; x < 4; ++x) {
-      const int ctx = it->top_nz_[x] + it->left_nz_[y];
-      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
-      R += GetResidualCost(ctx, &res);
-      it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);
+  if (enc->pic_->stats != NULL) {
+    for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
+      enc->pic_->stats->segment_size[n] = p[n];
     }
   }
-  return R;
-}
-
-int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
-  VP8Residual res;
-  VP8Encoder* const enc = it->enc_;
-  int ch, x, y;
-  int R = 0;
-
-  VP8IteratorNzToBytes(it);  // re-import the non-zero context
-
-  InitResidual(0, 2, enc, &res);
-  for (ch = 0; ch <= 2; ch += 2) {
-    for (y = 0; y < 2; ++y) {
-      for (x = 0; x < 2; ++x) {
-        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
-        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
-        R += GetResidualCost(ctx, &res);
-        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
-      }
-    }
+  if (enc->segment_hdr_.num_segments_ > 1) {
+    uint8_t* const probas = enc->proba_.segments_;
+    probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
+    probas[1] = GetProba(p[0], p[1]);
+    probas[2] = GetProba(p[2], p[3]);
+
+    enc->segment_hdr_.update_map_ =
+        (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
+    enc->segment_hdr_.size_ =
+        p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
+        p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
+        p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
+        p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
+  } else {
+    enc->segment_hdr_.update_map_ = 0;
+    enc->segment_hdr_.size_ = 0;
   }
-  return R;
 }
 
 //------------------------------------------------------------------------------
@@ -342,7 +222,8 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
 
 static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
   int n = res->first;
-  const uint8_t* p = res->prob[VP8EncBands[n]][ctx];
+  // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
+  const uint8_t* p = res->prob[n][ctx];
   if (!VP8PutBit(bw, res->last >= 0, p[0])) {
     return 0;
   }
@@ -371,30 +252,30 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
       } else {
         int mask;
         const uint8_t* tab;
-        if (v < 3 + (8 << 1)) {          // kCat3  (3b)
+        if (v < 3 + (8 << 1)) {          // VP8Cat3  (3b)
           VP8PutBit(bw, 0, p[8]);
           VP8PutBit(bw, 0, p[9]);
           v -= 3 + (8 << 0);
           mask = 1 << 2;
-          tab = kCat3;
-        } else if (v < 3 + (8 << 2)) {   // kCat4  (4b)
+          tab = VP8Cat3;
+        } else if (v < 3 + (8 << 2)) {   // VP8Cat4  (4b)
           VP8PutBit(bw, 0, p[8]);
           VP8PutBit(bw, 1, p[9]);
           v -= 3 + (8 << 1);
           mask = 1 << 3;
-          tab = kCat4;
-        } else if (v < 3 + (8 << 3)) {   // kCat5  (5b)
+          tab = VP8Cat4;
+        } else if (v < 3 + (8 << 3)) {   // VP8Cat5  (5b)
           VP8PutBit(bw, 1, p[8]);
           VP8PutBit(bw, 0, p[10]);
           v -= 3 + (8 << 2);
           mask = 1 << 4;
-          tab = kCat5;
-        } else {                         // kCat6 (11b)
+          tab = VP8Cat5;
+        } else {                         // VP8Cat6 (11b)
           VP8PutBit(bw, 1, p[8]);
           VP8PutBit(bw, 1, p[10]);
           v -= 3 + (8 << 3);
           mask = 1 << 10;
-          tab = kCat6;
+          tab = VP8Cat6;
         }
         while (mask) {
           VP8PutBit(bw, !!(v & mask), *tab++);
@@ -411,8 +292,7 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
   return 1;
 }
 
-static void CodeResiduals(VP8BitWriter* const bw,
-                          VP8EncIterator* const it,
+static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
                           const VP8ModeScore* const rd) {
   int x, y, ch;
   VP8Residual res;
@@ -425,32 +305,32 @@ static void CodeResiduals(VP8BitWriter* const bw,
 
   pos1 = VP8BitWriterPos(bw);
   if (i16) {
-    InitResidual(0, 1, enc, &res);
-    SetResidualCoeffs(rd->y_dc_levels, &res);
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
     it->top_nz_[8] = it->left_nz_[8] =
       PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
-    InitResidual(1, 0, enc, &res);
+    VP8InitResidual(1, 0, enc, &res);
   } else {
-    InitResidual(0, 3, enc, &res);
+    VP8InitResidual(0, 3, enc, &res);
   }
 
   // luma-AC
   for (y = 0; y < 4; ++y) {
     for (x = 0; x < 4; ++x) {
       const int ctx = it->top_nz_[x] + it->left_nz_[y];
-      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
       it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
     }
   }
   pos2 = VP8BitWriterPos(bw);
 
   // U/V
-  InitResidual(0, 2, enc, &res);
+  VP8InitResidual(0, 2, enc, &res);
   for (ch = 0; ch <= 2; ch += 2) {
     for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x) {
         const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
-        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
         it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
             PutCoeffs(bw, ctx, &res);
       }
@@ -475,33 +355,33 @@ static void RecordResiduals(VP8EncIterator* const it,
   VP8IteratorNzToBytes(it);
 
   if (it->mb_->type_ == 1) {   // i16x16
-    InitResidual(0, 1, enc, &res);
-    SetResidualCoeffs(rd->y_dc_levels, &res);
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
     it->top_nz_[8] = it->left_nz_[8] =
-      RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
-    InitResidual(1, 0, enc, &res);
+      VP8RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
+    VP8InitResidual(1, 0, enc, &res);
   } else {
-    InitResidual(0, 3, enc, &res);
+    VP8InitResidual(0, 3, enc, &res);
   }
 
   // luma-AC
   for (y = 0; y < 4; ++y) {
     for (x = 0; x < 4; ++x) {
       const int ctx = it->top_nz_[x] + it->left_nz_[y];
-      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
-      it->top_nz_[x] = it->left_nz_[y] = RecordCoeffs(ctx, &res);
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = VP8RecordCoeffs(ctx, &res);
     }
   }
 
   // U/V
-  InitResidual(0, 2, enc, &res);
+  VP8InitResidual(0, 2, enc, &res);
   for (ch = 0; ch <= 2; ch += 2) {
     for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x) {
         const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
-        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
         it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
-            RecordCoeffs(ctx, &res);
+            VP8RecordCoeffs(ctx, &res);
       }
     }
   }
@@ -512,176 +392,59 @@ static void RecordResiduals(VP8EncIterator* const it,
 //------------------------------------------------------------------------------
 // Token buffer
 
-#ifdef USE_TOKEN_BUFFER
-
-void VP8TBufferInit(VP8TBuffer* const b) {
-  b->rows_ = NULL;
-  b->tokens_ = NULL;
-  b->last_ = &b->rows_;
-  b->left_ = 0;
-  b->error_ = 0;
-}
-
-int VP8TBufferNewPage(VP8TBuffer* const b) {
-  VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page));
-  if (page == NULL) {
-    b->error_ = 1;
-    return 0;
-  }
-  *b->last_ = page;
-  b->last_ = &page->next_;
-  b->left_ = MAX_NUM_TOKEN;
-  b->tokens_ = page->tokens_;
-  return 1;
-}
-
-void VP8TBufferClear(VP8TBuffer* const b) {
-  if (b != NULL) {
-    const VP8Tokens* p = b->rows_;
-    while (p != NULL) {
-      const VP8Tokens* const next = p->next_;
-      free((void*)p);
-      p = next;
-    }
-    VP8TBufferInit(b);
-  }
-}
-
-int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
-                  const uint8_t* const probas) {
-  VP8Tokens* p = b->rows_;
-  if (b->error_) return 0;
-  while (p != NULL) {
-    const int N = (p->next_ == NULL) ? b->left_ : 0;
-    int n = MAX_NUM_TOKEN;
-    while (n-- > N) {
-      VP8PutBit(bw, (p->tokens_[n] >> 15) & 1, probas[p->tokens_[n] & 0x7fff]);
-    }
-    p = p->next_;
-  }
-  return 1;
-}
-
-#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX))
-
-static int RecordCoeffTokens(int ctx, const VP8Residual* const res,
-                             VP8TBuffer* tokens) {
-  int n = res->first;
-  int b = VP8EncBands[n];
-  if (!VP8AddToken(tokens, res->last >= 0, TOKEN_ID(b, ctx, 0))) {
-    return 0;
-  }
-
-  while (n < 16) {
-    const int c = res->coeffs[n++];
-    const int sign = c < 0;
-    int v = sign ? -c : c;
-    const int base_id = TOKEN_ID(b, ctx, 0);
-    if (!VP8AddToken(tokens, v != 0, base_id + 1)) {
-      b = VP8EncBands[n];
-      ctx = 0;
-      continue;
-    }
-    if (!VP8AddToken(tokens, v > 1, base_id + 2)) {
-      b = VP8EncBands[n];
-      ctx = 1;
-    } else {
-      if (!VP8AddToken(tokens, v > 4, base_id + 3)) {
-        if (VP8AddToken(tokens, v != 2, base_id + 4))
-          VP8AddToken(tokens, v == 4, base_id + 5);
-      } else if (!VP8AddToken(tokens, v > 10, base_id + 6)) {
-        if (!VP8AddToken(tokens, v > 6, base_id + 7)) {
-//          VP8AddToken(tokens, v == 6, 159);
-        } else {
-//          VP8AddToken(tokens, v >= 9, 165);
-//          VP8AddToken(tokens, !(v & 1), 145);
-        }
-      } else {
-        int mask;
-        const uint8_t* tab;
-        if (v < 3 + (8 << 1)) {          // kCat3  (3b)
-          VP8AddToken(tokens, 0, base_id + 8);
-          VP8AddToken(tokens, 0, base_id + 9);
-          v -= 3 + (8 << 0);
-          mask = 1 << 2;
-          tab = kCat3;
-        } else if (v < 3 + (8 << 2)) {   // kCat4  (4b)
-          VP8AddToken(tokens, 0, base_id + 8);
-          VP8AddToken(tokens, 1, base_id + 9);
-          v -= 3 + (8 << 1);
-          mask = 1 << 3;
-          tab = kCat4;
-        } else if (v < 3 + (8 << 3)) {   // kCat5  (5b)
-          VP8AddToken(tokens, 1, base_id + 8);
-          VP8AddToken(tokens, 0, base_id + 10);
-          v -= 3 + (8 << 2);
-          mask = 1 << 4;
-          tab = kCat5;
-        } else {                         // kCat6 (11b)
-          VP8AddToken(tokens, 1, base_id + 8);
-          VP8AddToken(tokens, 1, base_id + 10);
-          v -= 3 + (8 << 3);
-          mask = 1 << 10;
-          tab = kCat6;
-        }
-        while (mask) {
-          // VP8AddToken(tokens, !!(v & mask), *tab++);
-          mask >>= 1;
-        }
-      }
-      ctx = 2;
-    }
-    b = VP8EncBands[n];
-    // VP8PutBitUniform(bw, sign);
-    if (n == 16 || !VP8AddToken(tokens, n <= res->last, TOKEN_ID(b, ctx, 0))) {
-      return 1;   // EOB
-    }
-  }
-  return 1;
-}
+#if !defined(DISABLE_TOKEN_BUFFER)
 
-static void RecordTokens(VP8EncIterator* const it,
-                         const VP8ModeScore* const rd, VP8TBuffer tokens[2]) {
+static int RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
+                        VP8TBuffer* const tokens) {
   int x, y, ch;
   VP8Residual res;
   VP8Encoder* const enc = it->enc_;
 
   VP8IteratorNzToBytes(it);
   if (it->mb_->type_ == 1) {   // i16x16
-    InitResidual(0, 1, enc, &res);
-    SetResidualCoeffs(rd->y_dc_levels, &res);
-// TODO(skal): FIX ->    it->top_nz_[8] = it->left_nz_[8] =
-      RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], &res, &tokens[0]);
-    InitResidual(1, 0, enc, &res);
+    const int ctx = it->top_nz_[8] + it->left_nz_[8];
+    VP8InitResidual(0, 1, enc, &res);
+    VP8SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+        VP8RecordCoeffTokens(ctx, 1,
+                             res.first, res.last, res.coeffs, tokens);
+    VP8RecordCoeffs(ctx, &res);
+    VP8InitResidual(1, 0, enc, &res);
   } else {
-    InitResidual(0, 3, enc, &res);
+    VP8InitResidual(0, 3, enc, &res);
   }
 
   // luma-AC
   for (y = 0; y < 4; ++y) {
     for (x = 0; x < 4; ++x) {
       const int ctx = it->top_nz_[x] + it->left_nz_[y];
-      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      VP8SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
       it->top_nz_[x] = it->left_nz_[y] =
-          RecordCoeffTokens(ctx, &res, &tokens[0]);
+          VP8RecordCoeffTokens(ctx, res.coeff_type,
+                               res.first, res.last, res.coeffs, tokens);
+      VP8RecordCoeffs(ctx, &res);
     }
   }
 
   // U/V
-  InitResidual(0, 2, enc, &res);
+  VP8InitResidual(0, 2, enc, &res);
   for (ch = 0; ch <= 2; ch += 2) {
     for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x) {
         const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
-        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        VP8SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
         it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
-            RecordCoeffTokens(ctx, &res, &tokens[1]);
+            VP8RecordCoeffTokens(ctx, 2,
+                                 res.first, res.last, res.coeffs, tokens);
+        VP8RecordCoeffs(ctx, &res);
       }
     }
   }
+  VP8IteratorBytesToNz(it);
+  return !tokens->error_;
 }
 
-#endif    // USE_TOKEN_BUFFER
+#endif    // !DISABLE_TOKEN_BUFFER
 
 //------------------------------------------------------------------------------
 // ExtraInfo map / Debug function
@@ -697,7 +460,10 @@ static void SetBlock(uint8_t* p, int value, int size) {
 #endif
 
 static void ResetSSE(VP8Encoder* const enc) {
-  memset(enc->sse_, 0, sizeof(enc->sse_));
+  enc->sse_[0] = 0;
+  enc->sse_[1] = 0;
+  enc->sse_[2] = 0;
+  // Note: enc->sse_[3] is managed by alpha.c
   enc->sse_count_ = 0;
 }
 
@@ -706,9 +472,9 @@ static void StoreSSE(const VP8EncIterator* const it) {
   const uint8_t* const in = it->yuv_in_;
   const uint8_t* const out = it->yuv_out_;
   // Note: not totally accurate at boundary. And doesn't include in-loop filter.
-  enc->sse_[0] += VP8SSE16x16(in + Y_OFF, out + Y_OFF);
-  enc->sse_[1] += VP8SSE8x8(in + U_OFF, out + U_OFF);
-  enc->sse_[2] += VP8SSE8x8(in + V_OFF, out + V_OFF);
+  enc->sse_[0] += VP8SSE16x16(in + Y_OFF_ENC, out + Y_OFF_ENC);
+  enc->sse_[1] += VP8SSE8x8(in + U_OFF_ENC, out + U_OFF_ENC);
+  enc->sse_[2] += VP8SSE8x8(in + V_OFF_ENC, out + V_OFF_ENC);
   enc->sse_count_ += 16 * 16;
 }
 
@@ -736,72 +502,163 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
         const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
         *info = (b > 255) ? 255 : b; break;
       }
+      case 7: *info = mb->alpha_; break;
       default: *info = 0; break;
-    };
+    }
   }
 #if SEGMENT_VISU  // visualize segments and prediction modes
-  SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16);
-  SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8);
-  SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8);
+  SetBlock(it->yuv_out_ + Y_OFF_ENC, mb->segment_ * 64, 16);
+  SetBlock(it->yuv_out_ + U_OFF_ENC, it->preds_[0] * 64, 8);
+  SetBlock(it->yuv_out_ + V_OFF_ENC, mb->uv_mode_ * 64, 8);
 #endif
 }
 
-//------------------------------------------------------------------------------
-// Main loops
-//
-//  VP8EncLoop(): does the final bitstream coding.
-
-static void ResetAfterSkip(VP8EncIterator* const it) {
-  if (it->mb_->type_ == 1) {
-    *it->nz_ = 0;  // reset all predictors
-    it->left_nz_[8] = 0;
-  } else {
-    *it->nz_ &= (1 << 24);  // preserve the dc_nz bit
-  }
+static double GetPSNR(uint64_t mse, uint64_t size) {
+  return (mse > 0 && size > 0) ? 10. * log10(255. * 255. * size / mse) : 99;
 }
 
-int VP8EncLoop(VP8Encoder* const enc) {
-  int i, s, p;
-  int ok = 1;
-  VP8EncIterator it;
-  VP8ModeScore info;
-  const int dont_use_skip = !enc->proba_.use_skip_proba_;
-  const int rd_opt = enc->rd_opt_level_;
-  const int kAverageBytesPerMB = 5;     // TODO: have a kTable[quality/10]
-  const int bytes_per_parts =
-    enc->mb_w_ * enc->mb_h_ * kAverageBytesPerMB / enc->num_parts_;
+//------------------------------------------------------------------------------
+//  StatLoop(): only collect statistics (number of skips, token usage, ...).
+//  This is used for deciding optimal probabilities. It also modifies the
+//  quantizer value if some target (size, PSNR) was specified.
 
-  // Initialize the bit-writers
-  for (p = 0; p < enc->num_parts_; ++p) {
-    VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
-  }
+static void SetLoopParams(VP8Encoder* const enc, float q) {
+  // Make sure the quality parameter is inside valid bounds
+  q = Clamp(q, 0.f, 100.f);
+
+  VP8SetSegmentParams(enc, q);      // setup segment quantizations and filters
+  SetSegmentProbas(enc);            // compute segment probabilities
 
   ResetStats(enc);
   ResetSSE(enc);
+}
+
+static uint64_t OneStatPass(VP8Encoder* const enc, VP8RDLevel rd_opt,
+                            int nb_mbs, int percent_delta,
+                            PassStats* const s) {
+  VP8EncIterator it;
+  uint64_t size = 0;
+  uint64_t size_p0 = 0;
+  uint64_t distortion = 0;
+  const uint64_t pixel_count = nb_mbs * 384;
 
   VP8IteratorInit(enc, &it);
-  VP8InitFilter(&it);
+  SetLoopParams(enc, s->q);
   do {
-    VP8IteratorImport(&it);
-    // Warning! order is important: first call VP8Decimate() and
-    // *then* decide how to code the skip decision if there's one.
-    if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
-      CodeResiduals(it.bw_, &it, &info);
-    } else {   // reset predictors after a skip
-      ResetAfterSkip(&it);
+    VP8ModeScore info;
+    VP8IteratorImport(&it, NULL);
+    if (VP8Decimate(&it, &info, rd_opt)) {
+      // Just record the number of skips and act like skip_proba is not used.
+      enc->proba_.nb_skip_++;
     }
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    if (enc->use_layer_) {
-      VP8EncCodeLayerBlock(&it);
+    RecordResiduals(&it, &info);
+    size += info.R + info.H;
+    size_p0 += info.H;
+    distortion += info.D;
+    if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
+      return 0;
+    VP8IteratorSaveBoundary(&it);
+  } while (VP8IteratorNext(&it) && --nb_mbs > 0);
+
+  size_p0 += enc->segment_hdr_.size_;
+  if (s->do_size_search) {
+    size += FinalizeSkipProba(enc);
+    size += FinalizeTokenProbas(&enc->proba_);
+    size = ((size + size_p0 + 1024) >> 11) + HEADER_SIZE_ESTIMATE;
+    s->value = (double)size;
+  } else {
+    s->value = GetPSNR(distortion, pixel_count);
+  }
+  return size_p0;
+}
+
+static int StatLoop(VP8Encoder* const enc) {
+  const int method = enc->method_;
+  const int do_search = enc->do_search_;
+  const int fast_probe = ((method == 0 || method == 3) && !do_search);
+  int num_pass_left = enc->config_->pass;
+  const int task_percent = 20;
+  const int percent_per_pass =
+      (task_percent + num_pass_left / 2) / num_pass_left;
+  const int final_percent = enc->percent_ + task_percent;
+  const VP8RDLevel rd_opt =
+      (method >= 3 || do_search) ? RD_OPT_BASIC : RD_OPT_NONE;
+  int nb_mbs = enc->mb_w_ * enc->mb_h_;
+  PassStats stats;
+
+  InitPassStats(enc, &stats);
+  ResetTokenStats(enc);
+
+  // Fast mode: quick analysis pass over few mbs. Better than nothing.
+  if (fast_probe) {
+    if (method == 3) {  // we need more stats for method 3 to be reliable.
+      nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
+    } else {
+      nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
     }
+  }
+
+  while (num_pass_left-- > 0) {
+    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+                             (num_pass_left == 0) ||
+                             (enc->max_i4_header_bits_ == 0);
+    const uint64_t size_p0 =
+        OneStatPass(enc, rd_opt, nb_mbs, percent_per_pass, &stats);
+    if (size_p0 == 0) return 0;
+#if (DEBUG_SEARCH > 0)
+    printf("#%d value:%.1lf -> %.1lf   q:%.2f -> %.2f\n",
+           num_pass_left, stats.last_value, stats.value, stats.last_q, stats.q);
 #endif
-    StoreSideInfo(&it);
-    VP8StoreFilterStats(&it);
-    VP8IteratorExport(&it);
-    ok = VP8IteratorProgress(&it, 20);
-  } while (ok && VP8IteratorNext(&it, it.yuv_out_));
+    if (enc->max_i4_header_bits_ > 0 && size_p0 > PARTITION0_SIZE_LIMIT) {
+      ++num_pass_left;
+      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+      continue;                        // ...and start over
+    }
+    if (is_last_pass) {
+      break;
+    }
+    // If no target size: just do several pass without changing 'q'
+    if (do_search) {
+      ComputeNextQ(&stats);
+      if (fabs(stats.dq) <= DQ_LIMIT) break;
+    }
+  }
+  if (!do_search || !stats.do_size_search) {
+    // Need to finalize probas now, since it wasn't done during the search.
+    FinalizeSkipProba(enc);
+    FinalizeTokenProbas(&enc->proba_);
+  }
+  VP8CalculateLevelCosts(&enc->proba_);  // finalize costs
+  return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+}
+
+//------------------------------------------------------------------------------
+// Main loops
+//
+
+static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
 
+static int PreLoopInitialize(VP8Encoder* const enc) {
+  int p;
+  int ok = 1;
+  const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4];
+  const int bytes_per_parts =
+      enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_;
+  // Initialize the bit-writers
+  for (p = 0; ok && p < enc->num_parts_; ++p) {
+    ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
+  }
+  if (!ok) {
+    VP8EncFreeBitWriters(enc);  // malloc error occurred
+    WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  return ok;
+}
+
+static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
+  VP8Encoder* const enc = it->enc_;
   if (ok) {      // Finalize the partitions, check for extra errors.
+    int p;
     for (p = 0; p < enc->num_parts_; ++p) {
       VP8BitWriterFinish(enc->parts_ + p);
       ok &= !enc->parts_[p].error_;
@@ -809,131 +666,185 @@ int VP8EncLoop(VP8Encoder* const enc) {
   }
 
   if (ok) {      // All good. Finish up.
-    if (enc->pic_->stats) {           // finalize byte counters...
+    if (enc->pic_->stats != NULL) {  // finalize byte counters...
+      int i, s;
       for (i = 0; i <= 2; ++i) {
         for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-          enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3);
+          enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3);
         }
       }
     }
-    VP8AdjustFilterStrength(&it);     // ...and store filter stats.
+    VP8AdjustFilterStrength(it);     // ...and store filter stats.
   } else {
     // Something bad happened -> need to do some memory cleanup.
     VP8EncFreeBitWriters(enc);
   }
-
   return ok;
 }
 
 //------------------------------------------------------------------------------
-//  VP8StatLoop(): only collect statistics (number of skips, token usage, ...)
-//                 This is used for deciding optimal probabilities. It also
-//                 modifies the quantizer value if some target (size, PNSR)
-//                 was specified.
-
-#define kHeaderSizeEstimate (15 + 20 + 10)      // TODO: fix better
-
-static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
-                       float* const PSNR, int percent_delta) {
-  VP8EncIterator it;
-  uint64_t size = 0;
-  uint64_t distortion = 0;
-  const uint64_t pixel_count = nb_mbs * 384;
+//  VP8EncLoop(): does the final bitstream coding.
 
-  // Make sure the quality parameter is inside valid bounds
-  if (q < 0.) {
-    q = 0;
-  } else if (q > 100.) {
-    q = 100;
+static void ResetAfterSkip(VP8EncIterator* const it) {
+  if (it->mb_->type_ == 1) {
+    *it->nz_ = 0;  // reset all predictors
+    it->left_nz_[8] = 0;
+  } else {
+    *it->nz_ &= (1 << 24);  // preserve the dc_nz bit
   }
+}
 
-  VP8SetSegmentParams(enc, q);      // setup segment quantizations and filters
+int VP8EncLoop(VP8Encoder* const enc) {
+  VP8EncIterator it;
+  int ok = PreLoopInitialize(enc);
+  if (!ok) return 0;
 
-  ResetStats(enc);
-  ResetTokenStats(enc);
+  StatLoop(enc);  // stats-collection loop
 
   VP8IteratorInit(enc, &it);
+  VP8InitFilter(&it);
   do {
     VP8ModeScore info;
-    VP8IteratorImport(&it);
-    if (VP8Decimate(&it, &info, rd_opt)) {
-      // Just record the number of skips and act like skip_proba is not used.
-      enc->proba_.nb_skip_++;
+    const int dont_use_skip = !enc->proba_.use_skip_proba_;
+    const VP8RDLevel rd_opt = enc->rd_opt_level_;
+
+    VP8IteratorImport(&it, NULL);
+    // Warning! order is important: first call VP8Decimate() and
+    // *then* decide how to code the skip decision if there's one.
+    if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
+      CodeResiduals(it.bw_, &it, &info);
+    } else {   // reset predictors after a skip
+      ResetAfterSkip(&it);
     }
-    RecordResiduals(&it, &info);
-    size += info.R;
-    distortion += info.D;
-    if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
-      return 0;
-  } while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
-  size += FinalizeSkipProba(enc);
-  size += FinalizeTokenProbas(enc);
-  size += enc->segment_hdr_.size_;
-  size = ((size + 1024) >> 11) + kHeaderSizeEstimate;
-
-  if (PSNR) {
-    *PSNR = (float)(10.* log10(255. * 255. * pixel_count / distortion));
-  }
-  return (int)size;
+    StoreSideInfo(&it);
+    VP8StoreFilterStats(&it);
+    VP8IteratorExport(&it);
+    ok = VP8IteratorProgress(&it, 20);
+    VP8IteratorSaveBoundary(&it);
+  } while (ok && VP8IteratorNext(&it));
+
+  return PostLoopFinalize(&it, ok);
 }
 
-// successive refinement increments.
-static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
+//------------------------------------------------------------------------------
+// Single pass using Token Buffer.
 
-int VP8StatLoop(VP8Encoder* const enc) {
-  const int do_search =
-    (enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
-  const int fast_probe = (enc->method_ < 2 && !do_search);
-  float q = enc->config_->quality;
-  const int max_passes = enc->config_->pass;
-  const int task_percent = 20;
-  const int percent_per_pass = (task_percent + max_passes / 2) / max_passes;
-  const int final_percent = enc->percent_ + task_percent;
-  int pass;
-  int nb_mbs;
+#if !defined(DISABLE_TOKEN_BUFFER)
 
-  // Fast mode: quick analysis pass over few mbs. Better than nothing.
-  nb_mbs = enc->mb_w_ * enc->mb_h_;
-  if (fast_probe && nb_mbs > 100) nb_mbs = 100;
-
-  // No target size: just do several pass without changing 'q'
-  if (!do_search) {
-    for (pass = 0; pass < max_passes; ++pass) {
-      const int rd_opt = (enc->method_ > 2);
-      if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
-        return 0;
-      }
+#define MIN_COUNT 96  // minimum number of macroblocks before updating stats
+
+int VP8EncTokenLoop(VP8Encoder* const enc) {
+  // Roughly refresh the proba eight times per pass
+  int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
+  int num_pass_left = enc->config_->pass;
+  const int do_search = enc->do_search_;
+  VP8EncIterator it;
+  VP8EncProba* const proba = &enc->proba_;
+  const VP8RDLevel rd_opt = enc->rd_opt_level_;
+  const uint64_t pixel_count = enc->mb_w_ * enc->mb_h_ * 384;
+  PassStats stats;
+  int ok;
+
+  InitPassStats(enc, &stats);
+  ok = PreLoopInitialize(enc);
+  if (!ok) return 0;
+
+  if (max_count < MIN_COUNT) max_count = MIN_COUNT;
+
+  assert(enc->num_parts_ == 1);
+  assert(enc->use_tokens_);
+  assert(proba->use_skip_proba_ == 0);
+  assert(rd_opt >= RD_OPT_BASIC);   // otherwise, token-buffer won't be useful
+  assert(num_pass_left > 0);
+
+  while (ok && num_pass_left-- > 0) {
+    const int is_last_pass = (fabs(stats.dq) <= DQ_LIMIT) ||
+                             (num_pass_left == 0) ||
+                             (enc->max_i4_header_bits_ == 0);
+    uint64_t size_p0 = 0;
+    uint64_t distortion = 0;
+    int cnt = max_count;
+    VP8IteratorInit(enc, &it);
+    SetLoopParams(enc, stats.q);
+    if (is_last_pass) {
+      ResetTokenStats(enc);
+      VP8InitFilter(&it);  // don't collect stats until last pass (too costly)
     }
-  } else {
-    // binary search for a size close to target
-    for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) {
-      const int rd_opt = 1;
-      float PSNR;
-      int criterion;
-      const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR,
-                                   percent_per_pass);
-#if DEBUG_SEARCH
-      printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
-#endif
-      if (!size) return 0;
-      if (enc->config_->target_PSNR > 0) {
-        criterion = (PSNR < enc->config_->target_PSNR);
-      } else {
-        criterion = (size < enc->config_->target_size);
+    VP8TBufferClear(&enc->tokens_);
+    do {
+      VP8ModeScore info;
+      VP8IteratorImport(&it, NULL);
+      if (--cnt < 0) {
+        FinalizeTokenProbas(proba);
+        VP8CalculateLevelCosts(proba);  // refresh cost tables for rd-opt
+        cnt = max_count;
       }
-      // dichotomize
-      if (criterion) {
-        q += dqs[pass];
-      } else {
-        q -= dqs[pass];
+      VP8Decimate(&it, &info, rd_opt);
+      ok = RecordTokens(&it, &info, &enc->tokens_);
+      if (!ok) {
+        WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+        break;
+      }
+      size_p0 += info.H;
+      distortion += info.D;
+      if (is_last_pass) {
+        StoreSideInfo(&it);
+        VP8StoreFilterStats(&it);
+        VP8IteratorExport(&it);
+        ok = VP8IteratorProgress(&it, 20);
       }
+      VP8IteratorSaveBoundary(&it);
+    } while (ok && VP8IteratorNext(&it));
+    if (!ok) break;
+
+    size_p0 += enc->segment_hdr_.size_;
+    if (stats.do_size_search) {
+      uint64_t size = FinalizeTokenProbas(&enc->proba_);
+      size += VP8EstimateTokenSize(&enc->tokens_,
+                                   (const uint8_t*)proba->coeffs_);
+      size = (size + size_p0 + 1024) >> 11;  // -> size in bytes
+      size += HEADER_SIZE_ESTIMATE;
+      stats.value = (double)size;
+    } else {  // compute and store PSNR
+      stats.value = GetPSNR(distortion, pixel_count);
+    }
+
+#if (DEBUG_SEARCH > 0)
+    printf("#%2d metric:%.1lf -> %.1lf   last_q=%.2lf q=%.2lf dq=%.2lf\n",
+           num_pass_left, stats.last_value, stats.value,
+           stats.last_q, stats.q, stats.dq);
+#endif
+    if (size_p0 > PARTITION0_SIZE_LIMIT) {
+      ++num_pass_left;
+      enc->max_i4_header_bits_ >>= 1;  // strengthen header bit limitation...
+      continue;                        // ...and start over
+    }
+    if (is_last_pass) {
+      break;   // done
+    }
+    if (do_search) {
+      ComputeNextQ(&stats);  // Adjust q
     }
   }
-  return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+  if (ok) {
+    if (!stats.do_size_search) {
+      FinalizeTokenProbas(&enc->proba_);
+    }
+    ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
+                       (const uint8_t*)proba->coeffs_, 1);
+  }
+  ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+  return PostLoopFinalize(&it, ok);
+}
+
+#else
+
+int VP8EncTokenLoop(VP8Encoder* const enc) {
+  (void)enc;
+  return 0;   // we shouldn't be here.
 }
 
+#endif    // DISABLE_TOKEN_BUFFER
+
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/histogram.c b/drivers/webp/enc/histogram.c
index ca838e064d..68c27fb1db 100644
--- a/drivers/webp/enc/histogram.c
+++ b/drivers/webp/enc/histogram.c
@@ -1,38 +1,82 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
 //
 #ifdef HAVE_CONFIG_H
-#include "config.h"
+#include "webp/config.h"
 #endif
 
 #include <math.h>
-#include <stdio.h>
 
 #include "./backward_references.h"
 #include "./histogram.h"
 #include "../dsp/lossless.h"
 #include "../utils/utils.h"
 
+#define MAX_COST 1.e38
+
+// Number of partitions for the three dominant (literal, red and blue) symbol
+// costs.
+#define NUM_PARTITIONS 4
+// The size of the bin-hash corresponding to the three dominant costs.
+#define BIN_SIZE (NUM_PARTITIONS * NUM_PARTITIONS * NUM_PARTITIONS)
+// Maximum number of histograms allowed in greedy combining algorithm.
+#define MAX_HISTO_GREEDY 100
+
 static void HistogramClear(VP8LHistogram* const p) {
-  memset(p->literal_, 0, sizeof(p->literal_));
-  memset(p->red_, 0, sizeof(p->red_));
-  memset(p->blue_, 0, sizeof(p->blue_));
-  memset(p->alpha_, 0, sizeof(p->alpha_));
-  memset(p->distance_, 0, sizeof(p->distance_));
-  p->bit_cost_ = 0;
+  uint32_t* const literal = p->literal_;
+  const int cache_bits = p->palette_code_bits_;
+  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  memset(p, 0, histo_size);
+  p->palette_code_bits_ = cache_bits;
+  p->literal_ = literal;
+}
+
+// Swap two histogram pointers.
+static void HistogramSwap(VP8LHistogram** const A, VP8LHistogram** const B) {
+  VP8LHistogram* const tmp = *A;
+  *A = *B;
+  *B = tmp;
+}
+
+static void HistogramCopy(const VP8LHistogram* const src,
+                          VP8LHistogram* const dst) {
+  uint32_t* const dst_literal = dst->literal_;
+  const int dst_cache_bits = dst->palette_code_bits_;
+  const int histo_size = VP8LGetHistogramSize(dst_cache_bits);
+  assert(src->palette_code_bits_ == dst_cache_bits);
+  memcpy(dst, src, histo_size);
+  dst->literal_ = dst_literal;
+}
+
+int VP8LGetHistogramSize(int cache_bits) {
+  const int literal_size = VP8LHistogramNumCodes(cache_bits);
+  const size_t total_size = sizeof(VP8LHistogram) + sizeof(int) * literal_size;
+  assert(total_size <= (size_t)0x7fffffff);
+  return (int)total_size;
+}
+
+void VP8LFreeHistogram(VP8LHistogram* const histo) {
+  WebPSafeFree(histo);
+}
+
+void VP8LFreeHistogramSet(VP8LHistogramSet* const histo) {
+  WebPSafeFree(histo);
 }
 
 void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
                             VP8LHistogram* const histo) {
-  int i;
-  for (i = 0; i < refs->size; ++i) {
-    VP8LHistogramAddSinglePixOrCopy(histo, &refs->refs[i]);
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  while (VP8LRefsCursorOk(&c)) {
+    VP8LHistogramAddSinglePixOrCopy(histo, c.cur_pos);
+    VP8LRefsCursorNext(&c);
   }
 }
 
@@ -51,13 +95,25 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
   HistogramClear(p);
 }
 
+VP8LHistogram* VP8LAllocateHistogram(int cache_bits) {
+  VP8LHistogram* histo = NULL;
+  const int total_size = VP8LGetHistogramSize(cache_bits);
+  uint8_t* const memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
+  if (memory == NULL) return NULL;
+  histo = (VP8LHistogram*)memory;
+  // literal_ won't necessarily be aligned.
+  histo->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
+  VP8LHistogramInit(histo, cache_bits);
+  return histo;
+}
+
 VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
   int i;
   VP8LHistogramSet* set;
-  VP8LHistogram* bulk;
-  const uint64_t total_size = (uint64_t)sizeof(*set)
-                            + size * sizeof(*set->histograms)
-                            + size * sizeof(**set->histograms);
+  const int histo_size = VP8LGetHistogramSize(cache_bits);
+  const size_t total_size =
+      sizeof(*set) + size * (sizeof(*set->histograms) +
+      histo_size + WEBP_ALIGN_CST);
   uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
   if (memory == NULL) return NULL;
 
@@ -65,12 +121,15 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
   memory += sizeof(*set);
   set->histograms = (VP8LHistogram**)memory;
   memory += size * sizeof(*set->histograms);
-  bulk = (VP8LHistogram*)memory;
   set->max_size = size;
   set->size = size;
   for (i = 0; i < size; ++i) {
-    set->histograms[i] = bulk + i;
+    memory = (uint8_t*)WEBP_ALIGN(memory);
+    set->histograms[i] = (VP8LHistogram*)memory;
+    // literal_ won't necessarily be aligned.
+    set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram));
     VP8LHistogramInit(set->histograms[i], cache_bits);
+    memory += histo_size;
   }
   return set;
 }
@@ -85,151 +144,183 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
     ++histo->literal_[PixOrCopyLiteral(v, 1)];
     ++histo->blue_[PixOrCopyLiteral(v, 0)];
   } else if (PixOrCopyIsCacheIdx(v)) {
-    int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
+    const int literal_ix =
+        NUM_LITERAL_CODES + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
     ++histo->literal_[literal_ix];
   } else {
-    int code, extra_bits_count, extra_bits_value;
-    PrefixEncode(PixOrCopyLength(v),
-                 &code, &extra_bits_count, &extra_bits_value);
-    ++histo->literal_[256 + code];
-    PrefixEncode(PixOrCopyDistance(v),
-                 &code, &extra_bits_count, &extra_bits_value);
+    int code, extra_bits;
+    VP8LPrefixEncodeBits(PixOrCopyLength(v), &code, &extra_bits);
+    ++histo->literal_[NUM_LITERAL_CODES + code];
+    VP8LPrefixEncodeBits(PixOrCopyDistance(v), &code, &extra_bits);
     ++histo->distance_[code];
   }
 }
 
+// -----------------------------------------------------------------------------
+// Various histogram combine/cost-eval functions
+
+static int GetCombinedHistogramEntropy(const VP8LHistogram* const a,
+                                       const VP8LHistogram* const b,
+                                       double cost_threshold,
+                                       double* cost) {
+  const int palette_code_bits = a->palette_code_bits_;
+  assert(a->palette_code_bits_ == b->palette_code_bits_);
+  *cost += VP8LGetCombinedEntropy(a->literal_, b->literal_,
+                                  VP8LHistogramNumCodes(palette_code_bits));
+  *cost += VP8LExtraCostCombined(a->literal_ + NUM_LITERAL_CODES,
+                                 b->literal_ + NUM_LITERAL_CODES,
+                                 NUM_LENGTH_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += VP8LGetCombinedEntropy(a->red_, b->red_, NUM_LITERAL_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += VP8LGetCombinedEntropy(a->blue_, b->blue_, NUM_LITERAL_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += VP8LGetCombinedEntropy(a->alpha_, b->alpha_, NUM_LITERAL_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  *cost += VP8LGetCombinedEntropy(a->distance_, b->distance_,
+                                  NUM_DISTANCE_CODES);
+  *cost += VP8LExtraCostCombined(a->distance_, b->distance_,
+                                 NUM_DISTANCE_CODES);
+  if (*cost > cost_threshold) return 0;
+
+  return 1;
+}
 
-
-static double BitsEntropy(const int* const array, int n) {
-  double retval = 0.;
-  int sum = 0;
-  int nonzeros = 0;
-  int max_val = 0;
-  int i;
-  double mix;
-  for (i = 0; i < n; ++i) {
-    if (array[i] != 0) {
-      sum += array[i];
-      ++nonzeros;
-      retval -= VP8LFastSLog2(array[i]);
-      if (max_val < array[i]) {
-        max_val = array[i];
-      }
-    }
+// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
+// to the threshold value 'cost_threshold'. The score returned is
+//  Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
+// Since the previous score passed is 'cost_threshold', we only need to compare
+// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
+// early.
+static double HistogramAddEval(const VP8LHistogram* const a,
+                               const VP8LHistogram* const b,
+                               VP8LHistogram* const out,
+                               double cost_threshold) {
+  double cost = 0;
+  const double sum_cost = a->bit_cost_ + b->bit_cost_;
+  cost_threshold += sum_cost;
+
+  if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) {
+    VP8LHistogramAdd(a, b, out);
+    out->bit_cost_ = cost;
+    out->palette_code_bits_ = a->palette_code_bits_;
+    out->trivial_symbol_ = (a->trivial_symbol_ == b->trivial_symbol_) ?
+        a->trivial_symbol_ : VP8L_NON_TRIVIAL_SYM;
   }
-  retval += VP8LFastSLog2(sum);
 
-  if (nonzeros < 5) {
-    if (nonzeros <= 1) {
-      return 0;
-    }
-    // Two symbols, they will be 0 and 1 in a Huffman code.
-    // Let's mix in a bit of entropy to favor good clustering when
-    // distributions of these are combined.
-    if (nonzeros == 2) {
-      return 0.99 * sum + 0.01 * retval;
-    }
-    // No matter what the entropy says, we cannot be better than min_limit
-    // with Huffman coding. I am mixing a bit of entropy into the
-    // min_limit since it produces much better (~0.5 %) compression results
-    // perhaps because of better entropy clustering.
-    if (nonzeros == 3) {
-      mix = 0.95;
-    } else {
-      mix = 0.7;  // nonzeros == 4.
-    }
-  } else {
-    mix = 0.627;
-  }
+  return cost - sum_cost;
+}
 
-  {
-    double min_limit = 2 * sum - max_val;
-    min_limit = mix * min_limit + (1.0 - mix) * retval;
-    return (retval < min_limit) ? min_limit : retval;
-  }
+// Same as HistogramAddEval(), except that the resulting histogram
+// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
+// the term C(b) which is constant over all the evaluations.
+static double HistogramAddThresh(const VP8LHistogram* const a,
+                                 const VP8LHistogram* const b,
+                                 double cost_threshold) {
+  double cost = -a->bit_cost_;
+  GetCombinedHistogramEntropy(a, b, cost_threshold, &cost);
+  return cost;
 }
 
-double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
-  double retval = BitsEntropy(&p->literal_[0], VP8LHistogramNumCodes(p))
-                + BitsEntropy(&p->red_[0], 256)
-                + BitsEntropy(&p->blue_[0], 256)
-                + BitsEntropy(&p->alpha_[0], 256)
-                + BitsEntropy(&p->distance_[0], NUM_DISTANCE_CODES);
-  // Compute the extra bits cost.
-  int i;
-  for (i = 2; i < NUM_LENGTH_CODES - 2; ++i) {
-    retval +=
-        (i >> 1) * p->literal_[256 + i + 2];
-  }
-  for (i = 2; i < NUM_DISTANCE_CODES - 2; ++i) {
-    retval += (i >> 1) * p->distance_[i + 2];
-  }
-  return retval;
-}
-
-
-// Returns the cost encode the rle-encoded entropy code.
-// The constants in this function are experimental.
-static double HuffmanCost(const int* const population, int length) {
-  // Small bias because Huffman code length is typically not stored in
-  // full length.
-  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
-  static const double kSmallBias = 9.1;
-  double retval = kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
-  int streak = 0;
-  int i = 0;
-  for (; i < length - 1; ++i) {
-    ++streak;
-    if (population[i] == population[i + 1]) {
-      continue;
-    }
- last_streak_hack:
-    // population[i] points now to the symbol in the streak of same values.
-    if (streak > 3) {
-      if (population[i] == 0) {
-        retval += 1.5625 + 0.234375 * streak;
-      } else {
-        retval += 2.578125 + 0.703125 * streak;
-      }
-    } else {
-      if (population[i] == 0) {
-        retval += 1.796875 * streak;
-      } else {
-        retval += 3.28125 * streak;
-      }
-    }
-    streak = 0;
-  }
-  if (i == length - 1) {
-    ++streak;
-    goto last_streak_hack;
+// -----------------------------------------------------------------------------
+
+// The structure to keep track of cost range for the three dominant entropy
+// symbols.
+// TODO(skal): Evaluate if float can be used here instead of double for
+// representing the entropy costs.
+typedef struct {
+  double literal_max_;
+  double literal_min_;
+  double red_max_;
+  double red_min_;
+  double blue_max_;
+  double blue_min_;
+} DominantCostRange;
+
+static void DominantCostRangeInit(DominantCostRange* const c) {
+  c->literal_max_ = 0.;
+  c->literal_min_ = MAX_COST;
+  c->red_max_ = 0.;
+  c->red_min_ = MAX_COST;
+  c->blue_max_ = 0.;
+  c->blue_min_ = MAX_COST;
+}
+
+static void UpdateDominantCostRange(
+    const VP8LHistogram* const h, DominantCostRange* const c) {
+  if (c->literal_max_ < h->literal_cost_) c->literal_max_ = h->literal_cost_;
+  if (c->literal_min_ > h->literal_cost_) c->literal_min_ = h->literal_cost_;
+  if (c->red_max_ < h->red_cost_) c->red_max_ = h->red_cost_;
+  if (c->red_min_ > h->red_cost_) c->red_min_ = h->red_cost_;
+  if (c->blue_max_ < h->blue_cost_) c->blue_max_ = h->blue_cost_;
+  if (c->blue_min_ > h->blue_cost_) c->blue_min_ = h->blue_cost_;
+}
+
+static void UpdateHistogramCost(VP8LHistogram* const h) {
+  uint32_t alpha_sym, red_sym, blue_sym;
+  const double alpha_cost = VP8LPopulationCost(h->alpha_, NUM_LITERAL_CODES,
+                                               &alpha_sym);
+  const double distance_cost =
+      VP8LPopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL) +
+      VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES);
+  const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_);
+  h->literal_cost_ = VP8LPopulationCost(h->literal_, num_codes, NULL) +
+                     VP8LExtraCost(h->literal_ + NUM_LITERAL_CODES,
+                                   NUM_LENGTH_CODES);
+  h->red_cost_ = VP8LPopulationCost(h->red_, NUM_LITERAL_CODES, &red_sym);
+  h->blue_cost_ = VP8LPopulationCost(h->blue_, NUM_LITERAL_CODES, &blue_sym);
+  h->bit_cost_ = h->literal_cost_ + h->red_cost_ + h->blue_cost_ +
+                 alpha_cost + distance_cost;
+  if ((alpha_sym | red_sym | blue_sym) == VP8L_NON_TRIVIAL_SYM) {
+    h->trivial_symbol_ = VP8L_NON_TRIVIAL_SYM;
+  } else {
+    h->trivial_symbol_ =
+        ((uint32_t)alpha_sym << 24) | (red_sym << 16) | (blue_sym << 0);
   }
-  return retval;
 }
 
-// Estimates the Huffman dictionary + other block overhead size.
-static double HistogramEstimateBitsHeader(const VP8LHistogram* const p) {
-  return HuffmanCost(&p->alpha_[0], 256) +
-         HuffmanCost(&p->red_[0], 256) +
-         HuffmanCost(&p->literal_[0], VP8LHistogramNumCodes(p)) +
-         HuffmanCost(&p->blue_[0], 256) +
-         HuffmanCost(&p->distance_[0], NUM_DISTANCE_CODES);
+static int GetBinIdForEntropy(double min, double max, double val) {
+  const double range = max - min + 1e-6;
+  const double delta = val - min;
+  return (int)(NUM_PARTITIONS * delta / range);
 }
 
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
-  return HistogramEstimateBitsHeader(p) + VP8LHistogramEstimateBitsBulk(p);
+static int GetHistoBinIndexLowEffort(
+    const VP8LHistogram* const h, const DominantCostRange* const c) {
+  const int bin_id = GetBinIdForEntropy(c->literal_min_, c->literal_max_,
+                                        h->literal_cost_);
+  assert(bin_id < NUM_PARTITIONS);
+  return bin_id;
 }
 
-static void HistogramBuildImage(int xsize, int histo_bits,
-                                const VP8LBackwardRefs* const backward_refs,
-                                VP8LHistogramSet* const image) {
-  int i;
+static int GetHistoBinIndex(
+    const VP8LHistogram* const h, const DominantCostRange* const c) {
+  const int bin_id =
+      GetBinIdForEntropy(c->blue_min_, c->blue_max_, h->blue_cost_) +
+      NUM_PARTITIONS * GetBinIdForEntropy(c->red_min_, c->red_max_,
+                                          h->red_cost_) +
+      NUM_PARTITIONS * NUM_PARTITIONS * GetBinIdForEntropy(c->literal_min_,
+                                                           c->literal_max_,
+                                                           h->literal_cost_);
+  assert(bin_id < BIN_SIZE);
+  return bin_id;
+}
+
+// Construct the histograms from backward references.
+static void HistogramBuild(
+    int xsize, int histo_bits, const VP8LBackwardRefs* const backward_refs,
+    VP8LHistogramSet* const image_histo) {
   int x = 0, y = 0;
   const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
-  VP8LHistogram** const histograms = image->histograms;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  VP8LRefsCursor c = VP8LRefsCursorInit(backward_refs);
   assert(histo_bits > 0);
-  for (i = 0; i < backward_refs->size; ++i) {
-    const PixOrCopy* const v = &backward_refs->refs[i];
+  while (VP8LRefsCursorOk(&c)) {
+    const PixOrCopy* const v = c.cur_pos;
     const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
     VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
     x += PixOrCopyLength(v);
@@ -237,7 +328,134 @@ static void HistogramBuildImage(int xsize, int histo_bits,
       x -= xsize;
       ++y;
     }
+    VP8LRefsCursorNext(&c);
+  }
+}
+
+// Copies the histograms and computes the bit_cost of each one.
+static void HistogramCopyAndAnalyze(
+    VP8LHistogramSet* const orig_histo, VP8LHistogramSet* const image_histo) {
+  int i;
+  const int histo_size = orig_histo->size;
+  VP8LHistogram** const orig_histograms = orig_histo->histograms;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  for (i = 0; i < histo_size; ++i) {
+    VP8LHistogram* const histo = orig_histograms[i];
+    UpdateHistogramCost(histo);
+    // Copy histograms from orig_histo[] to image_histo[].
+    HistogramCopy(histo, histograms[i]);
+  }
+}
+
+// Partition histograms to different entropy bins for three dominant (literal,
+// red and blue) symbol costs and compute the histogram aggregate bit_cost.
+static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
+                                       int16_t* const bin_map, int low_effort) {
+  int i;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  const int histo_size = image_histo->size;
+  const int bin_depth = histo_size + 1;
+  DominantCostRange cost_range;
+  DominantCostRangeInit(&cost_range);
+
+  // Analyze the dominant (literal, red and blue) entropy costs.
+  for (i = 0; i < histo_size; ++i) {
+    VP8LHistogram* const histo = histograms[i];
+    UpdateDominantCostRange(histo, &cost_range);
+  }
+
+  // bin-hash histograms on three of the dominant (literal, red and blue)
+  // symbol costs.
+  for (i = 0; i < histo_size; ++i) {
+    int num_histos;
+    VP8LHistogram* const histo = histograms[i];
+    const int16_t bin_id = low_effort ?
+        (int16_t)GetHistoBinIndexLowEffort(histo, &cost_range) :
+        (int16_t)GetHistoBinIndex(histo, &cost_range);
+    const int bin_offset = bin_id * bin_depth;
+    // bin_map[n][0] for every bin 'n' maintains the counter for the number of
+    // histograms in that bin.
+    // Get and increment the num_histos in that bin.
+    num_histos = ++bin_map[bin_offset];
+    assert(bin_offset + num_histos < bin_depth * BIN_SIZE);
+    // Store index i at the num_histos (last) position of this bin in bin_map.
+    bin_map[bin_offset + num_histos] = i;
+  }
+}
+
+// Compact the histogram set by removing unused entries.
+static void HistogramCompactBins(VP8LHistogramSet* const image_histo) {
+  VP8LHistogram** const histograms = image_histo->histograms;
+  int i, j;
+
+  for (i = 0, j = 0; i < image_histo->size; ++i) {
+    if (histograms[i] != NULL && histograms[i]->bit_cost_ != 0.) {
+      if (j < i) {
+        histograms[j] = histograms[i];
+        histograms[i] = NULL;
+      }
+      ++j;
+    }
   }
+  image_histo->size = j;
+}
+
+static VP8LHistogram* HistogramCombineEntropyBin(
+    VP8LHistogramSet* const image_histo,
+    VP8LHistogram* cur_combo,
+    int16_t* const bin_map, int bin_depth, int num_bins,
+    double combine_cost_factor, int low_effort) {
+  int bin_id;
+  VP8LHistogram** const histograms = image_histo->histograms;
+
+  for (bin_id = 0; bin_id < num_bins; ++bin_id) {
+    const int bin_offset = bin_id * bin_depth;
+    const int num_histos = bin_map[bin_offset];
+    const int idx1 = bin_map[bin_offset + 1];
+    int num_combine_failures = 0;
+    int n;
+    for (n = 2; n <= num_histos; ++n) {
+      const int idx2 = bin_map[bin_offset + n];
+      if (low_effort) {
+        // Merge all histograms with the same bin index, irrespective of cost of
+        // the merged histograms.
+        VP8LHistogramAdd(histograms[idx1], histograms[idx2], histograms[idx1]);
+        histograms[idx2]->bit_cost_ = 0.;
+      } else {
+        const double bit_cost_idx2 = histograms[idx2]->bit_cost_;
+        if (bit_cost_idx2 > 0.) {
+          const double bit_cost_thresh = -bit_cost_idx2 * combine_cost_factor;
+          const double curr_cost_diff =
+              HistogramAddEval(histograms[idx1], histograms[idx2],
+                               cur_combo, bit_cost_thresh);
+          if (curr_cost_diff < bit_cost_thresh) {
+            // Try to merge two histograms only if the combo is a trivial one or
+            // the two candidate histograms are already non-trivial.
+            // For some images, 'try_combine' turns out to be false for a lot of
+            // histogram pairs. In that case, we fall back to combining
+            // histograms as usual to avoid increasing the header size.
+            const int try_combine =
+                (cur_combo->trivial_symbol_ != VP8L_NON_TRIVIAL_SYM) ||
+                ((histograms[idx1]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM) &&
+                 (histograms[idx2]->trivial_symbol_ == VP8L_NON_TRIVIAL_SYM));
+            const int max_combine_failures = 32;
+            if (try_combine || (num_combine_failures >= max_combine_failures)) {
+              HistogramSwap(&cur_combo, &histograms[idx1]);
+              histograms[idx2]->bit_cost_ = 0.;
+            } else {
+              ++num_combine_failures;
+            }
+          }
+        }
+      }
+    }
+    if (low_effort) {
+      // Update the bit_cost for the merged histograms (per bin index).
+      UpdateHistogramCost(histograms[idx1]);
+    }
+  }
+  HistogramCompactBins(image_histo);
+  return cur_combo;
 }
 
 static uint32_t MyRand(uint32_t *seed) {
@@ -248,159 +466,433 @@ static uint32_t MyRand(uint32_t *seed) {
   return *seed;
 }
 
-static int HistogramCombine(const VP8LHistogramSet* const in,
-                            VP8LHistogramSet* const out, int num_pairs) {
+// -----------------------------------------------------------------------------
+// Histogram pairs priority queue
+
+// Pair of histograms. Negative idx1 value means that pair is out-of-date.
+typedef struct {
+  int idx1;
+  int idx2;
+  double cost_diff;
+  double cost_combo;
+} HistogramPair;
+
+typedef struct {
+  HistogramPair* heap;
+  int* positions;
+  int size;
+  int max_index;
+} HistoHeap;
+
+static int HistoHeapInit(HistoHeap* const histo_heap, const int max_index) {
+  histo_heap->size = 0;
+  histo_heap->max_index = max_index;
+  histo_heap->heap = WebPSafeMalloc(max_index * max_index,
+                                    sizeof(*histo_heap->heap));
+  histo_heap->positions = WebPSafeMalloc(max_index * max_index,
+                                         sizeof(*histo_heap->positions));
+  return histo_heap->heap != NULL && histo_heap->positions != NULL;
+}
+
+static void HistoHeapClear(HistoHeap* const histo_heap) {
+  assert(histo_heap != NULL);
+  WebPSafeFree(histo_heap->heap);
+  WebPSafeFree(histo_heap->positions);
+}
+
+static void SwapHistogramPairs(HistogramPair *p1,
+                               HistogramPair *p2) {
+  const HistogramPair tmp = *p1;
+  *p1 = *p2;
+  *p2 = tmp;
+}
+
+// Given a valid min-heap in range [0, heap_size-1), this function places the
+// value heap[heap_size-1] into the right location within the heap and records
+// its position in the positions array.
+static void HeapPush(HistoHeap* const histo_heap) {
+  HistogramPair* const heap = histo_heap->heap - 1;
+  int* const positions = histo_heap->positions;
+  const int max_index = histo_heap->max_index;
+  int v;
+  ++histo_heap->size;
+  v = histo_heap->size;
+  while (v > 1 && heap[v].cost_diff < heap[v >> 1].cost_diff) {
+    SwapHistogramPairs(&heap[v], &heap[v >> 1]);
+    // Change position of moved pair in heap.
+    if (heap[v].idx1 >= 0) {
+      const int pos = heap[v].idx1 * max_index + heap[v].idx2;
+      assert(pos >= 0 && pos < max_index * max_index);
+      positions[pos] = v;
+    }
+    v >>= 1;
+  }
+  positions[heap[v].idx1 * max_index + heap[v].idx2] = v;
+}
+
+// Given a valid min-heap in range [0, heap_size), this function shortens the
+// heap by one and moves the lowest-valued element to index (heap_size-1).
+static void HeapPop(HistoHeap* const histo_heap) {
+  HistogramPair* const heap = histo_heap->heap - 1;
+  int* const positions = histo_heap->positions;
+  const int heap_size = histo_heap->size;
+  const int max_index = histo_heap->max_index;
+  int v = 1;
+  if (heap[v].idx1 >= 0) {
+    positions[heap[v].idx1 * max_index + heap[v].idx2] = -1;
+  }
+  SwapHistogramPairs(&heap[v], &heap[heap_size]);
+  while ((v << 1) < heap_size) {
+    int son = (heap[v << 1].cost_diff < heap[v].cost_diff) ? (v << 1) : v;
+    if (((v << 1) + 1) < heap_size &&
+        heap[(v << 1) + 1].cost_diff < heap[son].cost_diff) {
+      son = (v << 1) + 1;
+    }
+    if (son == v) break;
+    SwapHistogramPairs(&heap[v], &heap[son]);
+    // Change position of moved pair in heap.
+    if (heap[v].idx1 >= 0) {
+      positions[heap[v].idx1 * max_index + heap[v].idx2] = v;
+    }
+    v = son;
+  }
+  if (heap[v].idx1 >= 0) {
+    positions[heap[v].idx1 * max_index + heap[v].idx2] = v;
+  }
+  --histo_heap->size;
+}
+
+// -----------------------------------------------------------------------------
+
+static void PreparePair(VP8LHistogram** histograms, int idx1, int idx2,
+                        HistogramPair* const pair,
+                        VP8LHistogram* const histos) {
+  if (idx1 > idx2) {
+    const int tmp = idx2;
+    idx2 = idx1;
+    idx1 = tmp;
+  }
+  pair->idx1 = idx1;
+  pair->idx2 = idx2;
+  pair->cost_diff =
+      HistogramAddEval(histograms[idx1], histograms[idx2], histos, 0);
+  pair->cost_combo = histos->bit_cost_;
+}
+
+#define POSITION_INVALID (-1)
+
+// Invalidates pairs intersecting (idx1, idx2) in heap.
+static void InvalidatePairs(int idx1, int idx2,
+                            const HistoHeap* const histo_heap) {
+  HistogramPair* const heap = histo_heap->heap - 1;
+  int* const positions = histo_heap->positions;
+  const int max_index = histo_heap->max_index;
+  int i;
+  for (i = 0; i < idx1; ++i) {
+    const int pos = positions[i * max_index + idx1];
+    if (pos >= 0) {
+      heap[pos].idx1 = POSITION_INVALID;
+    }
+  }
+  for (i = idx1 + 1; i < max_index; ++i) {
+    const int pos = positions[idx1 * max_index + i];
+    if (pos >= 0) {
+      heap[pos].idx1 = POSITION_INVALID;
+    }
+  }
+  for (i = 0; i < idx2; ++i) {
+    const int pos = positions[i * max_index + idx2];
+    if (pos >= 0) {
+      heap[pos].idx1 = POSITION_INVALID;
+    }
+  }
+  for (i = idx2 + 1; i < max_index; ++i) {
+    const int pos = positions[idx2 * max_index + i];
+    if (pos >= 0) {
+      heap[pos].idx1 = POSITION_INVALID;
+    }
+  }
+}
+
+// Combines histograms by repeatedly merging the pair with the highest cost
+// reduction.
+static int HistogramCombineGreedy(VP8LHistogramSet* const image_histo,
+                                  VP8LHistogram* const histos) {
   int ok = 0;
-  int i, iter;
+  int image_histo_size = image_histo->size;
+  int i, j;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  // Indexes of remaining histograms.
+  int* const clusters = WebPSafeMalloc(image_histo_size, sizeof(*clusters));
+  // Heap of histogram pairs.
+  HistoHeap histo_heap;
+
+  if (!HistoHeapInit(&histo_heap, image_histo_size) || clusters == NULL) {
+    goto End;
+  }
+
+  for (i = 0; i < image_histo_size; ++i) {
+    // Initialize clusters indexes.
+    clusters[i] = i;
+    for (j = i + 1; j < image_histo_size; ++j) {
+      // Initialize positions array.
+      histo_heap.positions[i * histo_heap.max_index + j] = POSITION_INVALID;
+      PreparePair(histograms, i, j, &histo_heap.heap[histo_heap.size], histos);
+      if (histo_heap.heap[histo_heap.size].cost_diff < 0) {
+        HeapPush(&histo_heap);
+      }
+    }
+  }
+
+  while (image_histo_size > 1 && histo_heap.size > 0) {
+    const int idx1 = histo_heap.heap[0].idx1;
+    const int idx2 = histo_heap.heap[0].idx2;
+    VP8LHistogramAdd(histograms[idx2], histograms[idx1], histograms[idx1]);
+    histograms[idx1]->bit_cost_ = histo_heap.heap[0].cost_combo;
+    // Remove merged histogram.
+    for (i = 0; i + 1 < image_histo_size; ++i) {
+      if (clusters[i] >= idx2) {
+        clusters[i] = clusters[i + 1];
+      }
+    }
+    --image_histo_size;
+
+    // Invalidate pairs intersecting the just combined best pair.
+    InvalidatePairs(idx1, idx2, &histo_heap);
+
+    // Pop invalid pairs from the top of the heap.
+    while (histo_heap.size > 0 && histo_heap.heap[0].idx1 < 0) {
+      HeapPop(&histo_heap);
+    }
+
+    // Push new pairs formed with combined histogram to the heap.
+    for (i = 0; i < image_histo_size; ++i) {
+      if (clusters[i] != idx1) {
+        PreparePair(histograms, idx1, clusters[i],
+                    &histo_heap.heap[histo_heap.size], histos);
+        if (histo_heap.heap[histo_heap.size].cost_diff < 0) {
+          HeapPush(&histo_heap);
+        }
+      }
+    }
+  }
+  // Move remaining histograms to the beginning of the array.
+  for (i = 0; i < image_histo_size; ++i) {
+    if (i != clusters[i]) {  // swap the two histograms
+      HistogramSwap(&histograms[i], &histograms[clusters[i]]);
+    }
+  }
+
+  image_histo->size = image_histo_size;
+  ok = 1;
+
+ End:
+  WebPSafeFree(clusters);
+  HistoHeapClear(&histo_heap);
+  return ok;
+}
+
+static VP8LHistogram* HistogramCombineStochastic(
+    VP8LHistogramSet* const image_histo,
+    VP8LHistogram* tmp_histo,
+    VP8LHistogram* best_combo,
+    int quality, int min_cluster_size) {
+  int iter;
   uint32_t seed = 0;
   int tries_with_no_success = 0;
-  const int min_cluster_size = 2;
-  int out_size = in->size;
-  const int outer_iters = in->size * 3;
-  VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
-  VP8LHistogram* cur_combo = histos + 0;    // trial merged histogram
-  VP8LHistogram* best_combo = histos + 1;   // best merged histogram so far
-  if (histos == NULL) goto End;
-
-  // Copy histograms from in[] to out[].
-  assert(in->size <= out->size);
-  for (i = 0; i < in->size; ++i) {
-    in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]);
-    *out->histograms[i] = *in->histograms[i];
-  }
-
-  // Collapse similar histograms in 'out'.
-  for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
-    // We pick the best pair to be combined out of 'inner_iters' pairs.
+  int image_histo_size = image_histo->size;
+  const int iter_mult = (quality < 25) ? 2 : 2 + (quality - 25) / 8;
+  const int outer_iters = image_histo_size * iter_mult;
+  const int num_pairs = image_histo_size / 2;
+  const int num_tries_no_success = outer_iters / 2;
+  VP8LHistogram** const histograms = image_histo->histograms;
+
+  // Collapse similar histograms in 'image_histo'.
+  ++min_cluster_size;
+  for (iter = 0;
+       iter < outer_iters && image_histo_size >= min_cluster_size;
+       ++iter) {
     double best_cost_diff = 0.;
-    int best_idx1 = 0, best_idx2 = 1;
+    int best_idx1 = -1, best_idx2 = 1;
     int j;
+    const int num_tries =
+        (num_pairs < image_histo_size) ? num_pairs : image_histo_size;
     seed += iter;
-    for (j = 0; j < num_pairs; ++j) {
+    for (j = 0; j < num_tries; ++j) {
       double curr_cost_diff;
       // Choose two histograms at random and try to combine them.
-      const uint32_t idx1 = MyRand(&seed) % out_size;
-      const uint32_t tmp = ((j & 7) + 1) % (out_size - 1);
-      const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1);
-      const uint32_t idx2 = (idx1 + diff + 1) % out_size;
+      const uint32_t idx1 = MyRand(&seed) % image_histo_size;
+      const uint32_t tmp = (j & 7) + 1;
+      const uint32_t diff =
+          (tmp < 3) ? tmp : MyRand(&seed) % (image_histo_size - 1);
+      const uint32_t idx2 = (idx1 + diff + 1) % image_histo_size;
       if (idx1 == idx2) {
         continue;
       }
-      *cur_combo = *out->histograms[idx1];
-      VP8LHistogramAdd(cur_combo, out->histograms[idx2]);
-      cur_combo->bit_cost_ = VP8LHistogramEstimateBits(cur_combo);
+
       // Calculate cost reduction on combining.
-      curr_cost_diff = cur_combo->bit_cost_
-                     - out->histograms[idx1]->bit_cost_
-                     - out->histograms[idx2]->bit_cost_;
-      if (best_cost_diff > curr_cost_diff) {    // found a better pair?
-        {     // swap cur/best combo histograms
-          VP8LHistogram* const tmp_histo = cur_combo;
-          cur_combo = best_combo;
-          best_combo = tmp_histo;
-        }
+      curr_cost_diff = HistogramAddEval(histograms[idx1], histograms[idx2],
+                                        tmp_histo, best_cost_diff);
+      if (curr_cost_diff < best_cost_diff) {    // found a better pair?
+        HistogramSwap(&best_combo, &tmp_histo);
         best_cost_diff = curr_cost_diff;
         best_idx1 = idx1;
         best_idx2 = idx2;
       }
     }
 
-    if (best_cost_diff < 0.0) {
-      *out->histograms[best_idx1] = *best_combo;
+    if (best_idx1 >= 0) {
+      HistogramSwap(&best_combo, &histograms[best_idx1]);
       // swap best_idx2 slot with last one (which is now unused)
-      --out_size;
-      if (best_idx2 != out_size) {
-        out->histograms[best_idx2] = out->histograms[out_size];
-        out->histograms[out_size] = NULL;   // just for sanity check.
+      --image_histo_size;
+      if (best_idx2 != image_histo_size) {
+        HistogramSwap(&histograms[image_histo_size], &histograms[best_idx2]);
+        histograms[image_histo_size] = NULL;
       }
       tries_with_no_success = 0;
     }
-    if (++tries_with_no_success >= 50) {
+    if (++tries_with_no_success >= num_tries_no_success) {
       break;
     }
   }
-  out->size = out_size;
-  ok = 1;
-
- End:
-  free(histos);
-  return ok;
+  image_histo->size = image_histo_size;
+  return best_combo;
 }
 
 // -----------------------------------------------------------------------------
 // Histogram refinement
 
-// What is the bit cost of moving square_histogram from
-// cur_symbol to candidate_symbol.
-// TODO(skal): we don't really need to copy the histogram and Add(). Instead
-// we just need VP8LDualHistogramEstimateBits(A, B) estimation function.
-static double HistogramDistance(const VP8LHistogram* const square_histogram,
-                                const VP8LHistogram* const candidate) {
-  const double previous_bit_cost = candidate->bit_cost_;
-  double new_bit_cost;
-  VP8LHistogram modified_histo;
-  modified_histo = *candidate;
-  VP8LHistogramAdd(&modified_histo, square_histogram);
-  new_bit_cost = VP8LHistogramEstimateBits(&modified_histo);
-
-  return new_bit_cost - previous_bit_cost;
-}
-
 // Find the best 'out' histogram for each of the 'in' histograms.
 // Note: we assume that out[]->bit_cost_ is already up-to-date.
-static void HistogramRemap(const VP8LHistogramSet* const in,
-                           const VP8LHistogramSet* const out,
+static void HistogramRemap(const VP8LHistogramSet* const orig_histo,
+                           const VP8LHistogramSet* const image_histo,
                            uint16_t* const symbols) {
   int i;
-  for (i = 0; i < in->size; ++i) {
-    int best_out = 0;
-    double best_bits = HistogramDistance(in->histograms[i], out->histograms[0]);
-    int k;
-    for (k = 1; k < out->size; ++k) {
-      const double cur_bits =
-          HistogramDistance(in->histograms[i], out->histograms[k]);
-      if (cur_bits < best_bits) {
-        best_bits = cur_bits;
-        best_out = k;
+  VP8LHistogram** const orig_histograms = orig_histo->histograms;
+  VP8LHistogram** const histograms = image_histo->histograms;
+  const int orig_histo_size = orig_histo->size;
+  const int image_histo_size = image_histo->size;
+  if (image_histo_size > 1) {
+    for (i = 0; i < orig_histo_size; ++i) {
+      int best_out = 0;
+      double best_bits =
+          HistogramAddThresh(histograms[0], orig_histograms[i], MAX_COST);
+      int k;
+      for (k = 1; k < image_histo_size; ++k) {
+        const double cur_bits =
+            HistogramAddThresh(histograms[k], orig_histograms[i], best_bits);
+        if (cur_bits < best_bits) {
+          best_bits = cur_bits;
+          best_out = k;
+        }
       }
+      symbols[i] = best_out;
+    }
+  } else {
+    assert(image_histo_size == 1);
+    for (i = 0; i < orig_histo_size; ++i) {
+      symbols[i] = 0;
     }
-    symbols[i] = best_out;
   }
 
   // Recompute each out based on raw and symbols.
-  for (i = 0; i < out->size; ++i) {
-    HistogramClear(out->histograms[i]);
+  for (i = 0; i < image_histo_size; ++i) {
+    HistogramClear(histograms[i]);
   }
-  for (i = 0; i < in->size; ++i) {
-    VP8LHistogramAdd(out->histograms[symbols[i]], in->histograms[i]);
+
+  for (i = 0; i < orig_histo_size; ++i) {
+    const int idx = symbols[i];
+    VP8LHistogramAdd(orig_histograms[i], histograms[idx], histograms[idx]);
   }
 }
 
+static double GetCombineCostFactor(int histo_size, int quality) {
+  double combine_cost_factor = 0.16;
+  if (quality < 90) {
+    if (histo_size > 256) combine_cost_factor /= 2.;
+    if (histo_size > 512) combine_cost_factor /= 2.;
+    if (histo_size > 1024) combine_cost_factor /= 2.;
+    if (quality <= 50) combine_cost_factor /= 2.;
+  }
+  return combine_cost_factor;
+}
+
 int VP8LGetHistoImageSymbols(int xsize, int ysize,
                              const VP8LBackwardRefs* const refs,
-                             int quality, int histo_bits, int cache_bits,
-                             VP8LHistogramSet* const image_in,
+                             int quality, int low_effort,
+                             int histo_bits, int cache_bits,
+                             VP8LHistogramSet* const image_histo,
+                             VP8LHistogramSet* const tmp_histos,
                              uint16_t* const histogram_symbols) {
   int ok = 0;
   const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
   const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
-  const int num_histo_pairs = 10 + quality / 2;  // For HistogramCombine().
-  const int histo_image_raw_size = histo_xsize * histo_ysize;
-  VP8LHistogramSet* const image_out =
-      VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
-  if (image_out == NULL) return 0;
-
-  // Build histogram image.
-  HistogramBuildImage(xsize, histo_bits, refs, image_out);
-  // Collapse similar histograms.
-  if (!HistogramCombine(image_out, image_in, num_histo_pairs)) {
-    goto Error;
+  const int image_histo_raw_size = histo_xsize * histo_ysize;
+  const int entropy_combine_num_bins = low_effort ? NUM_PARTITIONS : BIN_SIZE;
+
+  // The bin_map for every bin has the following semantics:
+  // bin_map[n][0] = num_histo; // The number of histograms in that bin.
+  // bin_map[n][1] = index of first histogram in that bin;
+  // bin_map[n][num_histo] = index of last histogram in that bin;
+  // bin_map[n][num_histo + 1] ... bin_map[n][bin_depth - 1] = unused indices.
+  const int bin_depth = image_histo_raw_size + 1;
+  int16_t* bin_map = NULL;
+  VP8LHistogramSet* const orig_histo =
+      VP8LAllocateHistogramSet(image_histo_raw_size, cache_bits);
+  VP8LHistogram* cur_combo;
+  const int entropy_combine =
+      (orig_histo->size > entropy_combine_num_bins * 2) && (quality < 100);
+
+  if (orig_histo == NULL) goto Error;
+
+  // Don't attempt the linear bin-partition heuristic for:
+  // - small histogram sets, as bin_map would be very sparse;
+  // - maximum quality (q==100), to preserve compression gains at that level.
+  if (entropy_combine) {
+    const int bin_map_size = bin_depth * entropy_combine_num_bins;
+    bin_map = (int16_t*)WebPSafeCalloc(bin_map_size, sizeof(*bin_map));
+    if (bin_map == NULL) goto Error;
   }
+
+  // Construct the histograms from backward references.
+  HistogramBuild(xsize, histo_bits, refs, orig_histo);
+  // Copies the histograms and computes their bit_cost.
+  HistogramCopyAndAnalyze(orig_histo, image_histo);
+
+  cur_combo = tmp_histos->histograms[1];  // pick up working slot
+  if (entropy_combine) {
+    const double combine_cost_factor =
+        GetCombineCostFactor(image_histo_raw_size, quality);
+    HistogramAnalyzeEntropyBin(orig_histo, bin_map, low_effort);
+    // Collapse histograms with similar entropy.
+    cur_combo = HistogramCombineEntropyBin(image_histo, cur_combo, bin_map,
+                                           bin_depth, entropy_combine_num_bins,
+                                           combine_cost_factor, low_effort);
+  }
+
+  // Don't combine the histograms using stochastic and greedy heuristics for
+  // low-effort compression mode.
+  if (!low_effort || !entropy_combine) {
+    const float x = quality / 100.f;
+    // cubic ramp between 1 and MAX_HISTO_GREEDY:
+    const int threshold_size = (int)(1 + (x * x * x) * (MAX_HISTO_GREEDY - 1));
+    cur_combo = HistogramCombineStochastic(image_histo,
+                                           tmp_histos->histograms[0],
+                                           cur_combo, quality, threshold_size);
+    if ((image_histo->size <= threshold_size) &&
+        !HistogramCombineGreedy(image_histo, cur_combo)) {
+      goto Error;
+    }
+  }
+
+  // TODO(vikasa): Optimize HistogramRemap for low-effort compression mode also.
   // Find the optimal map from original histograms to the final ones.
-  HistogramRemap(image_out, image_in, histogram_symbols);
+  HistogramRemap(orig_histo, image_histo, histogram_symbols);
+
   ok = 1;
 
-Error:
-  free(image_out);
+ Error:
+  WebPSafeFree(bin_map);
+  VP8LFreeHistogramSet(orig_histo);
   return ok;
 }
diff --git a/drivers/webp/enc/histogram.h b/drivers/webp/enc/histogram.h
index 5b5de25539..72f045793a 100644
--- a/drivers/webp/enc/histogram.h
+++ b/drivers/webp/enc/histogram.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
@@ -12,17 +14,13 @@
 #ifndef WEBP_ENC_HISTOGRAM_H_
 #define WEBP_ENC_HISTOGRAM_H_
 
-#include <assert.h>
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdio.h>
 #include <string.h>
 
 #include "./backward_references.h"
-#include "../format_constants.h"
-#include "../types.h"
+#include "webp/format_constants.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -30,18 +28,23 @@ extern "C" {
 typedef struct {
   // literal_ contains green literal, palette-code and
   // copy-length-prefix histogram
-  int literal_[PIX_OR_COPY_CODES_MAX];
-  int red_[256];
-  int blue_[256];
-  int alpha_[256];
+  uint32_t* literal_;         // Pointer to the allocated buffer for literal.
+  uint32_t red_[NUM_LITERAL_CODES];
+  uint32_t blue_[NUM_LITERAL_CODES];
+  uint32_t alpha_[NUM_LITERAL_CODES];
   // Backward reference prefix-code histogram.
-  int distance_[NUM_DISTANCE_CODES];
+  uint32_t distance_[NUM_DISTANCE_CODES];
   int palette_code_bits_;
-  double bit_cost_;   // cached value of VP8LHistogramEstimateBits(this)
+  uint32_t trivial_symbol_;  // True, if histograms for Red, Blue & Alpha
+                             // literal symbols are single valued.
+  double bit_cost_;          // cached value of bit cost.
+  double literal_cost_;      // Cached values of dominant entropy costs:
+  double red_cost_;          // literal, red & blue.
+  double blue_cost_;
 } VP8LHistogram;
 
 // Collection of histograms with fixed capacity, allocated as one
-// big memory chunk. Can be destroyed by simply calling 'free()'.
+// big memory chunk. Can be destroyed by calling WebPSafeFree().
 typedef struct {
   int size;         // number of slots currently in use
   int max_size;     // maximum capacity
@@ -57,6 +60,9 @@ void VP8LHistogramCreate(VP8LHistogram* const p,
                          const VP8LBackwardRefs* const refs,
                          int palette_code_bits);
 
+// Return the size of the histogram for a given palette_code_bits.
+int VP8LGetHistogramSize(int palette_code_bits);
+
 // Set the palette_code_bits and reset the stats.
 void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
 
@@ -64,51 +70,40 @@ void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
 void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
                             VP8LHistogram* const histo);
 
+// Free the memory allocated for the histogram.
+void VP8LFreeHistogram(VP8LHistogram* const histo);
+
+// Free the memory allocated for the histogram set.
+void VP8LFreeHistogramSet(VP8LHistogramSet* const histo);
+
 // Allocate an array of pointer to histograms, allocated and initialized
 // using 'cache_bits'. Return NULL in case of memory error.
 VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
 
+// Allocate and initialize histogram object with specified 'cache_bits'.
+// Returns NULL in case of memory error.
+// Special case of VP8LAllocateHistogramSet, with size equal to 1.
+VP8LHistogram* VP8LAllocateHistogram(int cache_bits);
+
 // Accumulate a token 'v' into a histogram.
 void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
                                      const PixOrCopy* const v);
 
-// Estimate how many bits the combined entropy of literals and distance
-// approximately maps to.
-double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
-
-// This function estimates the cost in bits excluding the bits needed to
-// represent the entropy code itself.
-double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
-
-static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
-                                         const VP8LHistogram* const a) {
-  int i;
-  for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
-    p->literal_[i] += a->literal_[i];
-  }
-  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
-    p->distance_[i] += a->distance_[i];
-  }
-  for (i = 0; i < 256; ++i) {
-    p->red_[i] += a->red_[i];
-    p->blue_[i] += a->blue_[i];
-    p->alpha_[i] += a->alpha_[i];
-  }
-}
-
-static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
-  return 256 + NUM_LENGTH_CODES +
-      ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);
+static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) {
+  return NUM_LITERAL_CODES + NUM_LENGTH_CODES +
+      ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0);
 }
 
 // Builds the histogram image.
 int VP8LGetHistoImageSymbols(int xsize, int ysize,
                              const VP8LBackwardRefs* const refs,
-                             int quality, int histogram_bits, int cache_bits,
+                             int quality, int low_effort,
+                             int histogram_bits, int cache_bits,
                              VP8LHistogramSet* const image_in,
+                             VP8LHistogramSet* const tmp_histos,
                              uint16_t* const histogram_symbols);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }
 #endif
 
diff --git a/drivers/webp/enc/iterator.c b/drivers/webp/enc/iterator.c
index 86e473bcf0..99d960a547 100644
--- a/drivers/webp/enc/iterator.c
+++ b/drivers/webp/enc/iterator.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // VP8Iterator: block iterator
@@ -13,21 +15,16 @@
 
 #include "./vp8enci.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // VP8Iterator
 //------------------------------------------------------------------------------
 
 static void InitLeft(VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
-  enc->y_left_[-1] = enc->u_left_[-1] = enc->v_left_[-1] =
+  it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] =
       (it->y_ > 0) ? 129 : 127;
-  memset(enc->y_left_, 129, 16);
-  memset(enc->u_left_, 129, 8);
-  memset(enc->v_left_, 129, 8);
+  memset(it->y_left_, 129, 16);
+  memset(it->u_left_, 129, 8);
+  memset(it->v_left_, 129, 8);
   it->left_nz_[8] = 0;
 }
 
@@ -38,43 +35,60 @@ static void InitTop(VP8EncIterator* const it) {
   memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));
 }
 
-void VP8IteratorReset(VP8EncIterator* const it) {
+void VP8IteratorSetRow(VP8EncIterator* const it, int y) {
   VP8Encoder* const enc = it->enc_;
   it->x_ = 0;
-  it->y_ = 0;
-  it->y_offset_ = 0;
-  it->uv_offset_ = 0;
-  it->mb_ = enc->mb_info_;
-  it->preds_ = enc->preds_;
+  it->y_ = y;
+  it->bw_ = &enc->parts_[y & (enc->num_parts_ - 1)];
+  it->preds_ = enc->preds_ + y * 4 * enc->preds_w_;
   it->nz_ = enc->nz_;
-  it->bw_ = &enc->parts_[0];
-  it->done_ = enc->mb_w_* enc->mb_h_;
+  it->mb_ = enc->mb_info_ + y * enc->mb_w_;
+  it->y_top_ = enc->y_top_;
+  it->uv_top_ = enc->uv_top_;
+  InitLeft(it);
+}
+
+void VP8IteratorReset(VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  VP8IteratorSetRow(it, 0);
+  VP8IteratorSetCountDown(it, enc->mb_w_ * enc->mb_h_);  // default
   InitTop(it);
   InitLeft(it);
   memset(it->bit_count_, 0, sizeof(it->bit_count_));
   it->do_trellis_ = 0;
 }
 
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down) {
+  it->count_down_ = it->count_down0_ = count_down;
+}
+
+int VP8IteratorIsDone(const VP8EncIterator* const it) {
+  return (it->count_down_ <= 0);
+}
+
 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
   it->enc_ = enc;
   it->y_stride_  = enc->pic_->y_stride;
   it->uv_stride_ = enc->pic_->uv_stride;
-  // TODO(later): for multithreading, these should be owned by 'it'.
-  it->yuv_in_   = enc->yuv_in_;
-  it->yuv_out_  = enc->yuv_out_;
-  it->yuv_out2_ = enc->yuv_out2_;
-  it->yuv_p_    = enc->yuv_p_;
+  it->yuv_in_   = (uint8_t*)WEBP_ALIGN(it->yuv_mem_);
+  it->yuv_out_  = it->yuv_in_ + YUV_SIZE_ENC;
+  it->yuv_out2_ = it->yuv_out_ + YUV_SIZE_ENC;
+  it->yuv_p_    = it->yuv_out2_ + YUV_SIZE_ENC;
   it->lf_stats_ = enc->lf_stats_;
   it->percent0_ = enc->percent_;
+  it->y_left_ = (uint8_t*)WEBP_ALIGN(it->yuv_left_mem_ + 1);
+  it->u_left_ = it->y_left_ + 16 + 16;
+  it->v_left_ = it->u_left_ + 16;
   VP8IteratorReset(it);
 }
 
 int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
   VP8Encoder* const enc = it->enc_;
-  if (delta && enc->pic_->progress_hook) {
-    const int percent = (enc->mb_h_ <= 1)
+  if (delta && enc->pic_->progress_hook != NULL) {
+    const int done = it->count_down0_ - it->count_down_;
+    const int percent = (it->count_down0_ <= 0)
                       ? it->percent0_
-                      : it->percent0_ + delta * it->y_ / (enc->mb_h_ - 1);
+                      : it->percent0_ + delta * done / it->count_down0_;
     return WebPReportProgress(enc->pic_, percent, &enc->percent_);
   }
   return 1;
@@ -84,6 +98,8 @@ int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
 // Import the source samples into the cache. Takes care of replicating
 // boundary pixels if necessary.
 
+static WEBP_INLINE int MinSize(int a, int b) { return (a < b) ? a : b; }
+
 static void ImportBlock(const uint8_t* src, int src_stride,
                         uint8_t* dst, int w, int h, int size) {
   int i;
@@ -101,30 +117,55 @@ static void ImportBlock(const uint8_t* src, int src_stride,
   }
 }
 
-void VP8IteratorImport(const VP8EncIterator* const it) {
+static void ImportLine(const uint8_t* src, int src_stride,
+                       uint8_t* dst, int len, int total_len) {
+  int i;
+  for (i = 0; i < len; ++i, src += src_stride) dst[i] = *src;
+  for (; i < total_len; ++i) dst[i] = dst[len - 1];
+}
+
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32) {
   const VP8Encoder* const enc = it->enc_;
   const int x = it->x_, y = it->y_;
   const WebPPicture* const pic = enc->pic_;
-  const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
+  const uint8_t* const ysrc = pic->y + (y * pic->y_stride  + x) * 16;
   const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
   const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
-  uint8_t* const ydst = it->yuv_in_ + Y_OFF;
-  uint8_t* const udst = it->yuv_in_ + U_OFF;
-  uint8_t* const vdst = it->yuv_in_ + V_OFF;
-  int w = (pic->width - x * 16);
-  int h = (pic->height - y * 16);
-
-  if (w > 16) w = 16;
-  if (h > 16) h = 16;
-
-  // Luma plane
-  ImportBlock(ysrc, pic->y_stride, ydst, w, h, 16);
-
-  {   // U/V planes
-    const int uv_w = (w + 1) >> 1;
-    const int uv_h = (h + 1) >> 1;
-    ImportBlock(usrc, pic->uv_stride, udst, uv_w, uv_h, 8);
-    ImportBlock(vsrc, pic->uv_stride, vdst, uv_w, uv_h, 8);
+  const int w = MinSize(pic->width - x * 16, 16);
+  const int h = MinSize(pic->height - y * 16, 16);
+  const int uv_w = (w + 1) >> 1;
+  const int uv_h = (h + 1) >> 1;
+
+  ImportBlock(ysrc, pic->y_stride,  it->yuv_in_ + Y_OFF_ENC, w, h, 16);
+  ImportBlock(usrc, pic->uv_stride, it->yuv_in_ + U_OFF_ENC, uv_w, uv_h, 8);
+  ImportBlock(vsrc, pic->uv_stride, it->yuv_in_ + V_OFF_ENC, uv_w, uv_h, 8);
+
+  if (tmp_32 == NULL) return;
+
+  // Import source (uncompressed) samples into boundary.
+  if (x == 0) {
+    InitLeft(it);
+  } else {
+    if (y == 0) {
+      it->y_left_[-1] = it->u_left_[-1] = it->v_left_[-1] = 127;
+    } else {
+      it->y_left_[-1] = ysrc[- 1 - pic->y_stride];
+      it->u_left_[-1] = usrc[- 1 - pic->uv_stride];
+      it->v_left_[-1] = vsrc[- 1 - pic->uv_stride];
+    }
+    ImportLine(ysrc - 1, pic->y_stride,  it->y_left_, h,   16);
+    ImportLine(usrc - 1, pic->uv_stride, it->u_left_, uv_h, 8);
+    ImportLine(vsrc - 1, pic->uv_stride, it->v_left_, uv_h, 8);
+  }
+
+  it->y_top_  = tmp_32 + 0;
+  it->uv_top_ = tmp_32 + 16;
+  if (y == 0) {
+    memset(tmp_32, 127, 32 * sizeof(*tmp_32));
+  } else {
+    ImportLine(ysrc - pic->y_stride,  1, tmp_32,          w,   16);
+    ImportLine(usrc - pic->uv_stride, 1, tmp_32 + 16,     uv_w, 8);
+    ImportLine(vsrc - pic->uv_stride, 1, tmp_32 + 16 + 8, uv_w, 8);
   }
 }
 
@@ -144,9 +185,9 @@ void VP8IteratorExport(const VP8EncIterator* const it) {
   const VP8Encoder* const enc = it->enc_;
   if (enc->config_->show_compressed) {
     const int x = it->x_, y = it->y_;
-    const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
-    const uint8_t* const usrc = it->yuv_out_ + U_OFF;
-    const uint8_t* const vsrc = it->yuv_out_ + V_OFF;
+    const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
+    const uint8_t* const usrc = it->yuv_out_ + U_OFF_ENC;
+    const uint8_t* const vsrc = it->yuv_out_ + V_OFF_ENC;
     const WebPPicture* const pic = enc->pic_;
     uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;
     uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
@@ -240,48 +281,44 @@ void VP8IteratorBytesToNz(VP8EncIterator* const it) {
 #undef BIT
 
 //------------------------------------------------------------------------------
-// Advance to the next position, doing the bookeeping.
+// Advance to the next position, doing the bookkeeping.
 
-int VP8IteratorNext(VP8EncIterator* const it,
-                    const uint8_t* const block_to_save) {
+void VP8IteratorSaveBoundary(VP8EncIterator* const it) {
   VP8Encoder* const enc = it->enc_;
-  if (block_to_save) {
-    const int x = it->x_, y = it->y_;
-    const uint8_t* const ysrc = block_to_save + Y_OFF;
-    const uint8_t* const usrc = block_to_save + U_OFF;
-    if (x < enc->mb_w_ - 1) {   // left
-      int i;
-      for (i = 0; i < 16; ++i) {
-        enc->y_left_[i] = ysrc[15 + i * BPS];
-      }
-      for (i = 0; i < 8; ++i) {
-        enc->u_left_[i] = usrc[7 + i * BPS];
-        enc->v_left_[i] = usrc[15 + i * BPS];
-      }
-      // top-left (before 'top'!)
-      enc->y_left_[-1] = enc->y_top_[x * 16 + 15];
-      enc->u_left_[-1] = enc->uv_top_[x * 16 + 0 + 7];
-      enc->v_left_[-1] = enc->uv_top_[x * 16 + 8 + 7];
+  const int x = it->x_, y = it->y_;
+  const uint8_t* const ysrc = it->yuv_out_ + Y_OFF_ENC;
+  const uint8_t* const uvsrc = it->yuv_out_ + U_OFF_ENC;
+  if (x < enc->mb_w_ - 1) {   // left
+    int i;
+    for (i = 0; i < 16; ++i) {
+      it->y_left_[i] = ysrc[15 + i * BPS];
     }
-    if (y < enc->mb_h_ - 1) {  // top
-      memcpy(enc->y_top_ + x * 16, ysrc + 15 * BPS, 16);
-      memcpy(enc->uv_top_ + x * 16, usrc + 7 * BPS, 8 + 8);
+    for (i = 0; i < 8; ++i) {
+      it->u_left_[i] = uvsrc[7 + i * BPS];
+      it->v_left_[i] = uvsrc[15 + i * BPS];
     }
+    // top-left (before 'top'!)
+    it->y_left_[-1] = it->y_top_[15];
+    it->u_left_[-1] = it->uv_top_[0 + 7];
+    it->v_left_[-1] = it->uv_top_[8 + 7];
   }
+  if (y < enc->mb_h_ - 1) {  // top
+    memcpy(it->y_top_, ysrc + 15 * BPS, 16);
+    memcpy(it->uv_top_, uvsrc + 7 * BPS, 8 + 8);
+  }
+}
 
-  it->mb_++;
+int VP8IteratorNext(VP8EncIterator* const it) {
   it->preds_ += 4;
-  it->nz_++;
-  it->x_++;
-  if (it->x_ == enc->mb_w_) {
-    it->x_ = 0;
-    it->y_++;
-    it->bw_ = &enc->parts_[it->y_ & (enc->num_parts_ - 1)];
-    it->preds_ = enc->preds_ + it->y_ * 4 * enc->preds_w_;
-    it->nz_ = enc->nz_;
-    InitLeft(it);
+  it->mb_ += 1;
+  it->nz_ += 1;
+  it->y_top_ += 16;
+  it->uv_top_ += 16;
+  it->x_ += 1;
+  if (it->x_ == it->enc_->mb_w_) {
+    VP8IteratorSetRow(it, ++it->y_);
   }
-  return (0 < --it->done_);
+  return (0 < --it->count_down_);
 }
 
 //------------------------------------------------------------------------------
@@ -368,15 +405,15 @@ void VP8IteratorStartI4(VP8EncIterator* const it) {
 
   // Import the boundary samples
   for (i = 0; i < 17; ++i) {    // left
-    it->i4_boundary_[i] = enc->y_left_[15 - i];
+    it->i4_boundary_[i] = it->y_left_[15 - i];
   }
   for (i = 0; i < 16; ++i) {    // top
-    it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i];
+    it->i4_boundary_[17 + i] = it->y_top_[i];
   }
   // top-right samples have a special case on the far right of the picture
   if (it->x_ < enc->mb_w_ - 1) {
     for (i = 16; i < 16 + 4; ++i) {
-      it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i];
+      it->i4_boundary_[17 + i] = it->y_top_[i];
     }
   } else {    // else, replicate the last valid pixel four times
     for (i = 16; i < 16 + 4; ++i) {
@@ -417,6 +454,3 @@ int VP8IteratorRotateI4(VP8EncIterator* const it,
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/near_lossless.c b/drivers/webp/enc/near_lossless.c
new file mode 100644
index 0000000000..9bc0f0e786
--- /dev/null
+++ b/drivers/webp/enc/near_lossless.c
@@ -0,0 +1,160 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Near-lossless image preprocessing adjusts pixel values to help
+// compressibility with a guarantee of maximum deviation between original and
+// resulting pixel values.
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+// Converted to C by Aleksander Kramarz (akramarz@google.com)
+
+#include <stdlib.h>
+
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+#include "./vp8enci.h"
+
+#define MIN_DIM_FOR_NEAR_LOSSLESS 64
+#define MAX_LIMIT_BITS             5
+
+// Computes quantized pixel value and distance from original value.
+static void GetValAndDistance(int a, int initial, int bits,
+                              int* const val, int* const distance) {
+  const int mask = ~((1 << bits) - 1);
+  *val = (initial & mask) | (initial >> (8 - bits));
+  *distance = 2 * abs(a - *val);
+}
+
+// Clamps the value to range [0, 255].
+static int Clamp8b(int val) {
+  const int min_val = 0;
+  const int max_val = 0xff;
+  return (val < min_val) ? min_val : (val > max_val) ? max_val : val;
+}
+
+// Quantizes values {a, a+(1<<bits), a-(1<<bits)} and returns the nearest one.
+static int FindClosestDiscretized(int a, int bits) {
+  int best_val = a, i;
+  int min_distance = 256;
+
+  for (i = -1; i <= 1; ++i) {
+    int candidate, distance;
+    const int val = Clamp8b(a + i * (1 << bits));
+    GetValAndDistance(a, val, bits, &candidate, &distance);
+    if (i != 0) {
+      ++distance;
+    }
+    // Smallest distance but favor i == 0 over i == -1 and i == 1
+    // since that keeps the overall intensity more constant in the
+    // images.
+    if (distance < min_distance) {
+      min_distance = distance;
+      best_val = candidate;
+    }
+  }
+  return best_val;
+}
+
+// Applies FindClosestDiscretized to all channels of pixel.
+static uint32_t ClosestDiscretizedArgb(uint32_t a, int bits) {
+  return
+      (FindClosestDiscretized(a >> 24, bits) << 24) |
+      (FindClosestDiscretized((a >> 16) & 0xff, bits) << 16) |
+      (FindClosestDiscretized((a >> 8) & 0xff, bits) << 8) |
+      (FindClosestDiscretized(a & 0xff, bits));
+}
+
+// Checks if distance between corresponding channel values of pixels a and b
+// is within the given limit.
+static int IsNear(uint32_t a, uint32_t b, int limit) {
+  int k;
+  for (k = 0; k < 4; ++k) {
+    const int delta =
+        (int)((a >> (k * 8)) & 0xff) - (int)((b >> (k * 8)) & 0xff);
+    if (delta >= limit || delta <= -limit) {
+      return 0;
+    }
+  }
+  return 1;
+}
+
+static int IsSmooth(const uint32_t* const prev_row,
+                    const uint32_t* const curr_row,
+                    const uint32_t* const next_row,
+                    int ix, int limit) {
+  // Check that all pixels in 4-connected neighborhood are smooth.
+  return (IsNear(curr_row[ix], curr_row[ix - 1], limit) &&
+          IsNear(curr_row[ix], curr_row[ix + 1], limit) &&
+          IsNear(curr_row[ix], prev_row[ix], limit) &&
+          IsNear(curr_row[ix], next_row[ix], limit));
+}
+
+// Adjusts pixel values of image with given maximum error.
+static void NearLossless(int xsize, int ysize, uint32_t* argb,
+                         int limit_bits, uint32_t* copy_buffer) {
+  int x, y;
+  const int limit = 1 << limit_bits;
+  uint32_t* prev_row = copy_buffer;
+  uint32_t* curr_row = prev_row + xsize;
+  uint32_t* next_row = curr_row + xsize;
+  memcpy(copy_buffer, argb, xsize * 2 * sizeof(argb[0]));
+
+  for (y = 1; y < ysize - 1; ++y) {
+    uint32_t* const curr_argb_row = argb + y * xsize;
+    uint32_t* const next_argb_row = curr_argb_row + xsize;
+    memcpy(next_row, next_argb_row, xsize * sizeof(argb[0]));
+    for (x = 1; x < xsize - 1; ++x) {
+      if (!IsSmooth(prev_row, curr_row, next_row, x, limit)) {
+        curr_argb_row[x] = ClosestDiscretizedArgb(curr_row[x], limit_bits);
+      }
+    }
+    {
+      // Three-way swap.
+      uint32_t* const temp = prev_row;
+      prev_row = curr_row;
+      curr_row = next_row;
+      next_row = temp;
+    }
+  }
+}
+
+static int QualityToLimitBits(int quality) {
+  // quality mapping:
+  //  0..19 -> 5
+  //  0..39 -> 4
+  //  0..59 -> 3
+  //  0..79 -> 2
+  //  0..99 -> 1
+  //  100   -> 0
+  return MAX_LIMIT_BITS - quality / 20;
+}
+
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality) {
+  int i;
+  uint32_t* const copy_buffer =
+      (uint32_t*)WebPSafeMalloc(xsize * 3, sizeof(*copy_buffer));
+  const int limit_bits = QualityToLimitBits(quality);
+  assert(argb != NULL);
+  assert(limit_bits >= 0);
+  assert(limit_bits <= MAX_LIMIT_BITS);
+  if (copy_buffer == NULL) {
+    return 0;
+  }
+  // For small icon images, don't attempt to apply near-lossless compression.
+  if (xsize < MIN_DIM_FOR_NEAR_LOSSLESS && ysize < MIN_DIM_FOR_NEAR_LOSSLESS) {
+    WebPSafeFree(copy_buffer);
+    return 1;
+  }
+
+  for (i = limit_bits; i != 0; --i) {
+    NearLossless(xsize, ysize, argb, i, copy_buffer);
+  }
+  WebPSafeFree(copy_buffer);
+  return 1;
+}
diff --git a/drivers/webp/enc/picture.c b/drivers/webp/enc/picture.c
index 44eed06083..26679a72e4 100644
--- a/drivers/webp/enc/picture.c
+++ b/drivers/webp/enc/picture.c
@@ -1,470 +1,179 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-// WebPPicture utils: colorspace conversion, crop, ...
+// WebPPicture class basis
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <assert.h>
 #include <stdlib.h>
-#include <math.h>
 
 #include "./vp8enci.h"
-#include "../utils/rescaler.h"
-#include "../utils/utils.h"
 #include "../dsp/dsp.h"
-#include "../dsp/yuv.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-#define HALVE(x) (((x) + 1) >> 1)
-#define IS_YUV_CSP(csp, YUV_CSP) (((csp) & WEBP_CSP_UV_MASK) == (YUV_CSP))
-
-static const union {
-  uint32_t argb;
-  uint8_t  bytes[4];
-} test_endian = { 0xff000000u };
-#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
+#include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
 // WebPPicture
 //------------------------------------------------------------------------------
 
-int WebPPictureAlloc(WebPPicture* picture) {
-  if (picture != NULL) {
-    const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
-    const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
-    const int width = picture->width;
-    const int height = picture->height;
-
-    if (!picture->use_argb) {
-      const int y_stride = width;
-      const int uv_width = HALVE(width);
-      const int uv_height = HALVE(height);
-      const int uv_stride = uv_width;
-      int uv0_stride = 0;
-      int a_width, a_stride;
-      uint64_t y_size, uv_size, uv0_size, a_size, total_size;
-      uint8_t* mem;
-
-      // U/V
-      switch (uv_csp) {
-        case WEBP_YUV420:
-          break;
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-        case WEBP_YUV400:    // for now, we'll just reset the U/V samples
-          break;
-        case WEBP_YUV422:
-          uv0_stride = uv_width;
-          break;
-        case WEBP_YUV444:
-          uv0_stride = width;
-          break;
-#endif
-        default:
-          return 0;
-      }
-      uv0_size = height * uv0_stride;
-
-      // alpha
-      a_width = has_alpha ? width : 0;
-      a_stride = a_width;
-      y_size = (uint64_t)y_stride * height;
-      uv_size = (uint64_t)uv_stride * uv_height;
-      a_size =  (uint64_t)a_stride * height;
-
-      total_size = y_size + a_size + 2 * uv_size + 2 * uv0_size;
-
-      // Security and validation checks
-      if (width <= 0 || height <= 0 ||         // luma/alpha param error
-          uv_width < 0 || uv_height < 0) {     // u/v param error
-        return 0;
-      }
-      // Clear previous buffer and allocate a new one.
-      WebPPictureFree(picture);   // erase previous buffer
-      mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
-      if (mem == NULL) return 0;
-
-      // From now on, we're in the clear, we can no longer fail...
-      picture->memory_ = (void*)mem;
-      picture->y_stride  = y_stride;
-      picture->uv_stride = uv_stride;
-      picture->a_stride  = a_stride;
-      picture->uv0_stride = uv0_stride;
-      // TODO(skal): we could align the y/u/v planes and adjust stride.
-      picture->y = mem;
-      mem += y_size;
-
-      picture->u = mem;
-      mem += uv_size;
-      picture->v = mem;
-      mem += uv_size;
-
-      if (a_size) {
-        picture->a = mem;
-        mem += a_size;
-      }
-      if (uv0_size) {
-        picture->u0 = mem;
-        mem += uv0_size;
-        picture->v0 = mem;
-        mem += uv0_size;
-      }
-    } else {
-      void* memory;
-      const uint64_t argb_size = (uint64_t)width * height;
-      if (width <= 0 || height <= 0) {
-        return 0;
-      }
-      // Clear previous buffer and allocate a new one.
-      WebPPictureFree(picture);   // erase previous buffer
-      memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
-      if (memory == NULL) return 0;
+static int DummyWriter(const uint8_t* data, size_t data_size,
+                       const WebPPicture* const picture) {
+  // The following are to prevent 'unused variable' error message.
+  (void)data;
+  (void)data_size;
+  (void)picture;
+  return 1;
+}
 
-      // TODO(skal): align plane to cache line?
-      picture->memory_argb_ = memory;
-      picture->argb = (uint32_t*)memory;
-      picture->argb_stride = width;
-    }
+int WebPPictureInitInternal(WebPPicture* picture, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
+    return 0;   // caller/system version mismatch!
+  }
+  if (picture != NULL) {
+    memset(picture, 0, sizeof(*picture));
+    picture->writer = DummyWriter;
+    WebPEncodingSetError(picture, VP8_ENC_OK);
   }
   return 1;
 }
 
-// Remove reference to the ARGB buffer (doesn't free anything).
-static void PictureResetARGB(WebPPicture* const picture) {
+//------------------------------------------------------------------------------
+
+static void WebPPictureResetBufferARGB(WebPPicture* const picture) {
   picture->memory_argb_ = NULL;
   picture->argb = NULL;
   picture->argb_stride = 0;
 }
 
-// Remove reference to the YUVA buffer (doesn't free anything).
-static void PictureResetYUVA(WebPPicture* const picture) {
+static void WebPPictureResetBufferYUVA(WebPPicture* const picture) {
   picture->memory_ = NULL;
   picture->y = picture->u = picture->v = picture->a = NULL;
-  picture->u0 = picture->v0 = NULL;
   picture->y_stride = picture->uv_stride = 0;
   picture->a_stride = 0;
-  picture->uv0_stride = 0;
 }
 
-// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them
-// into 'dst'. Mark 'dst' as not owning any memory.
-static void WebPPictureGrabSpecs(const WebPPicture* const src,
-                                 WebPPicture* const dst) {
-  assert(src != NULL && dst != NULL);
-  *dst = *src;
-  PictureResetYUVA(dst);
-  PictureResetARGB(dst);
+void WebPPictureResetBuffers(WebPPicture* const picture) {
+  WebPPictureResetBufferARGB(picture);
+  WebPPictureResetBufferYUVA(picture);
 }
 
-// Allocate a new argb buffer, discarding any existing one and preserving
-// the other YUV(A) buffer.
-static int PictureAllocARGB(WebPPicture* const picture) {
-  WebPPicture tmp;
-  free(picture->memory_argb_);
-  PictureResetARGB(picture);
-  picture->use_argb = 1;
-  WebPPictureGrabSpecs(picture, &tmp);
-  if (!WebPPictureAlloc(&tmp)) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-  }
-  picture->memory_argb_ = tmp.memory_argb_;
-  picture->argb = tmp.argb;
-  picture->argb_stride = tmp.argb_stride;
-  return 1;
-}
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) {
+  void* memory;
+  const uint64_t argb_size = (uint64_t)width * height;
 
-// Release memory owned by 'picture' (both YUV and ARGB buffers).
-void WebPPictureFree(WebPPicture* picture) {
-  if (picture != NULL) {
-    free(picture->memory_);
-    free(picture->memory_argb_);
-    PictureResetYUVA(picture);
-    PictureResetARGB(picture);
-  }
-}
+  assert(picture != NULL);
 
-//------------------------------------------------------------------------------
-// Picture copying
+  WebPSafeFree(picture->memory_argb_);
+  WebPPictureResetBufferARGB(picture);
 
-// Not worth moving to dsp/enc.c (only used here).
-static void CopyPlane(const uint8_t* src, int src_stride,
-                      uint8_t* dst, int dst_stride, int width, int height) {
-  while (height-- > 0) {
-    memcpy(dst, src, width);
-    src += src_stride;
-    dst += dst_stride;
+  if (width <= 0 || height <= 0) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
   }
-}
-
-// Adjust top-left corner to chroma sample position.
-static void SnapTopLeftPosition(const WebPPicture* const pic,
-                                int* const left, int* const top) {
-  if (!pic->use_argb) {
-    const int is_yuv422 = IS_YUV_CSP(pic->colorspace, WEBP_YUV422);
-    if (IS_YUV_CSP(pic->colorspace, WEBP_YUV420) || is_yuv422) {
-      *left &= ~1;
-      if (!is_yuv422) *top &= ~1;
-    }
+  // allocate a new buffer.
+  memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
+  if (memory == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
   }
-}
-
-// Adjust top-left corner and verify that the sub-rectangle is valid.
-static int AdjustAndCheckRectangle(const WebPPicture* const pic,
-                                   int* const left, int* const top,
-                                   int width, int height) {
-  SnapTopLeftPosition(pic, left, top);
-  if ((*left) < 0 || (*top) < 0) return 0;
-  if (width <= 0 || height <= 0) return 0;
-  if ((*left) + width > pic->width) return 0;
-  if ((*top) + height > pic->height) return 0;
+  // TODO(skal): align plane to cache line?
+  picture->memory_argb_ = memory;
+  picture->argb = (uint32_t*)memory;
+  picture->argb_stride = width;
   return 1;
 }
 
-int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
-  if (src == NULL || dst == NULL) return 0;
-  if (src == dst) return 1;
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) {
+  const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
+  const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
+  const int y_stride = width;
+  const int uv_width = (width + 1) >> 1;
+  const int uv_height = (height + 1) >> 1;
+  const int uv_stride = uv_width;
+  int a_width, a_stride;
+  uint64_t y_size, uv_size, a_size, total_size;
+  uint8_t* mem;
 
-  WebPPictureGrabSpecs(src, dst);
-  if (!WebPPictureAlloc(dst)) return 0;
+  assert(picture != NULL);
 
-  if (!src->use_argb) {
-    CopyPlane(src->y, src->y_stride,
-              dst->y, dst->y_stride, dst->width, dst->height);
-    CopyPlane(src->u, src->uv_stride,
-              dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
-    CopyPlane(src->v, src->uv_stride,
-              dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
-    if (dst->a != NULL)  {
-      CopyPlane(src->a, src->a_stride,
-                dst->a, dst->a_stride, dst->width, dst->height);
-    }
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    if (dst->u0 != NULL)  {
-      int uv0_width = src->width;
-      if (IS_YUV_CSP(dst->colorspace, WEBP_YUV422)) {
-        uv0_width = HALVE(uv0_width);
-      }
-      CopyPlane(src->u0, src->uv0_stride,
-                dst->u0, dst->uv0_stride, uv0_width, dst->height);
-      CopyPlane(src->v0, src->uv0_stride,
-                dst->v0, dst->uv0_stride, uv0_width, dst->height);
-    }
-#endif
-  } else {
-    CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,
-              (uint8_t*)dst->argb, 4 * dst->argb_stride,
-              4 * dst->width, dst->height);
-  }
-  return 1;
-}
+  WebPSafeFree(picture->memory_);
+  WebPPictureResetBufferYUVA(picture);
 
-int WebPPictureIsView(const WebPPicture* picture) {
-  if (picture == NULL) return 0;
-  if (picture->use_argb) {
-    return (picture->memory_argb_ == NULL);
+  if (uv_csp != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
   }
-  return (picture->memory_ == NULL);
-}
 
-int WebPPictureView(const WebPPicture* src,
-                    int left, int top, int width, int height,
-                    WebPPicture* dst) {
-  if (src == NULL || dst == NULL) return 0;
+  // alpha
+  a_width = has_alpha ? width : 0;
+  a_stride = a_width;
+  y_size = (uint64_t)y_stride * height;
+  uv_size = (uint64_t)uv_stride * uv_height;
+  a_size =  (uint64_t)a_stride * height;
 
-  // verify rectangle position.
-  if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0;
+  total_size = y_size + a_size + 2 * uv_size;
 
-  if (src != dst) {  // beware of aliasing! We don't want to leak 'memory_'.
-    WebPPictureGrabSpecs(src, dst);
+  // Security and validation checks
+  if (width <= 0 || height <= 0 ||         // luma/alpha param error
+      uv_width < 0 || uv_height < 0) {     // u/v param error
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION);
   }
-  dst->width = width;
-  dst->height = height;
-  if (!src->use_argb) {
-    dst->y = src->y + top * src->y_stride + left;
-    dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1);
-    dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
-    if (src->a != NULL) {
-      dst->a = src->a + top * src->a_stride + left;
-    }
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    if (src->u0 != NULL) {
-      const int left_pos =
-          IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left;
-      dst->u0 = src->u0 + top * src->uv0_stride + left_pos;
-      dst->v0 = src->v0 + top * src->uv0_stride + left_pos;
-    }
-#endif
-  } else {
-    dst->argb = src->argb + top * src->argb_stride + left;
+  // allocate a new buffer.
+  mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+  if (mem == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
   }
-  return 1;
-}
 
-//------------------------------------------------------------------------------
-// Picture cropping
+  // From now on, we're in the clear, we can no longer fail...
+  picture->memory_ = (void*)mem;
+  picture->y_stride  = y_stride;
+  picture->uv_stride = uv_stride;
+  picture->a_stride  = a_stride;
 
-int WebPPictureCrop(WebPPicture* pic,
-                    int left, int top, int width, int height) {
-  WebPPicture tmp;
+  // TODO(skal): we could align the y/u/v planes and adjust stride.
+  picture->y = mem;
+  mem += y_size;
 
-  if (pic == NULL) return 0;
-  if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0;
+  picture->u = mem;
+  mem += uv_size;
+  picture->v = mem;
+  mem += uv_size;
 
-  WebPPictureGrabSpecs(pic, &tmp);
-  tmp.width = width;
-  tmp.height = height;
-  if (!WebPPictureAlloc(&tmp)) return 0;
-
-  if (!pic->use_argb) {
-    const int y_offset = top * pic->y_stride + left;
-    const int uv_offset = (top / 2) * pic->uv_stride + left / 2;
-    CopyPlane(pic->y + y_offset, pic->y_stride,
-              tmp.y, tmp.y_stride, width, height);
-    CopyPlane(pic->u + uv_offset, pic->uv_stride,
-              tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
-    CopyPlane(pic->v + uv_offset, pic->uv_stride,
-              tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
-
-    if (tmp.a != NULL) {
-      const int a_offset = top * pic->a_stride + left;
-      CopyPlane(pic->a + a_offset, pic->a_stride,
-                tmp.a, tmp.a_stride, width, height);
-    }
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    if (tmp.u0 != NULL) {
-      int w = width;
-      int left_pos = left;
-      if (IS_YUV_CSP(tmp.colorspace, WEBP_YUV422)) {
-        w = HALVE(w);
-        left_pos = HALVE(left_pos);
-      }
-      CopyPlane(pic->u0 + top * pic->uv0_stride + left_pos, pic->uv0_stride,
-                tmp.u0, tmp.uv0_stride, w, height);
-      CopyPlane(pic->v0 + top * pic->uv0_stride + left_pos, pic->uv0_stride,
-                tmp.v0, tmp.uv0_stride, w, height);
-    }
-#endif
-  } else {
-    const uint8_t* const src =
-        (const uint8_t*)(pic->argb + top * pic->argb_stride + left);
-    CopyPlane(src, pic->argb_stride * 4,
-              (uint8_t*)tmp.argb, tmp.argb_stride * 4,
-              width * 4, height);
+  if (a_size > 0) {
+    picture->a = mem;
+    mem += a_size;
   }
-  WebPPictureFree(pic);
-  *pic = tmp;
+  (void)mem;  // makes the static analyzer happy
   return 1;
 }
 
-//------------------------------------------------------------------------------
-// Simple picture rescaler
-
-static void RescalePlane(const uint8_t* src,
-                         int src_width, int src_height, int src_stride,
-                         uint8_t* dst,
-                         int dst_width, int dst_height, int dst_stride,
-                         int32_t* const work,
-                         int num_channels) {
-  WebPRescaler rescaler;
-  int y = 0;
-  WebPRescalerInit(&rescaler, src_width, src_height,
-                   dst, dst_width, dst_height, dst_stride,
-                   num_channels,
-                   src_width, dst_width,
-                   src_height, dst_height,
-                   work);
-  memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
-  while (y < src_height) {
-    y += WebPRescalerImport(&rescaler, src_height - y,
-                            src + y * src_stride, src_stride);
-    WebPRescalerExport(&rescaler);
-  }
-}
-
-int WebPPictureRescale(WebPPicture* pic, int width, int height) {
-  WebPPicture tmp;
-  int prev_width, prev_height;
-  int32_t* work;
-
-  if (pic == NULL) return 0;
-  prev_width = pic->width;
-  prev_height = pic->height;
-  // if width is unspecified, scale original proportionally to height ratio.
-  if (width == 0) {
-    width = (prev_width * height + prev_height / 2) / prev_height;
-  }
-  // if height is unspecified, scale original proportionally to width ratio.
-  if (height == 0) {
-    height = (prev_height * width + prev_width / 2) / prev_width;
-  }
-  // Check if the overall dimensions still make sense.
-  if (width <= 0 || height <= 0) return 0;
-
-  WebPPictureGrabSpecs(pic, &tmp);
-  tmp.width = width;
-  tmp.height = height;
-  if (!WebPPictureAlloc(&tmp)) return 0;
-
-  if (!pic->use_argb) {
-    work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
-    if (work == NULL) {
-      WebPPictureFree(&tmp);
-      return 0;
-    }
+int WebPPictureAlloc(WebPPicture* picture) {
+  if (picture != NULL) {
+    const int width = picture->width;
+    const int height = picture->height;
 
-    RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
-                 tmp.y, width, height, tmp.y_stride, work, 1);
-    RescalePlane(pic->u,
-                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
-                 tmp.u,
-                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
-    RescalePlane(pic->v,
-                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
-                 tmp.v,
-                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+    WebPPictureFree(picture);   // erase previous buffer
 
-    if (tmp.a != NULL) {
-      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
-                   tmp.a, width, height, tmp.a_stride, work, 1);
-    }
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    if (tmp.u0 != NULL) {
-      const int s = IS_YUV_CSP(tmp.colorspace, WEBP_YUV422) ? 2 : 1;
-      RescalePlane(
-          pic->u0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
-          tmp.u0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1);
-      RescalePlane(
-          pic->v0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
-          tmp.v0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1);
-    }
-#endif
-  } else {
-    work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
-    if (work == NULL) {
-      WebPPictureFree(&tmp);
-      return 0;
+    if (!picture->use_argb) {
+      return WebPPictureAllocYUVA(picture, width, height);
+    } else {
+      return WebPPictureAllocARGB(picture, width, height);
     }
-
-    RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
-                 pic->argb_stride * 4,
-                 (uint8_t*)tmp.argb, width, height,
-                 tmp.argb_stride * 4,
-                 work, 4);
-
   }
-  WebPPictureFree(pic);
-  free(work);
-  *pic = tmp;
   return 1;
 }
 
+void WebPPictureFree(WebPPicture* picture) {
+  if (picture != NULL) {
+    WebPSafeFree(picture->memory_);
+    WebPSafeFree(picture->memory_argb_);
+    WebPPictureResetBuffers(picture);
+  }
+}
+
 //------------------------------------------------------------------------------
 // WebPMemoryWriter: Write-to-memory
 
@@ -494,7 +203,7 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size,
     if (w->size > 0) {
       memcpy(new_mem, w->mem, w->size);
     }
-    free(w->mem);
+    WebPSafeFree(w->mem);
     w->mem = new_mem;
     // down-cast is ok, thanks to WebPSafeMalloc
     w->max_size = (size_t)next_max_size;
@@ -506,469 +215,13 @@ int WebPMemoryWrite(const uint8_t* data, size_t data_size,
   return 1;
 }
 
-//------------------------------------------------------------------------------
-// Detection of non-trivial transparency
-
-// Returns true if alpha[] has non-0xff values.
-static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
-                          int x_step, int y_step) {
-  if (alpha == NULL) return 0;
-  while (height-- > 0) {
-    int x;
-    for (x = 0; x < width * x_step; x += x_step) {
-      if (alpha[x] != 0xff) return 1;  // TODO(skal): check 4/8 bytes at a time.
-    }
-    alpha += y_step;
-  }
-  return 0;
-}
-
-// Checking for the presence of non-opaque alpha.
-int WebPPictureHasTransparency(const WebPPicture* picture) {
-  if (picture == NULL) return 0;
-  if (!picture->use_argb) {
-    return CheckNonOpaque(picture->a, picture->width, picture->height,
-                          1, picture->a_stride);
-  } else {
-    int x, y;
-    const uint32_t* argb = picture->argb;
-    if (argb == NULL) return 0;
-    for (y = 0; y < picture->height; ++y) {
-      for (x = 0; x < picture->width; ++x) {
-        if (argb[x] < 0xff000000u) return 1;   // test any alpha values != 0xff
-      }
-      argb += picture->argb_stride;
-    }
-  }
-  return 0;
-}
-
-//------------------------------------------------------------------------------
-// RGB -> YUV conversion
-
-// TODO: we can do better than simply 2x2 averaging on U/V samples.
-#define SUM4(ptr) ((ptr)[0] + (ptr)[step] + \
-                   (ptr)[rgb_stride] + (ptr)[rgb_stride + step])
-#define SUM2H(ptr) (2 * (ptr)[0] + 2 * (ptr)[step])
-#define SUM2V(ptr) (2 * (ptr)[0] + 2 * (ptr)[rgb_stride])
-#define SUM1(ptr)  (4 * (ptr)[0])
-#define RGB_TO_UV(x, y, SUM) {                           \
-  const int src = (2 * (step * (x) + (y) * rgb_stride)); \
-  const int dst = (x) + (y) * picture->uv_stride;        \
-  const int r = SUM(r_ptr + src);                        \
-  const int g = SUM(g_ptr + src);                        \
-  const int b = SUM(b_ptr + src);                        \
-  picture->u[dst] = VP8RGBToU(r, g, b);                  \
-  picture->v[dst] = VP8RGBToV(r, g, b);                  \
-}
-
-#define RGB_TO_UV0(x_in, x_out, y, SUM) {                \
-  const int src = (step * (x_in) + (y) * rgb_stride);    \
-  const int dst = (x_out) + (y) * picture->uv0_stride;   \
-  const int r = SUM(r_ptr + src);                        \
-  const int g = SUM(g_ptr + src);                        \
-  const int b = SUM(b_ptr + src);                        \
-  picture->u0[dst] = VP8RGBToU(r, g, b);                 \
-  picture->v0[dst] = VP8RGBToV(r, g, b);                 \
-}
-
-static void MakeGray(WebPPicture* const picture) {
-  int y;
-  const int uv_width = HALVE(picture->width);
-  const int uv_height = HALVE(picture->height);
-  for (y = 0; y < uv_height; ++y) {
-    memset(picture->u + y * picture->uv_stride, 128, uv_width);
-    memset(picture->v + y * picture->uv_stride, 128, uv_width);
-  }
-}
-
-static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
-                              const uint8_t* const g_ptr,
-                              const uint8_t* const b_ptr,
-                              const uint8_t* const a_ptr,
-                              int step,         // bytes per pixel
-                              int rgb_stride,   // bytes per scanline
-                              WebPPicture* const picture) {
-  const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
-  int x, y;
-  const int width = picture->width;
-  const int height = picture->height;
-  const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
-
-  picture->colorspace = uv_csp;
-  picture->use_argb = 0;
-  if (has_alpha) {
-    picture->colorspace |= WEBP_CSP_ALPHA_BIT;
-  }
-  if (!WebPPictureAlloc(picture)) return 0;
-
-  // Import luma plane
-  for (y = 0; y < height; ++y) {
-    for (x = 0; x < width; ++x) {
-      const int offset = step * x + y * rgb_stride;
-      picture->y[x + y * picture->y_stride] =
-          VP8RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
-    }
-  }
-
-  // Downsample U/V plane
-  if (uv_csp != WEBP_YUV400) {
-    for (y = 0; y < (height >> 1); ++y) {
-      for (x = 0; x < (width >> 1); ++x) {
-        RGB_TO_UV(x, y, SUM4);
-      }
-      if (width & 1) {
-        RGB_TO_UV(x, y, SUM2V);
-      }
-    }
-    if (height & 1) {
-      for (x = 0; x < (width >> 1); ++x) {
-        RGB_TO_UV(x, y, SUM2H);
-      }
-      if (width & 1) {
-        RGB_TO_UV(x, y, SUM1);
-      }
-    }
-
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    // Store original U/V samples too
-    if (uv_csp == WEBP_YUV422) {
-      for (y = 0; y < height; ++y) {
-        for (x = 0; x < (width >> 1); ++x) {
-          RGB_TO_UV0(2 * x, x, y, SUM2H);
-        }
-        if (width & 1) {
-          RGB_TO_UV0(2 * x, x, y, SUM1);
-        }
-      }
-    } else if (uv_csp == WEBP_YUV444) {
-      for (y = 0; y < height; ++y) {
-        for (x = 0; x < width; ++x) {
-          RGB_TO_UV0(x, x, y, SUM1);
-        }
-      }
-    }
-#endif
-  } else {
-    MakeGray(picture);
-  }
-
-  if (has_alpha) {
-    assert(step >= 4);
-    for (y = 0; y < height; ++y) {
-      for (x = 0; x < width; ++x) {
-        picture->a[x + y * picture->a_stride] =
-            a_ptr[step * x + y * rgb_stride];
-      }
-    }
-  }
-  return 1;
-}
-
-static int Import(WebPPicture* const picture,
-                  const uint8_t* const rgb, int rgb_stride,
-                  int step, int swap_rb, int import_alpha) {
-  const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0);
-  const uint8_t* const g_ptr = rgb + 1;
-  const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
-  const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL;
-  const int width = picture->width;
-  const int height = picture->height;
-
-  if (!picture->use_argb) {
-    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
-                              picture);
-  }
-  if (import_alpha) {
-    picture->colorspace |= WEBP_CSP_ALPHA_BIT;
-  } else {
-    picture->colorspace &= ~WEBP_CSP_ALPHA_BIT;
-  }
-  if (!WebPPictureAlloc(picture)) return 0;
-
-  if (!import_alpha) {
-    int x, y;
-    for (y = 0; y < height; ++y) {
-      for (x = 0; x < width; ++x) {
-        const int offset = step * x + y * rgb_stride;
-        const uint32_t argb =
-            0xff000000u |
-            (r_ptr[offset] << 16) |
-            (g_ptr[offset] <<  8) |
-            (b_ptr[offset]);
-        picture->argb[x + y * picture->argb_stride] = argb;
-      }
-    }
-  } else {
-    int x, y;
-    assert(step >= 4);
-    for (y = 0; y < height; ++y) {
-      for (x = 0; x < width; ++x) {
-        const int offset = step * x + y * rgb_stride;
-        const uint32_t argb = (a_ptr[offset] << 24) |
-                              (r_ptr[offset] << 16) |
-                              (g_ptr[offset] <<  8) |
-                              (b_ptr[offset]);
-        picture->argb[x + y * picture->argb_stride] = argb;
-      }
-    }
-  }
-  return 1;
-}
-#undef SUM4
-#undef SUM2V
-#undef SUM2H
-#undef SUM1
-#undef RGB_TO_UV
-
-int WebPPictureImportRGB(WebPPicture* picture,
-                         const uint8_t* rgb, int rgb_stride) {
-  return Import(picture, rgb, rgb_stride, 3, 0, 0);
-}
-
-int WebPPictureImportBGR(WebPPicture* picture,
-                         const uint8_t* rgb, int rgb_stride) {
-  return Import(picture, rgb, rgb_stride, 3, 1, 0);
-}
-
-int WebPPictureImportRGBA(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return Import(picture, rgba, rgba_stride, 4, 0, 1);
-}
-
-int WebPPictureImportBGRA(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return Import(picture, rgba, rgba_stride, 4, 1, 1);
-}
-
-int WebPPictureImportRGBX(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return Import(picture, rgba, rgba_stride, 4, 0, 0);
-}
-
-int WebPPictureImportBGRX(WebPPicture* picture,
-                          const uint8_t* rgba, int rgba_stride) {
-  return Import(picture, rgba, rgba_stride, 4, 1, 0);
-}
-
-//------------------------------------------------------------------------------
-// Automatic YUV <-> ARGB conversions.
-
-int WebPPictureYUVAToARGB(WebPPicture* picture) {
-  if (picture == NULL) return 0;
-  if (picture->memory_ == NULL || picture->y == NULL ||
-      picture->u == NULL || picture->v == NULL) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
-  }
-  if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
-  }
-  if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
-  }
-  // Allocate a new argb buffer (discarding the previous one).
-  if (!PictureAllocARGB(picture)) return 0;
-
-  // Convert
-  {
-    int y;
-    const int width = picture->width;
-    const int height = picture->height;
-    const int argb_stride = 4 * picture->argb_stride;
-    uint8_t* dst = (uint8_t*)picture->argb;
-    const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
-    WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
-
-    // First row, with replicated top samples.
-    upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, width);
-    cur_y += picture->y_stride;
-    dst += argb_stride;
-    // Center rows.
-    for (y = 1; y + 1 < height; y += 2) {
-      const uint8_t* const top_u = cur_u;
-      const uint8_t* const top_v = cur_v;
-      cur_u += picture->uv_stride;
-      cur_v += picture->uv_stride;
-      upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
-               dst, dst + argb_stride, width);
-      cur_y += 2 * picture->y_stride;
-      dst += 2 * argb_stride;
-    }
-    // Last row (if needed), with replicated bottom samples.
-    if (height > 1 && !(height & 1)) {
-      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
-    }
-    // Insert alpha values if needed, in replacement for the default 0xff ones.
-    if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
-      for (y = 0; y < height; ++y) {
-        uint32_t* const dst = picture->argb + y * picture->argb_stride;
-        const uint8_t* const src = picture->a + y * picture->a_stride;
-        int x;
-        for (x = 0; x < width; ++x) {
-          dst[x] = (dst[x] & 0x00ffffffu) | (src[x] << 24);
-        }
-      }
-    }
-  }
-  return 1;
-}
-
-int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
-  if (picture == NULL) return 0;
-  if (picture->argb == NULL) {
-    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
-  } else {
-    const uint8_t* const argb = (const uint8_t*)picture->argb;
-    const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
-    const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2;
-    const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
-    const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
-    // We work on a tmp copy of 'picture', because ImportYUVAFromRGBA()
-    // would be calling WebPPictureFree(picture) otherwise.
-    WebPPicture tmp = *picture;
-    PictureResetARGB(&tmp);  // reset ARGB buffer so that it's not free()'d.
-    tmp.use_argb = 0;
-    tmp.colorspace = colorspace & WEBP_CSP_UV_MASK;
-    if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, &tmp)) {
-      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
-    }
-    // Copy back the YUV specs into 'picture'.
-    tmp.argb = picture->argb;
-    tmp.argb_stride = picture->argb_stride;
-    tmp.memory_argb_ = picture->memory_argb_;
-    *picture = tmp;
+void WebPMemoryWriterClear(WebPMemoryWriter* writer) {
+  if (writer != NULL) {
+    WebPSafeFree(writer->mem);
+    writer->mem = NULL;
+    writer->size = 0;
+    writer->max_size = 0;
   }
-  return 1;
-}
-
-//------------------------------------------------------------------------------
-// Helper: clean up fully transparent area to help compressibility.
-
-#define SIZE 8
-#define SIZE2 (SIZE / 2)
-static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
-  int y, x;
-  for (y = 0; y < size; ++y) {
-    for (x = 0; x < size; ++x) {
-      if (ptr[x]) {
-        return 0;
-      }
-    }
-    ptr += stride;
-  }
-  return 1;
-}
-
-static WEBP_INLINE void flatten(uint8_t* ptr, int v, int stride, int size) {
-  int y;
-  for (y = 0; y < size; ++y) {
-    memset(ptr, v, size);
-    ptr += stride;
-  }
-}
-
-void WebPCleanupTransparentArea(WebPPicture* pic) {
-  int x, y, w, h;
-  const uint8_t* a_ptr;
-  int values[3] = { 0 };
-
-  if (pic == NULL) return;
-
-  a_ptr = pic->a;
-  if (a_ptr == NULL) return;    // nothing to do
-
-  w = pic->width / SIZE;
-  h = pic->height / SIZE;
-  for (y = 0; y < h; ++y) {
-    int need_reset = 1;
-    for (x = 0; x < w; ++x) {
-      const int off_a = (y * pic->a_stride + x) * SIZE;
-      const int off_y = (y * pic->y_stride + x) * SIZE;
-      const int off_uv = (y * pic->uv_stride + x) * SIZE2;
-      if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) {
-        if (need_reset) {
-          values[0] = pic->y[off_y];
-          values[1] = pic->u[off_uv];
-          values[2] = pic->v[off_uv];
-          need_reset = 0;
-        }
-        flatten(pic->y + off_y, values[0], pic->y_stride, SIZE);
-        flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2);
-        flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2);
-      } else {
-        need_reset = 1;
-      }
-    }
-    // ignore the left-overs on right/bottom
-  }
-}
-
-#undef SIZE
-#undef SIZE2
-
-
-//------------------------------------------------------------------------------
-// Distortion
-
-// Max value returned in case of exact similarity.
-static const double kMinDistortion_dB = 99.;
-
-int WebPPictureDistortion(const WebPPicture* pic1, const WebPPicture* pic2,
-                          int type, float result[5]) {
-  int c;
-  DistoStats stats[5];
-  int has_alpha;
-
-  if (pic1 == NULL || pic2 == NULL ||
-      pic1->width != pic2->width || pic1->height != pic2->height ||
-      pic1->y == NULL || pic2->y == NULL ||
-      pic1->u == NULL || pic2->u == NULL ||
-      pic1->v == NULL || pic2->v == NULL ||
-      result == NULL) {
-    return 0;
-  }
-  // TODO(skal): provide distortion for ARGB too.
-  if (pic1->use_argb == 1 || pic1->use_argb != pic2->use_argb) {
-    return 0;
-  }
-
-  has_alpha = !!(pic1->colorspace & WEBP_CSP_ALPHA_BIT);
-  if (has_alpha != !!(pic2->colorspace & WEBP_CSP_ALPHA_BIT) ||
-      (has_alpha && (pic1->a == NULL || pic2->a == NULL))) {
-    return 0;
-  }
-
-  memset(stats, 0, sizeof(stats));
-  VP8SSIMAccumulatePlane(pic1->y, pic1->y_stride,
-                         pic2->y, pic2->y_stride,
-                         pic1->width, pic1->height, &stats[0]);
-  VP8SSIMAccumulatePlane(pic1->u, pic1->uv_stride,
-                         pic2->u, pic2->uv_stride,
-                         (pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
-                         &stats[1]);
-  VP8SSIMAccumulatePlane(pic1->v, pic1->uv_stride,
-                         pic2->v, pic2->uv_stride,
-                         (pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
-                         &stats[2]);
-  if (has_alpha) {
-    VP8SSIMAccumulatePlane(pic1->a, pic1->a_stride,
-                           pic2->a, pic2->a_stride,
-                           pic1->width, pic1->height, &stats[3]);
-  }
-  for (c = 0; c <= 4; ++c) {
-    if (type == 1) {
-      const double v = VP8SSIMGet(&stats[c]);
-      result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
-                                   : kMinDistortion_dB);
-    } else {
-      const double v = VP8SSIMGetSquaredError(&stats[c]);
-      result[c] = (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
-                                   : kMinDistortion_dB);
-    }
-    // Accumulate forward
-    if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
-  }
-  return 1;
 }
 
 //------------------------------------------------------------------------------
@@ -1000,7 +253,7 @@ static size_t Encode(const uint8_t* rgba, int width, int height, int stride,
   ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic);
   WebPPictureFree(&pic);
   if (!ok) {
-    free(wrt.mem);
+    WebPMemoryWriterClear(&wrt);
     *output = NULL;
     return 0;
   }
@@ -1014,10 +267,10 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, float q,          \
   return Encode(in, w, h, bps, IMPORTER, q, 0, out);                    \
 }
 
-ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB);
-ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR);
-ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA);
-ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA);
+ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB)
+ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR)
+ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA)
+ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA)
 
 #undef ENCODE_FUNC
 
@@ -1027,15 +280,11 @@ size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) {       \
   return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out);  \
 }
 
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB);
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR);
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA);
-LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA)
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA)
 
 #undef LOSSLESS_ENCODE_FUNC
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/picture_csp.c b/drivers/webp/enc/picture_csp.c
new file mode 100644
index 0000000000..0ef5f9eee2
--- /dev/null
+++ b/drivers/webp/enc/picture_csp.c
@@ -0,0 +1,1156 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture utils for colorspace conversion
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "../utils/random.h"
+#include "../utils/utils.h"
+#include "../dsp/yuv.h"
+
+// Comment out to disable gamma-compression during RGB->U/V averaging
+#define USE_GAMMA_COMPRESSION
+
+// If defined, use table to compute x / alpha.
+#define USE_INVERSE_ALPHA_TABLE
+
+static const union {
+  uint32_t argb;
+  uint8_t  bytes[4];
+} test_endian = { 0xff000000u };
+#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)
+
+//------------------------------------------------------------------------------
+// Detection of non-trivial transparency
+
+// Tells whether any sample of the alpha[] plane differs from 0xff (opaque).
+static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
+                          int x_step, int y_step) {
+  const int row_span = width * x_step;   // index range covered by one row
+  int rows_left, i;
+  if (alpha == NULL) return 0;           // no plane at all: nothing to report
+  for (rows_left = height; rows_left > 0; --rows_left, alpha += y_step) {
+    for (i = 0; i < row_span; i += x_step) {
+      if (alpha[i] != 0xff) return 1;  // TODO(skal): check 4/8 bytes at a time.
+    }
+  }
+  return 0;
+}
+
+// Returns 1 if 'picture' holds at least one alpha value other than 0xff.
+int WebPPictureHasTransparency(const WebPPicture* picture) {
+  const uint32_t* row;
+  int i, j;
+  if (picture == NULL) return 0;
+  if (!picture->use_argb) {   // not in ARGB mode: scan the separate 'a' plane
+    return CheckNonOpaque(picture->a, picture->width, picture->height,
+                          1, picture->a_stride);
+  }
+  // ARGB mode: alpha is the top byte of each packed 32b sample.
+  row = picture->argb;
+  if (row == NULL) return 0;
+  for (j = 0; j < picture->height; ++j, row += picture->argb_stride) {
+    for (i = 0; i < picture->width; ++i) {
+      if ((row[i] >> 24) != 0xff) return 1;   // same test as < 0xff000000u
+    }
+  }
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// Code for gamma correction
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// gamma-compensates loss of resolution during chroma subsampling
+#define kGamma 0.80      // for now we use a different gamma value than kGammaF
+#define kGammaFix 12     // fixed-point precision for linear values
+#define kGammaScale ((1 << kGammaFix) - 1)
+#define kGammaTabFix 7   // fixed-point fractional bits precision
+#define kGammaTabScale (1 << kGammaTabFix)
+#define kGammaTabRounder (kGammaTabScale >> 1)
+#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix))
+
+static int kLinearToGammaTab[kGammaTabSize + 1];
+static uint16_t kGammaToLinearTab[256];
+static volatile int kGammaTablesOk = 0;
+
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {
+  // One-time fill of the two fixed-point gamma tables (no-op once done).
+  const double inv_255 = 1. / 255.;
+  const double step = (double)(1 << kGammaTabFix) / kGammaScale;
+  int i;
+  if (kGammaTablesOk) return;
+  for (i = 0; i <= 255; ++i) {
+    kGammaToLinearTab[i] =
+        (uint16_t)(pow(inv_255 * i, kGamma) * kGammaScale + .5);
+  }
+  for (i = 0; i <= kGammaTabSize; ++i) {
+    kLinearToGammaTab[i] = (int)(255. * pow(step * i, 1. / kGamma) + .5);
+  }
+  kGammaTablesOk = 1;
+}
+
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) {   // 256-entry lookup
+  return kGammaToLinearTab[v];
+}
+
+static WEBP_INLINE int Interpolate(int v) {   // blends two table entries
+  const int tab_pos = v >> (kGammaTabFix + 2);    // integer part
+  const int x = v & ((kGammaTabScale << 2) - 1);  // fractional part
+  int v0, v1;
+  assert(tab_pos >= 0 && tab_pos + 1 < kGammaTabSize + 1);   // before any read
+  v0 = kLinearToGammaTab[tab_pos];
+  v1 = kLinearToGammaTab[tab_pos + 1];
+  return v1 * x + v0 * ((kGammaTabScale << 2) - x);   // interpolate
+}
+
+// Converts the linear value 'base_value << shift' to a U/V value with
+// YUV_FIX+2 fixed-point precision, suitable for RGBToU/V calls.
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+  const int uplifted = Interpolate(base_value << shift);
+  return (uplifted + kGammaTabRounder) >> kGammaTabFix;    // round + descale
+}
+
+#else
+
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTables(void) {}   // no tables
+static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { return v; }   // identity
+static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) {
+  return (int)(base_value << shift);   // no table lookup: only the shift remains
+}
+
+#endif    // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+
+static int RGBToY(int r, int g, int b, VP8Random* const rg) {
+  if (rg == NULL) return VP8RGBToY(r, g, b, YUV_HALF);   // fixed rounding
+  return VP8RGBToY(r, g, b, VP8RandomBits(rg, YUV_FIX));   // random rounding
+}
+
+static int RGBToU(int r, int g, int b, VP8Random* const rg) {
+  if (rg == NULL) return VP8RGBToU(r, g, b, YUV_HALF << 2);   // fixed rounding
+  return VP8RGBToU(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));  // random rounding
+}
+
+static int RGBToV(int r, int g, int b, VP8Random* const rg) {
+  if (rg == NULL) return VP8RGBToV(r, g, b, YUV_HALF << 2);   // fixed rounding
+  return VP8RGBToV(r, g, b, VP8RandomBits(rg, YUV_FIX + 2));  // random rounding
+}
+
+//------------------------------------------------------------------------------
+// Smart RGB->YUV conversion
+
+static const int kNumIterations = 6;
+static const int kMinDimensionIterativeConversion = 4;
+
+// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some
+// banding sometimes. Better use extra precision.
+#define SFIX 2                // fixed-point precision of RGB and Y/W
+typedef int16_t fixed_t;      // signed type with extra SFIX precision for UV
+typedef uint16_t fixed_y_t;   // unsigned type with extra SFIX precision for W
+
+#define SHALF (1 << SFIX >> 1)
+#define MAX_Y_T ((256 << SFIX) - 1)
+#define SROUNDER (1 << (YUV_FIX + SFIX - 1))
+
+#if defined(USE_GAMMA_COMPRESSION)
+
+// float variant of gamma-correction
+// We use tables of different size and precision, along with a 'real-world'
+// Gamma value close to ~2.
+#define kGammaF 2.2
+static float kGammaToLinearTabF[MAX_Y_T + 1];   // size scales with Y_FIX
+static float kLinearToGammaTabF[kGammaTabSize + 2];
+static volatile int kGammaTablesFOk = 0;
+
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {
+  // One-time fill of the float gamma tables (no-op once they are marked ok).
+  const double in_step = 1. / MAX_Y_T;
+  const double out_step = 1. / kGammaTabSize;
+  int i;
+  if (kGammaTablesFOk) return;
+  for (i = 0; i <= MAX_Y_T; ++i) {
+    kGammaToLinearTabF[i] = (float)pow(in_step * i, kGammaF);
+  }
+  for (i = 0; i <= kGammaTabSize; ++i) {
+    kLinearToGammaTabF[i] = (float)(MAX_Y_T * pow(out_step * i, 1. / kGammaF));
+  }
+  // Duplicate the last entry: rounding may make lookups read one slot further.
+  kLinearToGammaTabF[kGammaTabSize + 1] = kLinearToGammaTabF[kGammaTabSize];
+  kGammaTablesFOk = 1;
+}
+
+static WEBP_INLINE float GammaToLinearF(int v) {   // needs 0 <= v <= MAX_Y_T
+  return kGammaToLinearTabF[v];
+}
+
+static WEBP_INLINE int LinearToGammaF(float value) {
+  // Linear blend of two adjacent kLinearToGammaTabF[] entries, +.5, truncated.
+  const float pos = value * kGammaTabSize;
+  const int idx = (int)pos;                 // integer part: table slot
+  const float frac = pos - (float)idx;      // fractional part: blend weight
+  const float* const slot = &kLinearToGammaTabF[idx];
+  const float blended = slot[1] * frac + slot[0] * (1.f - frac);
+  return (int)(blended + .5);
+}
+
+#else
+
+static WEBP_TSAN_IGNORE_FUNCTION void InitGammaTablesF(void) {}   // no tables
+static WEBP_INLINE float GammaToLinearF(int v) {
+  const float norm = 1.f / MAX_Y_T;   // plain rescaling by 1 / MAX_Y_T
+  return norm * v;
+}
+static WEBP_INLINE int LinearToGammaF(float value) {
+  return (int)(MAX_Y_T * value + .5);   // scale by MAX_Y_T, add .5, truncate
+}
+
+#endif    // USE_GAMMA_COMPRESSION
+
+//------------------------------------------------------------------------------
+
+static uint8_t clip_8b(fixed_t v) {   // saturates 'v' to the [0, 255] range
+  return (v < 0) ? 0u : (v > 0xff) ? 255u : (uint8_t)v;
+}
+
+static fixed_y_t clip_y(int y) {   // saturates 'y' to the [0, MAX_Y_T] range
+  return (y < 0) ? 0 : (y > MAX_Y_T) ? MAX_Y_T : (fixed_y_t)y;
+}
+
+//------------------------------------------------------------------------------
+
+static int RGBToGray(int r, int g, int b) {
+  // Weighted sum (weights total 65536), rounded with YUV_HALF, then descaled.
+  return (19595 * r + 38470 * g + 7471 * b + YUV_HALF) >> YUV_FIX;
+}
+
+static float RGBToGrayF(float r, float g, float b) {   // float weighted sum
+  return 0.299f * r + 0.587f * g + 0.114f * b;
+}
+
+static int ScaleDown(int a, int b, int c, int d) {
+  // Averages four gamma-space samples in linear space, then converts back.
+  const float lin_a = GammaToLinearF(a), lin_b = GammaToLinearF(b);
+  const float lin_c = GammaToLinearF(c), lin_d = GammaToLinearF(d);
+  const float mean = 0.25f * (lin_a + lin_b + lin_c + lin_d);
+  return LinearToGammaF(mean);
+}
+
+static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int len) {
+  // For each of the 'len' RGB triplets in src[], stores one gray value in dst[].
+  int i;
+  for (i = 0; i < len; ++i, src += 3) {
+    const float lin_r = GammaToLinearF(src[0]);
+    const float lin_g = GammaToLinearF(src[1]);
+    const float lin_b = GammaToLinearF(src[2]);
+    dst[i] = (fixed_y_t)LinearToGammaF(RGBToGrayF(lin_r, lin_g, lin_b));
+  }
+}
+
+static int UpdateChroma(const fixed_y_t* src1,
+                        const fixed_y_t* src2,
+                        fixed_t* dst, fixed_y_t* tmp, int len) {
+  // For each 2x2 block: writes (r-W, g-W, b-W) to dst[], optionally W (twice)
+  // to tmp[]; returns the sum of |gray(plain average) - W| over all blocks.
+  int n, total = 0;
+  for (n = 0; n < len; ++n, src1 += 6, src2 += 6, dst += 3) {
+    const int r = ScaleDown(src1[0], src1[3], src2[0], src2[3]);
+    const int g = ScaleDown(src1[1], src1[4], src2[1], src2[4]);
+    const int b = ScaleDown(src1[2], src1[5], src2[2], src2[5]);
+    const int W = RGBToGray(r, g, b);
+    // plain rounded averages (no gamma handling), used only for the metric
+    const int r_avg = (src1[0] + src1[3] + src2[0] + src2[3] + 2) >> 2;
+    const int g_avg = (src1[1] + src1[4] + src2[1] + src2[4] + 2) >> 2;
+    const int b_avg = (src1[2] + src1[5] + src2[2] + src2[5] + 2) >> 2;
+    dst[0] = (fixed_t)(r - W);
+    dst[1] = (fixed_t)(g - W);
+    dst[2] = (fixed_t)(b - W);
+    if (tmp != NULL) {
+      tmp[2 * n + 0] = clip_y(W);
+      tmp[2 * n + 1] = tmp[2 * n + 0];
+    }
+    total += abs(RGBToGray(r_avg, g_avg, b_avg) - W);
+  }
+  return total;
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int Filter(const fixed_t* const A, const fixed_t* const B,
+                              int rightwise) {
+  // 9:3:3:1 weighted blend (rounded, /16) of A[0], its neighbour three entries
+  // away (side picked by 'rightwise'), and the same pair of entries in B[].
+  const int side = rightwise ? +3 : -3;
+  const int near_sum = A[0] * 9 + B[0] * 3;
+  const int far_sum = A[side] * 3 + B[side];
+  return (near_sum + far_sum + 8) >> 4;
+}
+
+// 3:1 weighted blend of A and B, rounded.
+static WEBP_INLINE int Filter2(int A, int B) { return (3 * A + B + 2) >> 2; }
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE fixed_y_t UpLift(uint8_t a) {  // 8bit -> SFIX
+  return (fixed_y_t)(((int)a << SFIX) + SHALF);   // low bits are free: + == |
+}
+
+static void ImportOneRow(const uint8_t* const r_ptr,
+                         const uint8_t* const g_ptr,
+                         const uint8_t* const b_ptr,
+                         int step,
+                         int pic_width,
+                         fixed_y_t* const dst) {   // 8b row -> up-lifted RGB
+  int x, off = 0;   // 'off' tracks x * step
+  for (x = 0; x < pic_width; ++x, off += step) {
+    fixed_y_t* const px = &dst[3 * x];   // interleaved R,G,B output triplet
+    px[0] = UpLift(r_ptr[off]);
+    px[1] = UpLift(g_ptr[off]);
+    px[2] = UpLift(b_ptr[off]);
+  }
+  if (pic_width & 1) {  // odd width: replicate the rightmost pixel
+    memcpy(&dst[3 * pic_width], &dst[3 * (pic_width - 1)], 3 * sizeof(dst[0]));
+  }
+}
+
+static void InterpolateTwoRows(const fixed_y_t* const best_y,   // two W rows
+                               const fixed_t* const prev_uv,    // chroma above
+                               const fixed_t* const cur_uv,     // chroma here
+                               const fixed_t* const next_uv,    // chroma below
+                               int w,                           // row width
+                               fixed_y_t* const out1,           // upper RGB row
+                               fixed_y_t* const out2) {         // lower RGB row
+  int i, k;   // i: output pixel index, k: channel index within a triplet
+  {  // special boundary case for i==0
+    const int W0 = best_y[0];   // W of the upper row
+    const int W1 = best_y[w];   // W of the lower row ('w' entries further)
+    for (k = 0; k <= 2; ++k) {  // vertical blend only: no left neighbour
+      out1[k] = clip_y(Filter2(cur_uv[k], prev_uv[k]) + W0);
+      out2[k] = clip_y(Filter2(cur_uv[k], next_uv[k]) + W1);
+    }
+  }
+  for (i = 1; i < w - 1; ++i) {   // interior pixels
+    const int W0 = best_y[i + 0];
+    const int W1 = best_y[i + w];
+    const int off = 3 * (i >> 1);   // pixels 2n and 2n+1 share one triplet
+    for (k = 0; k <= 2; ++k) {      // odd i blends rightwise, even i leftwise
+      const int tmp0 = Filter(cur_uv + off + k, prev_uv + off + k, i & 1);
+      const int tmp1 = Filter(cur_uv + off + k, next_uv + off + k, i & 1);
+      out1[3 * i + k] = clip_y(tmp0 + W0);
+      out2[3 * i + k] = clip_y(tmp1 + W1);
+    }
+  }
+  {  // special boundary case for i == w - 1 ('i' is left over from the loop)
+    const int W0 = best_y[i + 0];
+    const int W1 = best_y[i + w];
+    const int off = 3 * (i >> 1);
+    for (k = 0; k <= 2; ++k) {   // vertical blend only: no right neighbour
+      out1[3 * i + k] = clip_y(Filter2(cur_uv[off + k], prev_uv[off + k]) + W0);
+      out2[3 * i + k] = clip_y(Filter2(cur_uv[off + k], next_uv[off + k]) + W1);
+    }
+  }
+}
+
+static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) {
+  const int sum = SROUNDER + 16839 * r + 33059 * g + 6420 * b;   // rounded sum
+  return clip_8b((sum >> (YUV_FIX + SFIX)) + 16);   // descale, add offset
+}
+
+static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) {
+  const int sum = SROUNDER - 9719 * r - 19081 * g + 28800 * b;   // rounded sum
+  return clip_8b((sum >> (YUV_FIX + SFIX)) + 128);   // descale, add offset
+}
+
+static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) {
+  const int sum = SROUNDER + 28800 * r - 24116 * g - 4684 * b;   // rounded sum
+  return clip_8b((sum >> (YUV_FIX + SFIX)) + 128);   // descale, add offset
+}
+
+static int ConvertWRGBToYUV(const fixed_y_t* const best_y,
+                            const fixed_t* const best_uv,
+                            WebPPicture* const picture) {
+  // Writes the final 8b Y/U/V planes of 'picture' from the W (best_y) and
+  // W-relative RGB (best_uv) buffers. Always returns 1.
+  const int padded_w = (picture->width + 1) & ~1;    // dimensions rounded up
+  const int padded_h = (picture->height + 1) & ~1;   // to the next even value
+  const int uv_w = padded_w >> 1;
+  const int uv_h = padded_h >> 1;
+  int x, y;
+  // Luma: every pixel adds its own W to the triplet shared by its 2x2 block.
+  for (y = 0; y < picture->height; ++y) {
+    const fixed_y_t* const w_row = best_y + y * padded_w;
+    const fixed_t* const uv_row = best_uv + 3 * (y >> 1) * uv_w;
+    uint8_t* const dst_y = picture->y + y * picture->y_stride;
+    for (x = 0; x < picture->width; ++x) {
+      const fixed_t* const rgb = uv_row + 3 * (x >> 1);
+      const int W = w_row[x];
+      dst_y[x] = ConvertRGBToY(rgb[0] + W, rgb[1] + W, rgb[2] + W);
+    }
+  }
+  // Chroma: one U and one V sample per stored triplet.
+  for (y = 0; y < uv_h; ++y) {
+    const fixed_t* rgb = best_uv + 3 * y * uv_w;
+    uint8_t* const dst_u = picture->u + y * picture->uv_stride;
+    uint8_t* const dst_v = picture->v + y * picture->uv_stride;
+    for (x = 0; x < uv_w; ++x, rgb += 3) {
+      dst_u[x] = ConvertRGBToU(rgb[0], rgb[1], rgb[2]);
+      dst_v[x] = ConvertRGBToV(rgb[0], rgb[1], rgb[2]);
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main function
+
+#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T)))
+
+static int PreprocessARGB(const uint8_t* const r_ptr,
+                          const uint8_t* const g_ptr,
+                          const uint8_t* const b_ptr,
+                          int step, int rgb_stride,   // sample / row strides
+                          WebPPicture* const picture) {
+  // Iterative RGB->YUV conversion. We expand the right/bottom border if needed.
+  const int w = (picture->width + 1) & ~1;    // dimensions rounded up to even
+  const int h = (picture->height + 1) & ~1;
+  const int uv_w = w >> 1;                    // chroma buffers are half-size
+  const int uv_h = h >> 1;
+  int i, j, iter;
+
+  // TODO(skal): allocate one big memory chunk. But for now, it's easier
+  // for valgrind debugging to have several chunks.
+  fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t);   // scratch
+  fixed_y_t* const best_y = SAFE_ALLOC(w, h, fixed_y_t);      // current W
+  fixed_y_t* const target_y = SAFE_ALLOC(w, h, fixed_y_t);    // W of the input
+  fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t);  // W of 2 test rows
+  fixed_t* const best_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);    // current
+  fixed_t* const target_uv = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t);  // input
+  fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t);   // test row
+  int ok;             // return value
+  int diff_sum = 0;   // accumulated UpdateChroma() results
+  const int first_diff_threshold = (int)(2.5 * w * h);
+  const int min_improvement = 5;   // stop if improvement is below this %
+  const int min_first_improvement = 80;   // % bound for the iter == 0 exit test
+
+  if (best_y == NULL || best_uv == NULL ||
+      target_y == NULL || target_uv == NULL ||
+      best_rgb_y == NULL || best_rgb_uv == NULL ||
+      tmp_buffer == NULL) {
+    ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    goto End;   // every buffer pointer is passed to WebPSafeFree() there
+  }
+  assert(picture->width >= kMinDimensionIterativeConversion);
+  assert(picture->height >= kMinDimensionIterativeConversion);
+
+  // Import RGB samples to W/RGB representation.
+  for (j = 0; j < picture->height; j += 2) {
+    const int is_last_row = (j == picture->height - 1);   // no input row below
+    fixed_y_t* const src1 = tmp_buffer;
+    fixed_y_t* const src2 = tmp_buffer + 3 * w;
+    const int off1 = j * rgb_stride;
+    const int off2 = off1 + rgb_stride;
+    const int uv_off = (j >> 1) * 3 * uv_w;
+    fixed_y_t* const dst_y = best_y + j * w;
+
+    // prepare two rows of input
+    ImportOneRow(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                 step, picture->width, src1);
+    if (!is_last_row) {
+      ImportOneRow(r_ptr + off2, g_ptr + off2, b_ptr + off2,
+                   step, picture->width, src2);
+    } else {
+      memcpy(src2, src1, 3 * w * sizeof(*src2));   // replicate the last row
+    }
+    UpdateW(src1, target_y + (j + 0) * w, w);
+    UpdateW(src2, target_y + (j + 1) * w, w);
+    diff_sum += UpdateChroma(src1, src2, target_uv + uv_off, dst_y, uv_w);
+    memcpy(best_uv + uv_off, target_uv + uv_off, 3 * uv_w * sizeof(*best_uv));
+    memcpy(dst_y + w, dst_y, w * sizeof(*dst_y));   // same start W for row j+1
+  }
+
+  // Iterate and resolve clipping conflicts.
+  for (iter = 0; iter < kNumIterations; ++iter) {
+    int k;
+    const fixed_t* cur_uv = best_uv;
+    const fixed_t* prev_uv = best_uv;   // first row pair: 'prev' is 'cur'
+    const int old_diff_sum = diff_sum;
+    diff_sum = 0;
+    for (j = 0; j < h; j += 2) {
+      fixed_y_t* const src1 = tmp_buffer;
+      fixed_y_t* const src2 = tmp_buffer + 3 * w;
+      {
+        const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0);
+        InterpolateTwoRows(best_y + j * w, prev_uv, cur_uv, next_uv,
+                           w, src1, src2);
+        prev_uv = cur_uv;
+        cur_uv = next_uv;
+      }
+
+      UpdateW(src1, best_rgb_y + 0 * w, w);
+      UpdateW(src2, best_rgb_y + 1 * w, w);
+      diff_sum += UpdateChroma(src1, src2, best_rgb_uv, NULL, uv_w);
+
+      // update two rows of Y and one row of RGB
+      for (i = 0; i < 2 * w; ++i) {
+        const int off = i + j * w;
+        const int diff_y = target_y[off] - best_rgb_y[i];   // residual W error
+        const int new_y = (int)best_y[off] + diff_y;
+        best_y[off] = clip_y(new_y);
+      }
+      for (i = 0; i < uv_w; ++i) {
+        const int off = 3 * (i + (j >> 1) * uv_w);
+        int W;
+        for (k = 0; k <= 2; ++k) {
+          const int diff_uv = (int)target_uv[off + k] - best_rgb_uv[3 * i + k];
+          best_uv[off + k] += diff_uv;
+        }
+        W = RGBToGray(best_uv[off + 0], best_uv[off + 1], best_uv[off + 2]);
+        for (k = 0; k <= 2; ++k) {   // subtract the triplet's own gray value
+          best_uv[off + k] -= W;
+        }
+      }
+    }
+    // test exit condition
+    if (diff_sum > 0) {
+      const int improvement = 100 * abs(diff_sum - old_diff_sum) / diff_sum;
+      // Check if first iteration gave good result already, without a large
+      // jump of improvement (otherwise it means we need to try few extra
+      // iterations, just to be sure).
+      if (iter == 0 && diff_sum < first_diff_threshold &&
+          improvement < min_first_improvement) {
+        break;
+      }
+      // then, check if improvement is stalling.
+      if (improvement < min_improvement) {
+        break;
+      }
+    } else {
+      break;   // diff_sum == 0: stop iterating
+    }
+  }
+
+  // final reconstruction
+  ok = ConvertWRGBToYUV(best_y, best_uv, picture);
+
+ End:
+  WebPSafeFree(best_y);
+  WebPSafeFree(best_uv);
+  WebPSafeFree(target_y);
+  WebPSafeFree(target_uv);
+  WebPSafeFree(best_rgb_y);
+  WebPSafeFree(best_rgb_uv);
+  WebPSafeFree(tmp_buffer);
+  return ok;
+}
+#undef SAFE_ALLOC
+
+//------------------------------------------------------------------------------
+// "Fast" regular RGB->YUV
+
+// Block-sum helpers; each reads 'rgb_stride' from the scope it expands in.
+#define SUM4(ptr, step) LinearToGamma(                     \
+    GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[(step)]) +  \
+    GammaToLinear((ptr)[rgb_stride]) +                     \
+    GammaToLinear((ptr)[rgb_stride + (step)]), 0)
+
+#define SUM2(ptr) \
+    LinearToGamma(GammaToLinear((ptr)[0]) + GammaToLinear((ptr)[rgb_stride]), 1)
+
+#define SUM2ALPHA(ptr) ((ptr)[0] + (ptr)[rgb_stride])
+#define SUM4ALPHA(ptr) (SUM2ALPHA(ptr) + SUM2ALPHA((ptr) + 4))
+
+#if defined(USE_INVERSE_ALPHA_TABLE)
+
+static const int kAlphaFix = 19;
+// Following table is (1 << kAlphaFix) / a. The (v * kInvAlpha[a]) >> kAlphaFix
+// formula is then equal to v / a in most (99.6%) cases. Note that this table
+// and constant are adjusted very tightly to fit 32b arithmetic.
+// In particular, they use the fact that the operands for 'v / a' are actually
+// derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3
+// with ai in [0..255] and pi in [0..1<<kGammaFix). The constraint to avoid
+// overflow is: kGammaFix + kAlphaFix <= 31.
+static const uint32_t kInvAlpha[4 * 0xff + 1] = {
+  0,  /* alpha = 0 */
+  524288, 262144, 174762, 131072, 104857, 87381, 74898, 65536,
+  58254, 52428, 47662, 43690, 40329, 37449, 34952, 32768,
+  30840, 29127, 27594, 26214, 24966, 23831, 22795, 21845,
+  20971, 20164, 19418, 18724, 18078, 17476, 16912, 16384,
+  15887, 15420, 14979, 14563, 14169, 13797, 13443, 13107,
+  12787, 12483, 12192, 11915, 11650, 11397, 11155, 10922,
+  10699, 10485, 10280, 10082, 9892, 9709, 9532, 9362,
+  9198, 9039, 8886, 8738, 8594, 8456, 8322, 8192,
+  8065, 7943, 7825, 7710, 7598, 7489, 7384, 7281,
+  7182, 7084, 6990, 6898, 6808, 6721, 6636, 6553,
+  6472, 6393, 6316, 6241, 6168, 6096, 6026, 5957,
+  5890, 5825, 5761, 5698, 5637, 5577, 5518, 5461,
+  5405, 5349, 5295, 5242, 5190, 5140, 5090, 5041,
+  4993, 4946, 4899, 4854, 4809, 4766, 4723, 4681,
+  4639, 4599, 4559, 4519, 4481, 4443, 4405, 4369,
+  4332, 4297, 4262, 4228, 4194, 4161, 4128, 4096,
+  4064, 4032, 4002, 3971, 3942, 3912, 3883, 3855,
+  3826, 3799, 3771, 3744, 3718, 3692, 3666, 3640,
+  3615, 3591, 3566, 3542, 3518, 3495, 3472, 3449,
+  3426, 3404, 3382, 3360, 3339, 3318, 3297, 3276,
+  3256, 3236, 3216, 3196, 3177, 3158, 3139, 3120,
+  3102, 3084, 3066, 3048, 3030, 3013, 2995, 2978,
+  2962, 2945, 2928, 2912, 2896, 2880, 2864, 2849,
+  2833, 2818, 2803, 2788, 2774, 2759, 2744, 2730,
+  2716, 2702, 2688, 2674, 2661, 2647, 2634, 2621,
+  2608, 2595, 2582, 2570, 2557, 2545, 2532, 2520,
+  2508, 2496, 2484, 2473, 2461, 2449, 2438, 2427,
+  2416, 2404, 2394, 2383, 2372, 2361, 2351, 2340,
+  2330, 2319, 2309, 2299, 2289, 2279, 2269, 2259,
+  2250, 2240, 2231, 2221, 2212, 2202, 2193, 2184,
+  2175, 2166, 2157, 2148, 2139, 2131, 2122, 2114,
+  2105, 2097, 2088, 2080, 2072, 2064, 2056, 2048,
+  2040, 2032, 2024, 2016, 2008, 2001, 1993, 1985,
+  1978, 1971, 1963, 1956, 1949, 1941, 1934, 1927,
+  1920, 1913, 1906, 1899, 1892, 1885, 1879, 1872,
+  1865, 1859, 1852, 1846, 1839, 1833, 1826, 1820,
+  1814, 1807, 1801, 1795, 1789, 1783, 1777, 1771,
+  1765, 1759, 1753, 1747, 1741, 1736, 1730, 1724,
+  1718, 1713, 1707, 1702, 1696, 1691, 1685, 1680,
+  1675, 1669, 1664, 1659, 1653, 1648, 1643, 1638,
+  1633, 1628, 1623, 1618, 1613, 1608, 1603, 1598,
+  1593, 1588, 1583, 1579, 1574, 1569, 1565, 1560,
+  1555, 1551, 1546, 1542, 1537, 1533, 1528, 1524,
+  1519, 1515, 1510, 1506, 1502, 1497, 1493, 1489,
+  1485, 1481, 1476, 1472, 1468, 1464, 1460, 1456,
+  1452, 1448, 1444, 1440, 1436, 1432, 1428, 1424,
+  1420, 1416, 1413, 1409, 1405, 1401, 1398, 1394,
+  1390, 1387, 1383, 1379, 1376, 1372, 1368, 1365,
+  1361, 1358, 1354, 1351, 1347, 1344, 1340, 1337,
+  1334, 1330, 1327, 1323, 1320, 1317, 1314, 1310,
+  1307, 1304, 1300, 1297, 1294, 1291, 1288, 1285,
+  1281, 1278, 1275, 1272, 1269, 1266, 1263, 1260,
+  1257, 1254, 1251, 1248, 1245, 1242, 1239, 1236,
+  1233, 1230, 1227, 1224, 1222, 1219, 1216, 1213,
+  1210, 1208, 1205, 1202, 1199, 1197, 1194, 1191,
+  1188, 1186, 1183, 1180, 1178, 1175, 1172, 1170,
+  1167, 1165, 1162, 1159, 1157, 1154, 1152, 1149,
+  1147, 1144, 1142, 1139, 1137, 1134, 1132, 1129,
+  1127, 1125, 1122, 1120, 1117, 1115, 1113, 1110,
+  1108, 1106, 1103, 1101, 1099, 1096, 1094, 1092,
+  1089, 1087, 1085, 1083, 1081, 1078, 1076, 1074,
+  1072, 1069, 1067, 1065, 1063, 1061, 1059, 1057,
+  1054, 1052, 1050, 1048, 1046, 1044, 1042, 1040,
+  1038, 1036, 1034, 1032, 1030, 1028, 1026, 1024,
+  1022, 1020, 1018, 1016, 1014, 1012, 1010, 1008,
+  1006, 1004, 1002, 1000, 998, 996, 994, 992,
+  991, 989, 987, 985, 983, 981, 979, 978,
+  976, 974, 972, 970, 969, 967, 965, 963,
+  961, 960, 958, 956, 954, 953, 951, 949,
+  948, 946, 944, 942, 941, 939, 937, 936,
+  934, 932, 931, 929, 927, 926, 924, 923,
+  921, 919, 918, 916, 914, 913, 911, 910,
+  908, 907, 905, 903, 902, 900, 899, 897,
+  896, 894, 893, 891, 890, 888, 887, 885,
+  884, 882, 881, 879, 878, 876, 875, 873,
+  872, 870, 869, 868, 866, 865, 863, 862,
+  860, 859, 858, 856, 855, 853, 852, 851,
+  849, 848, 846, 845, 844, 842, 841, 840,
+  838, 837, 836, 834, 833, 832, 830, 829,
+  828, 826, 825, 824, 823, 821, 820, 819,
+  817, 816, 815, 814, 812, 811, 810, 809,
+  807, 806, 805, 804, 802, 801, 800, 799,
+  798, 796, 795, 794, 793, 791, 790, 789,
+  788, 787, 786, 784, 783, 782, 781, 780,
+  779, 777, 776, 775, 774, 773, 772, 771,
+  769, 768, 767, 766, 765, 764, 763, 762,
+  760, 759, 758, 757, 756, 755, 754, 753,
+  752, 751, 750, 748, 747, 746, 745, 744,
+  743, 742, 741, 740, 739, 738, 737, 736,
+  735, 734, 733, 732, 731, 730, 729, 728,
+  727, 726, 725, 724, 723, 722, 721, 720,
+  719, 718, 717, 716, 715, 714, 713, 712,
+  711, 710, 709, 708, 707, 706, 705, 704,
+  703, 702, 701, 700, 699, 699, 698, 697,
+  696, 695, 694, 693, 692, 691, 690, 689,
+  688, 688, 687, 686, 685, 684, 683, 682,
+  681, 680, 680, 679, 678, 677, 676, 675,
+  674, 673, 673, 672, 671, 670, 669, 668,
+  667, 667, 666, 665, 664, 663, 662, 661,
+  661, 660, 659, 658, 657, 657, 656, 655,
+  654, 653, 652, 652, 651, 650, 649, 648,
+  648, 647, 646, 645, 644, 644, 643, 642,
+  641, 640, 640, 639, 638, 637, 637, 636,
+  635, 634, 633, 633, 632, 631, 630, 630,
+  629, 628, 627, 627, 626, 625, 624, 624,
+  623, 622, 621, 621, 620, 619, 618, 618,
+  617, 616, 616, 615, 614, 613, 613, 612,
+  611, 611, 610, 609, 608, 608, 607, 606,
+  606, 605, 604, 604, 603, 602, 601, 601,
+  600, 599, 599, 598, 597, 597, 596, 595,
+  595, 594, 593, 593, 592, 591, 591, 590,
+  589, 589, 588, 587, 587, 586, 585, 585,
+  584, 583, 583, 582, 581, 581, 580, 579,
+  579, 578, 578, 577, 576, 576, 575, 574,
+  574, 573, 572, 572, 571, 571, 570, 569,
+  569, 568, 568, 567, 566, 566, 565, 564,
+  564, 563, 563, 562, 561, 561, 560, 560,
+  559, 558, 558, 557, 557, 556, 555, 555,
+  554, 554, 553, 553, 552, 551, 551, 550,
+  550, 549, 548, 548, 547, 547, 546, 546,
+  545, 544, 544, 543, 543, 542, 542, 541,
+  541, 540, 539, 539, 538, 538, 537, 537,
+  536, 536, 535, 534, 534, 533, 533, 532,
+  532, 531, 531, 530, 530, 529, 529, 528,
+  527, 527, 526, 526, 525, 525, 524, 524,
+  523, 523, 522, 522, 521, 521, 520, 520,
+  519, 519, 518, 518, 517, 517, 516, 516,
+  515, 515, 514, 514
+};
+
+// Note that LinearToGamma() expects the values to be premultiplied by 4,
+// so we incorporate this factor 4 inside the DIVIDE_BY_ALPHA macro directly.
+#define DIVIDE_BY_ALPHA(sum, a)  (((sum) * kInvAlpha[(a)]) >> (kAlphaFix - 2))
+
+#else
+
+#define DIVIDE_BY_ALPHA(sum, a) (4 * (sum) / (a))
+
+#endif  // USE_INVERSE_ALPHA_TABLE
+
+// Alpha-weighted sum of four samples, divided by 'total_a' and gamma-encoded.
+static WEBP_INLINE int LinearToGammaWeighted(
+    const uint8_t* src, const uint8_t* a_ptr, uint32_t total_a,
+    int step, int rgb_stride) {
+  const int last = rgb_stride + step;  // offset of the fourth sample
+  uint32_t sum = a_ptr[0] * GammaToLinear(src[0]);
+  sum += a_ptr[step] * GammaToLinear(src[step]);
+  sum += a_ptr[rgb_stride] * GammaToLinear(src[rgb_stride]);
+  sum += a_ptr[last] * GammaToLinear(src[last]);
+  assert(total_a > 0 && total_a <= 4 * 0xff);
+#if defined(USE_INVERSE_ALPHA_TABLE)
+  assert((uint64_t)sum * kInvAlpha[total_a] < ((uint64_t)1 << 32));
+#endif
+  return LinearToGamma(DIVIDE_BY_ALPHA(sum, total_a), 0);
+}
+
+// Writes luma for 'width' pixels to 'dst_y'; pixels are 'step' bytes apart.
+static WEBP_INLINE void ConvertRowToY(const uint8_t* const r_ptr,
+                                      const uint8_t* const g_ptr,
+                                      const uint8_t* const b_ptr,
+                                      int step, uint8_t* const dst_y,
+                                      int width, VP8Random* const rg) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    const int pos = x * step;  // offset of pixel 'x' within each channel
+    dst_y[x] = RGBToY(r_ptr[pos], g_ptr[pos], b_ptr[pos], rg);
+  }
+}
+
+// Produces one R/G/B/A entry (four uint16_t) per pair of columns, from two
+// rows of 4-byte pixels. Plain sums are used when the summed alpha is 0 or
+// 4 * 0xff; otherwise the colors are weighted by alpha.
+static WEBP_INLINE void AccumulateRGBA(
+    const uint8_t* const r_ptr, const uint8_t* const g_ptr,
+    const uint8_t* const b_ptr, const uint8_t* const a_ptr,
+    int rgb_stride, uint16_t* dst, int width) {
+  const int num_pairs = width >> 1;
+  int n, pos = 0;
+  for (n = 0; n < num_pairs; ++n, pos += 2 * 4, dst += 4) {
+    const uint32_t a = SUM4ALPHA(a_ptr + pos);
+    int r, g, b;
+    if (a != 0 && a != 4 * 0xff) {
+      r = LinearToGammaWeighted(r_ptr + pos, a_ptr + pos, a, 4, rgb_stride);
+      g = LinearToGammaWeighted(g_ptr + pos, a_ptr + pos, a, 4, rgb_stride);
+      b = LinearToGammaWeighted(b_ptr + pos, a_ptr + pos, a, 4, rgb_stride);
+    } else {
+      r = SUM4(r_ptr + pos, 4);
+      g = SUM4(g_ptr + pos, 4);
+      b = SUM4(b_ptr + pos, 4);
+    }
+    dst[0] = r;
+    dst[1] = g;
+    dst[2] = b;
+    dst[3] = a;
+  }
+  if (width & 1) {  // odd width: the last column has no right-hand neighbour
+    const uint32_t a = 2u * SUM2ALPHA(a_ptr + pos);
+    int r, g, b;
+    if (a != 0 && a != 4 * 0xff) {
+      r = LinearToGammaWeighted(r_ptr + pos, a_ptr + pos, a, 0, rgb_stride);
+      g = LinearToGammaWeighted(g_ptr + pos, a_ptr + pos, a, 0, rgb_stride);
+      b = LinearToGammaWeighted(b_ptr + pos, a_ptr + pos, a, 0, rgb_stride);
+    } else {
+      r = SUM2(r_ptr + pos);
+      g = SUM2(g_ptr + pos);
+      b = SUM2(b_ptr + pos);
+    }
+    dst[0] = r;
+    dst[1] = g;
+    dst[2] = b;
+    dst[3] = a;
+  }
+}
+
+// Per-channel block sums into dst[0..2] of each 4-entry group; dst[3] not set.
+static WEBP_INLINE void AccumulateRGB(
+    const uint8_t* const r_ptr, const uint8_t* const g_ptr,
+    const uint8_t* const b_ptr, int step, int rgb_stride,
+    uint16_t* dst, int width) {
+  int n, pos = 0;
+  for (n = 0; n < (width >> 1); ++n, pos += 2 * step, dst += 4) {
+    dst[0] = SUM4(r_ptr + pos, step);
+    dst[1] = SUM4(g_ptr + pos, step);
+    dst[2] = SUM4(b_ptr + pos, step);
+  }
+  if (width & 1) {  // odd width: the last column has no right-hand neighbour
+    dst[0] = SUM2(r_ptr + pos);
+    dst[1] = SUM2(g_ptr + pos);
+    dst[2] = SUM2(b_ptr + pos);
+  }
+}
+
+// Converts 'width' accumulated R/G/B groups (four uint16_t each) to U and V.
+static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb,
+                                        uint8_t* const dst_u,
+                                        uint8_t* const dst_v,
+                                        int width, VP8Random* const rg) {
+  int x;
+  for (x = 0; x < width; ++x) {
+    const int r = rgb[4 * x + 0], g = rgb[4 * x + 1], b = rgb[4 * x + 2];
+    dst_u[x] = RGBToU(r, g, b, rg);
+    dst_v[x] = RGBToV(r, g, b, rg);
+  }
+}
+
+// Converts packed R/G/B(/A) samples into picture's YUV(A) 4:2:0 planes,
+// which are allocated here. Returns 1 on success, 0 on failure.
+static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
+                              const uint8_t* const g_ptr,
+                              const uint8_t* const b_ptr,
+                              const uint8_t* const a_ptr,
+                              int step,         // bytes per pixel
+                              int rgb_stride,   // bytes per scanline
+                              float dithering, int use_iterative_conversion,
+                              WebPPicture* const picture) {
+  int y;
+  const int width = picture->width;
+  const int height = picture->height;
+  const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+  const int is_rgb = (r_ptr < b_ptr);  // otherwise it's bgr
+
+  picture->colorspace = has_alpha ? WEBP_YUV420A : WEBP_YUV420;
+  picture->use_argb = 0;
+
+  // disable smart conversion if source is too small (overkill).
+  if (width < kMinDimensionIterativeConversion ||
+      height < kMinDimensionIterativeConversion) {
+    use_iterative_conversion = 0;
+  }
+
+  if (!WebPPictureAllocYUVA(picture, width, height)) {
+    return 0;
+  }
+  if (has_alpha) {
+    WebPInitAlphaProcessing();
+    assert(step == 4);
+#if defined(USE_GAMMA_COMPRESSION) && defined(USE_INVERSE_ALPHA_TABLE)
+    assert(kAlphaFix + kGammaFix <= 31);
+#endif
+  }
+
+  if (use_iterative_conversion) {
+    InitGammaTablesF();
+    if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) {
+      return 0;
+    }
+    if (has_alpha) {
+      WebPExtractAlpha(a_ptr, rgb_stride, width, height,
+                       picture->a, picture->a_stride);
+    }
+  } else {
+    const int uv_width = (width + 1) >> 1;
+    int use_dsp = (step == 3);  // use special function in this case
+    // temporary storage for accumulated R/G/B values during conversion to U/V
+    uint16_t* const tmp_rgb =
+        (uint16_t*)WebPSafeMalloc(4 * uv_width, sizeof(*tmp_rgb));
+    uint8_t* dst_y = picture->y;
+    uint8_t* dst_u = picture->u;
+    uint8_t* dst_v = picture->v;
+    uint8_t* dst_a = picture->a;
+    VP8Random base_rg;
+    VP8Random* rg = NULL;
+    if (dithering > 0.) {
+      VP8InitRandom(&base_rg, dithering);
+      rg = &base_rg;
+      use_dsp = 0;   // can't use dsp in this case
+    }
+    WebPInitConvertARGBToYUV();
+    InitGammaTables();
+    if (tmp_rgb == NULL) {  // malloc error: record it in the picture as well
+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+
+    // Downsample Y/U/V planes, two rows at a time
+    for (y = 0; y < (height >> 1); ++y) {
+      int rows_have_alpha = has_alpha;
+      const int off1 = (2 * y + 0) * rgb_stride;
+      const int off2 = (2 * y + 1) * rgb_stride;
+      if (use_dsp) {
+        if (is_rgb) {
+          WebPConvertRGB24ToY(r_ptr + off1, dst_y, width);
+          WebPConvertRGB24ToY(r_ptr + off2, dst_y + picture->y_stride, width);
+        } else {
+          WebPConvertBGR24ToY(b_ptr + off1, dst_y, width);
+          WebPConvertBGR24ToY(b_ptr + off2, dst_y + picture->y_stride, width);
+        }
+      } else {
+        ConvertRowToY(r_ptr + off1, g_ptr + off1, b_ptr + off1, step,
+                      dst_y, width, rg);
+        ConvertRowToY(r_ptr + off2, g_ptr + off2, b_ptr + off2, step,
+                      dst_y + picture->y_stride, width, rg);
+      }
+      dst_y += 2 * picture->y_stride;
+      if (has_alpha) {
+        rows_have_alpha &= !WebPExtractAlpha(a_ptr + off1, rgb_stride,
+                                             width, 2,
+                                             dst_a, picture->a_stride);
+        dst_a += 2 * picture->a_stride;
+      }
+      // Collect averaged R/G/B(/A)
+      if (!rows_have_alpha) {
+        AccumulateRGB(r_ptr + off1, g_ptr + off1, b_ptr + off1,
+                      step, rgb_stride, tmp_rgb, width);
+      } else {
+        AccumulateRGBA(r_ptr + off1, g_ptr + off1, b_ptr + off1, a_ptr + off1,
+                       rgb_stride, tmp_rgb, width);
+      }
+      // Convert to U/V
+      if (rg == NULL) {
+        WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
+      } else {
+        ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
+      }
+      dst_u += picture->uv_stride;
+      dst_v += picture->uv_stride;
+    }
+    if (height & 1) {    // extra last row
+      const int off = 2 * y * rgb_stride;
+      int row_has_alpha = has_alpha;
+      if (use_dsp) {
+        if (is_rgb) {
+          WebPConvertRGB24ToY(r_ptr + off, dst_y, width);
+        } else {
+          WebPConvertBGR24ToY(b_ptr + off, dst_y, width);
+        }
+      } else {
+        ConvertRowToY(r_ptr + off, g_ptr + off, b_ptr + off, step,
+                      dst_y, width, rg);
+      }
+      if (row_has_alpha) {
+        row_has_alpha &= !WebPExtractAlpha(a_ptr + off, 0, width, 1, dst_a, 0);
+      }
+      // Collect averaged R/G/B(/A); a zero stride makes the row pair with itself
+      if (!row_has_alpha) {
+        AccumulateRGB(r_ptr + off, g_ptr + off, b_ptr + off,
+                      step, /* rgb_stride = */ 0, tmp_rgb, width);
+      } else {
+        AccumulateRGBA(r_ptr + off, g_ptr + off, b_ptr + off, a_ptr + off,
+                       /* rgb_stride = */ 0, tmp_rgb, width);
+      }
+      if (rg == NULL) {
+        WebPConvertRGBA32ToUV(tmp_rgb, dst_u, dst_v, uv_width);
+      } else {
+        ConvertRowsToUV(tmp_rgb, dst_u, dst_v, uv_width, rg);
+      }
+    }
+    WebPSafeFree(tmp_rgb);
+  }
+  return 1;
+}
+
+#undef SUM4
+#undef SUM2
+#undef SUM4ALPHA
+#undef SUM2ALPHA
+
+//------------------------------------------------------------------------------
+// call for ARGB->YUVA conversion
+
+static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
+                             float dithering, int use_iterative_conversion) {
+  const uint8_t* argb;
+  if (picture == NULL) return 0;
+  if (picture->argb == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  }
+  argb = (const uint8_t*)picture->argb;  // view the 32-bit pixels as bytes
+  picture->colorspace = WEBP_YUV420;
+  return ImportYUVAFromRGBA(argb + (ALPHA_IS_LAST ? 2 : 1),  // r
+                            argb + (ALPHA_IS_LAST ? 1 : 2),  // g
+                            argb + (ALPHA_IS_LAST ? 0 : 3),  // b
+                            argb + (ALPHA_IS_LAST ? 3 : 0),  // a
+                            4, 4 * picture->argb_stride,
+                            dithering, use_iterative_conversion, picture);
+}
+
+int WebPPictureARGBToYUVADithered(WebPPicture* picture, WebPEncCSP colorspace,
+                                  float dithering) {
+  return PictureARGBToYUVA(picture, colorspace, dithering, 0);  // not iterative
+}
+
+int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+  return PictureARGBToYUVA(picture, colorspace, 0.f, 0);  // no dithering
+}
+
+int WebPPictureSmartARGBToYUVA(WebPPicture* picture) {
+  return PictureARGBToYUVA(picture, WEBP_YUV420, 0.f, 1);  // iterative mode
+}
+
+//------------------------------------------------------------------------------
+// call for YUVA -> ARGB conversion
+
+int WebPPictureYUVAToARGB(WebPPicture* picture) {
+  if (picture == NULL) return 0;  // no picture to record an error code in
+  if (picture->y == NULL || picture->u == NULL || picture->v == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  }
+  // Allocate a new argb buffer (discarding the previous one).
+  if (!WebPPictureAllocARGB(picture, picture->width, picture->height)) return 0;
+  picture->use_argb = 1;
+
+  // Upsample the chroma planes and convert to ARGB, row pair by row pair.
+  {
+    int y;
+    const int width = picture->width;
+    const int height = picture->height;
+    const int argb_stride = 4 * picture->argb_stride;  // in bytes
+    uint8_t* dst = (uint8_t*)picture->argb;
+    const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
+    WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
+
+    // First row, with replicated top samples.
+    upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+    cur_y += picture->y_stride;
+    dst += argb_stride;
+    // Center rows: two luma rows per pass, between two successive chroma rows.
+    for (y = 1; y + 1 < height; y += 2) {
+      const uint8_t* const top_u = cur_u;
+      const uint8_t* const top_v = cur_v;
+      cur_u += picture->uv_stride;
+      cur_v += picture->uv_stride;
+      upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
+               dst, dst + argb_stride, width);
+      cur_y += 2 * picture->y_stride;
+      dst += 2 * argb_stride;
+    }
+    // Last row (if needed), with replicated bottom samples.
+    if (height > 1 && !(height & 1)) {  // even height leaves one row unpaired
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+    }
+    // Insert alpha values if needed, in replacement for the default 0xff ones.
+    if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
+      for (y = 0; y < height; ++y) {
+        uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
+        const uint8_t* const src = picture->a + y * picture->a_stride;
+        int x;
+        for (x = 0; x < width; ++x) {  // alpha goes into the top 8 bits
+          argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
+        }
+      }
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// automatic import / conversion
+
+// Copies the packed samples into 'picture': straight to YUV(A) planes when
+// use_argb is off, otherwise packed row by row into picture->argb.
+static int Import(WebPPicture* const picture,
+                  const uint8_t* const rgb, int rgb_stride,
+                  int step, int swap_rb, int import_alpha) {
+  const int red_offset = swap_rb ? 2 : 0;
+  const int blue_offset = swap_rb ? 0 : 2;
+  const uint8_t* const r_ptr = rgb + red_offset;
+  const uint8_t* const g_ptr = rgb + 1;
+  const uint8_t* const b_ptr = rgb + blue_offset;
+  const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL;
+  const int num_cols = picture->width;
+  const int num_rows = picture->height;
+  int row;
+
+  if (!picture->use_argb) {
+    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
+                              0.f /* no dithering */, 0, picture);
+  }
+  if (!WebPPictureAlloc(picture)) return 0;
+
+  VP8EncDspARGBInit();
+
+  // Alpha import requires 4-byte pixels; otherwise 3 or more bytes will do.
+  assert(import_alpha ? (step == 4) : (step >= 3));
+  for (row = 0; row < num_rows; ++row) {
+    uint32_t* const out = picture->argb + row * picture->argb_stride;
+    const int skip = row * rgb_stride;
+    if (import_alpha) {
+      VP8PackARGB(a_ptr + skip, r_ptr + skip, g_ptr + skip, b_ptr + skip,
+                  num_cols, out);
+    } else {
+      VP8PackRGB(r_ptr + skip, g_ptr + skip, b_ptr + skip,
+                 num_cols, step, out);
+    }
+  }
+  return 1;
+}
+
+// Public API
+
+int WebPPictureImportRGB(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {  // 3-byte R,G,B
+  return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 0, 0) : 0;
+}
+
+int WebPPictureImportBGR(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {  // 3-byte B,G,R
+  return (picture != NULL) ? Import(picture, rgb, rgb_stride, 3, 1, 0) : 0;
+}
+
+int WebPPictureImportRGBA(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {  // R,G,B,A
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 1) : 0;
+}
+
+int WebPPictureImportBGRA(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {  // B,G,R,A
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 1) : 0;
+}
+
+int WebPPictureImportRGBX(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {  // no alpha
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 0, 0) : 0;
+}
+
+int WebPPictureImportBGRX(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {  // no alpha
+  return (picture != NULL) ? Import(picture, rgba, rgba_stride, 4, 1, 0) : 0;
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/enc/picture_psnr.c b/drivers/webp/enc/picture_psnr.c
new file mode 100644
index 0000000000..40214efc95
--- /dev/null
+++ b/drivers/webp/enc/picture_psnr.c
@@ -0,0 +1,175 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools for measuring distortion
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/utils.h"
+
+//------------------------------------------------------------------------------
+// local-min distortion
+//
+// For every pixel in the *reference* picture, we search for the local best
+// match in the compressed image. This is not a symmetrical measure.
+
+#define RADIUS 2  // search radius. Shouldn't be too large.
+
+static void AccumulateLSIM(const uint8_t* src, int src_stride,
+                           const uint8_t* ref, int ref_stride,
+                           int w, int h, DistoStats* stats) {
+  int x, y;
+  double total_sse = 0.;  // sum over all pixels of the best local match
+  for (y = 0; y < h; ++y) {
+    const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS;  // window clamped to plane
+    const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1;
+    for (x = 0; x < w; ++x) {
+      const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS;
+      const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1;
+      double best_sse = 255. * 255.;  // largest possible 8-bit squared error
+      const double value = (double)ref[y * ref_stride + x];
+      int i, j;
+      for (j = y_0; j < y_1; ++j) {
+        const uint8_t* const s = src + j * src_stride;
+        for (i = x_0; i < x_1; ++i) {
+          const double diff = s[i] - value;
+          const double sse = diff * diff;
+          if (sse < best_sse) best_sse = sse;
+        }
+      }
+      total_sse += best_sse;
+    }
+  }
+  stats->w = w * h;
+  stats->xm = 0;
+  stats->ym = 0;
+  stats->xxm = total_sse;  // the only non-zero moment
+  stats->yym = 0;
+  stats->xym = 0;          // cross term; xxm must keep total_sse
+}
+#undef RADIUS
+
+//------------------------------------------------------------------------------
+// Distortion
+
+// Max value returned in case of exact similarity.
+static const double kMinDistortion_dB = 99.;
+static float GetPSNR(const double v) {  // 'v' <= 0 maps to kMinDistortion_dB
+  if (!(v > 0.)) return (float)kMinDistortion_dB;
+  return (float)(-4.3429448 * log(v / (255 * 255.)));  // -10 * log10(.)
+}
+
+// Measures 'src' against 'ref' into result[0..4]; result[4] uses all planes.
+// SSIM if type is 1, LSIM if type >= 2, PSNR otherwise. Returns 0 on error.
+int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
+                          int type, float result[5]) {
+  DistoStats stats[5];
+  int w, h;
+
+  memset(stats, 0, sizeof(stats));
+  if (src == NULL || ref == NULL ||
+      src->width != ref->width || src->height != ref->height ||
+      src->use_argb != ref->use_argb || result == NULL) {
+    return 0;
+  }
+  w = src->width;
+  h = src->height;
+  if (src->use_argb == 1) {
+    if (src->argb == NULL || ref->argb == NULL) {
+      return 0;
+    } else {
+      int i, j, c;
+      uint8_t* tmp1, *tmp2;
+      uint8_t* const tmp_plane =
+          (uint8_t*)WebPSafeMalloc(2ULL * w * h, sizeof(*tmp_plane));
+      if (tmp_plane == NULL) return 0;
+      tmp1 = tmp_plane;
+      tmp2 = tmp_plane + w * h;
+      for (c = 0; c < 4; ++c) {  // one 8-bit channel of the 32-bit pixels at a time
+        for (j = 0; j < h; ++j) {
+          for (i = 0; i < w; ++i) {
+            tmp1[j * w + i] = src->argb[i + j * src->argb_stride] >> (c * 8);
+            tmp2[j * w + i] = ref->argb[i + j * ref->argb_stride] >> (c * 8);
+          }
+        }
+        if (type >= 2) {
+          AccumulateLSIM(tmp1, w, tmp2, w, w, h, &stats[c]);
+        } else {
+          VP8SSIMAccumulatePlane(tmp1, w, tmp2, w, w, h, &stats[c]);
+        }
+      }
+      WebPSafeFree(tmp_plane);  // must pair with WebPSafeMalloc() above
+    }
+  } else {
+    int has_alpha, uv_w, uv_h;
+    if (src->y == NULL || ref->y == NULL ||
+        src->u == NULL || ref->u == NULL ||
+        src->v == NULL || ref->v == NULL) {
+      return 0;
+    }
+    has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
+    if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
+        (has_alpha && (src->a == NULL || ref->a == NULL))) {
+      return 0;
+    }
+
+    uv_w = (src->width + 1) >> 1;
+    uv_h = (src->height + 1) >> 1;
+    if (type >= 2) {
+      AccumulateLSIM(src->y, src->y_stride, ref->y, ref->y_stride,
+                     w, h, &stats[0]);
+      AccumulateLSIM(src->u, src->uv_stride, ref->u, ref->uv_stride,
+                     uv_w, uv_h, &stats[1]);
+      AccumulateLSIM(src->v, src->uv_stride, ref->v, ref->uv_stride,
+                     uv_w, uv_h, &stats[2]);
+      if (has_alpha) {
+        AccumulateLSIM(src->a, src->a_stride, ref->a, ref->a_stride,
+                       w, h, &stats[3]);
+      }
+    } else {
+      VP8SSIMAccumulatePlane(src->y, src->y_stride,
+                             ref->y, ref->y_stride,
+                             w, h, &stats[0]);
+      VP8SSIMAccumulatePlane(src->u, src->uv_stride,
+                             ref->u, ref->uv_stride,
+                             uv_w, uv_h, &stats[1]);
+      VP8SSIMAccumulatePlane(src->v, src->uv_stride,
+                             ref->v, ref->uv_stride,
+                             uv_w, uv_h, &stats[2]);
+      if (has_alpha) {
+        VP8SSIMAccumulatePlane(src->a, src->a_stride,
+                               ref->a, ref->a_stride,
+                               w, h, &stats[3]);
+      }
+    }
+  }
+  // Final stat calculations.
+  {
+    int c;
+    for (c = 0; c <= 4; ++c) {
+      if (type == 1) {
+        const double v = VP8SSIMGet(&stats[c]);
+        result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
+                                     : kMinDistortion_dB);
+      } else {
+        const double v = VP8SSIMGetSquaredError(&stats[c]);
+        result[c] = GetPSNR(v);
+      }
+      // Accumulate forward
+      if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/enc/picture_rescale.c b/drivers/webp/enc/picture_rescale.c
new file mode 100644
index 0000000000..9f19e8e80f
--- /dev/null
+++ b/drivers/webp/enc/picture_rescale.c
@@ -0,0 +1,264 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools: copy, crop, rescaling and view.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/rescaler.h"
+#include "../utils/utils.h"
+
+#define HALVE(x) (((x) + 1) >> 1)
+
+// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them
+// into 'dst'. Mark 'dst' as not owning any memory. Both must be non-NULL.
+static void PictureGrabSpecs(const WebPPicture* const src,
+                             WebPPicture* const dst) {
+  assert(src != NULL && dst != NULL);
+  *dst = *src;                   // plain struct copy: pointer fields alias 'src'
+  WebPPictureResetBuffers(dst);  // presumably detaches them; see its definition
+}
+
+//------------------------------------------------------------------------------
+
+// For YUV pictures, round '*left' and '*top' down to even values so that the
+// corner sits on a chroma sample. ARGB pictures are left as they are.
+static void SnapTopLeftPosition(const WebPPicture* const pic,
+                                int* const left, int* const top) {
+  if (pic->use_argb) return;
+  *left &= ~1;
+  *top &= ~1;
+}
+
+// Snap the top-left corner, then return 1 if the rectangle is valid, else 0.
+static int AdjustAndCheckRectangle(const WebPPicture* const pic,
+                                   int* const left, int* const top,
+                                   int width, int height) {
+  int x0, y0;
+  SnapTopLeftPosition(pic, left, top);
+  x0 = *left;
+  y0 = *top;
+  return (x0 >= 0 && y0 >= 0 && width > 0 && height > 0 &&
+          x0 + width <= pic->width && y0 + height <= pic->height);
+}
+
+int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
+  if (src == NULL || dst == NULL) return 0;
+  if (src == dst) return 1;   // copying onto itself: nothing to do
+
+  PictureGrabSpecs(src, dst);
+  if (!WebPPictureAlloc(dst)) return 0;
+
+  if (src->use_argb) {
+    // Single packed plane: copied as rows of 4 * width bytes.
+    WebPCopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,
+                  (uint8_t*)dst->argb, 4 * dst->argb_stride,
+                  4 * dst->width, dst->height);
+  } else {
+    const int w = dst->width, h = dst->height;
+    const int uv_w = HALVE(w), uv_h = HALVE(h);
+    // Full-size luma, then the two chroma planes at half size (rounded up).
+    WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride, w, h);
+    WebPCopyPlane(src->u, src->uv_stride, dst->u, dst->uv_stride, uv_w, uv_h);
+    WebPCopyPlane(src->v, src->uv_stride, dst->v, dst->uv_stride, uv_w, uv_h);
+    if (dst->a != NULL) {
+      WebPCopyPlane(src->a, src->a_stride, dst->a, dst->a_stride, w, h);
+    }
+  }
+  return 1;
+}
+
+// Returns 1 when the active colorspace's memory pointer ('memory_argb_' or
+// 'memory_') is NULL, i.e. the picture is a view; 0 otherwise or if NULL.
+int WebPPictureIsView(const WebPPicture* picture) {
+  if (picture == NULL) return 0;
+  return picture->use_argb ? (picture->memory_argb_ == NULL)
+                           : (picture->memory_ == NULL);
+}
+
+int WebPPictureView(const WebPPicture* src,
+                    int left, int top, int width, int height,
+                    WebPPicture* dst) {
+  if (src == NULL || dst == NULL) return 0;
+
+  // Adjust the top-left corner and reject invalid rectangles.
+  if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0;
+
+  // Beware of aliasing: grabbing specs onto itself would leak 'memory_'.
+  if (src != dst) PictureGrabSpecs(src, dst);
+  dst->width = width;
+  dst->height = height;
+  if (src->use_argb) {
+    dst->argb = src->argb + top * src->argb_stride + left;
+    dst->argb_stride = src->argb_stride;
+  } else {
+    const int uv_top = top >> 1, uv_left = left >> 1;
+    dst->y = src->y + top * src->y_stride + left;
+    dst->u = src->u + uv_top * src->uv_stride + uv_left;
+    dst->v = src->v + uv_top * src->uv_stride + uv_left;
+    dst->y_stride = src->y_stride;
+    dst->uv_stride = src->uv_stride;
+    if (src->a != NULL) {
+      dst->a = src->a + top * src->a_stride + left;
+      dst->a_stride = src->a_stride;
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Picture cropping
+
+int WebPPictureCrop(WebPPicture* pic,
+                    int left, int top, int width, int height) {
+  WebPPicture cropped;
+
+  if (pic == NULL) return 0;
+  if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0;
+
+  PictureGrabSpecs(pic, &cropped);   // same specs; dimensions are set below
+  cropped.width = width;
+  cropped.height = height;
+  if (!WebPPictureAlloc(&cropped)) return 0;
+
+  if (pic->use_argb) {
+    const uint32_t* const first = pic->argb + top * pic->argb_stride + left;
+    WebPCopyPlane((const uint8_t*)first, pic->argb_stride * 4,
+                  (uint8_t*)cropped.argb, cropped.argb_stride * 4,
+                  width * 4, height);
+  } else {
+    const int uv_w = HALVE(width), uv_h = HALVE(height);
+    const int luma_off = top * pic->y_stride + left;
+    const int chroma_off = (top / 2) * pic->uv_stride + left / 2;
+    WebPCopyPlane(pic->y + luma_off, pic->y_stride,
+                  cropped.y, cropped.y_stride, width, height);
+    WebPCopyPlane(pic->u + chroma_off, pic->uv_stride,
+                  cropped.u, cropped.uv_stride, uv_w, uv_h);
+    WebPCopyPlane(pic->v + chroma_off, pic->uv_stride,
+                  cropped.v, cropped.uv_stride, uv_w, uv_h);
+    if (cropped.a != NULL) {
+      const int alpha_off = top * pic->a_stride + left;
+      WebPCopyPlane(pic->a + alpha_off, pic->a_stride,
+                    cropped.a, cropped.a_stride, width, height);
+    }
+  }
+  WebPPictureFree(pic);   // free old buffers before adopting the new ones
+  *pic = cropped;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simple picture rescaler
+
+static void RescalePlane(const uint8_t* src,
+                         int src_width, int src_height, int src_stride,
+                         uint8_t* dst,
+                         int dst_width, int dst_height, int dst_stride,
+                         rescaler_t* const work,
+                         int num_channels) {
+  WebPRescaler rs;
+  int done = 0;   // number of source rows consumed so far
+  WebPRescalerInit(&rs, src_width, src_height, dst, dst_width, dst_height,
+                   dst_stride, num_channels, work);
+  // Alternate import/export until every source row has been consumed.
+  while (done < src_height) {
+    const uint8_t* const next = src + done * src_stride;
+    done += WebPRescalerImport(&rs, src_height - done, next, src_stride);
+    WebPRescalerExport(&rs);
+  }
+}
+
+static void AlphaMultiplyARGB(WebPPicture* const pic, int inverse) {
+  assert(pic->argb != NULL);   // the caller must provide an ARGB buffer
+  WebPMultARGBRows((uint8_t*)pic->argb, pic->argb_stride * sizeof(*pic->argb),
+                   pic->width, pic->height, inverse);   // stride is in bytes
+}
+
+// Multiplies (inverse=0) or un-multiplies (inverse=1) luma by alpha, if any.
+static void AlphaMultiplyY(WebPPicture* const pic, int inverse) {
+  if (pic->a == NULL) return;   // no alpha plane: nothing to do
+  WebPMultRows(pic->y, pic->y_stride, pic->a, pic->a_stride,
+               pic->width, pic->height, inverse);
+}
+
+int WebPPictureRescale(WebPPicture* pic, int width, int height) {
+  WebPPicture tmp;
+  int prev_width, prev_height;
+  rescaler_t* work;   // scratch buffer shared by all RescalePlane() calls
+
+  if (pic == NULL) return 0;
+  prev_width = pic->width;
+  prev_height = pic->height;
+  if (!WebPRescalerGetScaledDimensions(
+          prev_width, prev_height, &width, &height)) {  // may adjust width/height
+    return 0;
+  }
+
+  PictureGrabSpecs(pic, &tmp);
+  tmp.width = width;
+  tmp.height = height;
+  if (!WebPPictureAlloc(&tmp)) return 0;
+
+  if (!pic->use_argb) {
+    work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
+    if (work == NULL) {
+      WebPPictureFree(&tmp);   // don't leak the freshly allocated planes
+      return 0;
+    }
+    // If present, we need to rescale alpha first (for AlphaMultiplyY).
+    if (pic->a != NULL) {
+      WebPInitAlphaProcessing();
+      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
+                   tmp.a, width, height, tmp.a_stride, work, 1);
+    }
+
+    // We take transparency into account on the luma plane only. That's not
+    // totally exact blending, but still is a good approximation.
+    AlphaMultiplyY(pic, 0);    // operates on the source picture's own luma plane
+    RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
+                 tmp.y, width, height, tmp.y_stride, work, 1);
+    AlphaMultiplyY(&tmp, 1);   // inverse pass, using the already-rescaled alpha
+
+    RescalePlane(pic->u,
+                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+                 tmp.u,
+                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+    RescalePlane(pic->v,
+                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+                 tmp.v,
+                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+  } else {
+    work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
+    if (work == NULL) {
+      WebPPictureFree(&tmp);   // don't leak the freshly allocated buffer
+      return 0;
+    }
+    // In order to correctly interpolate colors, we need to apply the alpha
+    // weighting first (black-matting), scale the RGB values, and remove
+    // the premultiplication afterward (while preserving the alpha channel).
+    WebPInitAlphaProcessing();
+    AlphaMultiplyARGB(pic, 0);
+    RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
+                 pic->argb_stride * 4,
+                 (uint8_t*)tmp.argb, width, height,
+                 tmp.argb_stride * 4,
+                 work, 4);   // 4 interleaved channels, strides given in bytes
+    AlphaMultiplyARGB(&tmp, 1);
+  }
+  WebPPictureFree(pic);   // release the original buffers...
+  WebPSafeFree(work);
+  *pic = tmp;             // ...and let 'pic' take over the rescaled ones
+  return 1;
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/enc/picture_tools.c b/drivers/webp/enc/picture_tools.c
new file mode 100644
index 0000000000..7c73646397
--- /dev/null
+++ b/drivers/webp/enc/picture_tools.c
@@ -0,0 +1,206 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// WebPPicture tools: alpha handling, etc.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8enci.h"
+#include "../dsp/yuv.h"
+
+static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) {
+  return (0xff000000u | (r << 16) | (g << 8) | b);   // alpha forced to 0xff
+}
+
+//------------------------------------------------------------------------------
+// Helper: clean up fully transparent area to help compressibility.
+
+#define SIZE 8
+#define SIZE2 (SIZE / 2)
+// Returns 1 if every byte of the size x size square at 'ptr' is zero, else 0.
+static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
+  int rows_left;
+  for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+    const uint8_t* const end = ptr + size;
+    const uint8_t* p;
+    for (p = ptr; p < end; ++p) {
+      if (*p != 0) return 0;
+    }
+  }
+  return 1;
+}
+
+// Returns 1 if all size x size pixels at 'ptr' have a zero top byte, else 0.
+static int is_transparent_argb_area(const uint32_t* ptr, int stride, int size) {
+  int rows_left;
+  for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+    const uint32_t* const end = ptr + size;
+    const uint32_t* p;
+    for (p = ptr; p < end; ++p) {
+      if ((*p >> 24) != 0) return 0;
+    }
+  }
+  return 1;
+}
+
+// Fills the size x size square at 'ptr' (rows 'stride' apart) with byte 'v'.
+static void flatten(uint8_t* ptr, int v, int stride, int size) {
+  int rows_left;
+  for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+    memset(ptr, v, size);
+  }
+}
+
+// Sets every pixel of the size x size square at 'ptr' to the value 'v'.
+static void flatten_argb(uint32_t* ptr, uint32_t v, int stride, int size) {
+  int col, rows_left;
+  for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+    for (col = 0; col < size; ++col) ptr[col] = v;
+  }
+}
+
+void WebPCleanupTransparentArea(WebPPicture* pic) {
+  int bx, by, blocks_w, blocks_h;
+  if (pic == NULL) return;
+  blocks_w = pic->width / SIZE;
+  blocks_h = pic->height / SIZE;
+
+  // Only whole SIZE x SIZE blocks are visited; left-overs stay as-is.
+  if (pic->use_argb) {
+    uint32_t fill = 0;
+    for (by = 0; by < blocks_h; ++by) {
+      int have_fill = 0;   // 'fill' is valid only within a transparent run
+      for (bx = 0; bx < blocks_w; ++bx) {
+        const int at = (by * pic->argb_stride + bx) * SIZE;
+        if (!is_transparent_argb_area(pic->argb + at, pic->argb_stride, SIZE)) {
+          have_fill = 0;
+          continue;
+        }
+        if (!have_fill) {
+          fill = pic->argb[at];
+          have_fill = 1;
+        }
+        flatten_argb(pic->argb + at, fill, pic->argb_stride, SIZE);
+      }
+    }
+  } else {
+    const uint8_t* const alpha = pic->a;
+    int fill_y = 0, fill_u = 0, fill_v = 0;
+    if (alpha == NULL) return;    // nothing to do
+    for (by = 0; by < blocks_h; ++by) {
+      int have_fill = 0;   // fill values are valid only within a transparent run
+      for (bx = 0; bx < blocks_w; ++bx) {
+        const int at_a = (by * pic->a_stride + bx) * SIZE;
+        const int at_y = (by * pic->y_stride + bx) * SIZE;
+        const int at_uv = (by * pic->uv_stride + bx) * SIZE2;
+        if (!is_transparent_area(alpha + at_a, pic->a_stride, SIZE)) {
+          have_fill = 0;
+          continue;
+        }
+        if (!have_fill) {
+          fill_y = pic->y[at_y];
+          fill_u = pic->u[at_uv];
+          fill_v = pic->v[at_uv];
+          have_fill = 1;
+        }
+        flatten(pic->y + at_y, fill_y, pic->y_stride, SIZE);
+        flatten(pic->u + at_uv, fill_u, pic->uv_stride, SIZE2);
+        flatten(pic->v + at_uv, fill_v, pic->uv_stride, SIZE2);
+      }
+    }
+  }
+}
+
+#undef SIZE
+#undef SIZE2
+
+//------------------------------------------------------------------------------
+// Blend color and remove transparency info
+
+#define BLEND(V0, V1, ALPHA) \
+    ((((V0) * (255 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 16)
+#define BLEND_10BIT(V0, V1, ALPHA) \
+    ((((V0) * (1020 - (ALPHA)) + (V1) * (ALPHA)) * 0x101) >> 18)
+
+void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) {
+  const int red = (background_rgb >> 16) & 0xff;    // background is 0x..RRGGBB
+  const int green = (background_rgb >> 8) & 0xff;
+  const int blue = (background_rgb >> 0) & 0xff;
+  int x, y;
+  if (pic == NULL) return;
+  if (!pic->use_argb) {
+    const int uv_width = (pic->width >> 1);  // omit last pixel during u/v loop
+    const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF);
+    // VP8RGBToU/V expects the u/v values summed over four pixels
+    const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
+    const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF);
+    const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT;
+    if (!has_alpha || pic->a == NULL) return;    // nothing to do
+    for (y = 0; y < pic->height; ++y) {
+      // Luma blending
+      uint8_t* const y_ptr = pic->y + y * pic->y_stride;
+      uint8_t* const a_ptr = pic->a + y * pic->a_stride;
+      for (x = 0; x < pic->width; ++x) {
+        const int alpha = a_ptr[x];
+        if (alpha < 0xff) {   // fully opaque samples are left untouched
+          y_ptr[x] = BLEND(Y0, y_ptr[x], a_ptr[x]);
+        }
+      }
+      // Chroma blending every even line
+      if ((y & 1) == 0) {
+        uint8_t* const u = pic->u + (y >> 1) * pic->uv_stride;
+        uint8_t* const v = pic->v + (y >> 1) * pic->uv_stride;
+        uint8_t* const a_ptr2 =   // next alpha row, or this one on the last line
+            (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride;
+        for (x = 0; x < uv_width; ++x) {
+          // Average four alpha values into a single blending weight.
+          // TODO(skal): might lead to visible contouring. Can we do better?
+          const int alpha =
+              a_ptr[2 * x + 0] + a_ptr[2 * x + 1] +
+              a_ptr2[2 * x + 0] + a_ptr2[2 * x + 1];
+          u[x] = BLEND_10BIT(U0, u[x], alpha);
+          v[x] = BLEND_10BIT(V0, v[x], alpha);
+        }
+        if (pic->width & 1) {   // rightmost pixel
+          const int alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]);
+          u[x] = BLEND_10BIT(U0, u[x], alpha);
+          v[x] = BLEND_10BIT(V0, v[x], alpha);
+        }
+      }
+      memset(a_ptr, 0xff, pic->width);   // this alpha row is now fully opaque
+    }
+  } else {
+    uint32_t* argb = pic->argb;
+    const uint32_t background = MakeARGB32(red, green, blue);
+    for (y = 0; y < pic->height; ++y) {
+      for (x = 0; x < pic->width; ++x) {
+        const int alpha = (argb[x] >> 24) & 0xff;
+        if (alpha != 0xff) {   // fully opaque pixels are left untouched
+          if (alpha > 0) {
+            int r = (argb[x] >> 16) & 0xff;
+            int g = (argb[x] >>  8) & 0xff;
+            int b = (argb[x] >>  0) & 0xff;
+            r = BLEND(red, r, alpha);
+            g = BLEND(green, g, alpha);
+            b = BLEND(blue, b, alpha);
+            argb[x] = MakeARGB32(r, g, b);
+          } else {
+            argb[x] = background;   // alpha == 0: plain background color
+          }
+        }
+      }
+      argb += pic->argb_stride;
+    }
+  }
+}
+
+#undef BLEND
+#undef BLEND_10BIT
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/enc/quant.c b/drivers/webp/enc/quant.c
index ea153849c8..002c326b82 100644
--- a/drivers/webp/enc/quant.c
+++ b/drivers/webp/enc/quant.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   Quantization
@@ -11,6 +13,7 @@
 
 #include <assert.h>
 #include <math.h>
+#include <stdlib.h>  // for abs()
 
 #include "./vp8enci.h"
 #include "./cost.h"
@@ -22,16 +25,78 @@
 
 #define MID_ALPHA 64      // neutral value for susceptibility
 #define MIN_ALPHA 30      // lowest usable value for susceptibility
-#define MAX_ALPHA 100     // higher meaninful value for susceptibility
+#define MAX_ALPHA 100     // highest meaningful value for susceptibility
 
 #define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
                           // power-law modulation. Must be strictly less than 1.
 
+#define I4_PENALTY 4000   // Rate-penalty for quick i4/i16 decision
+
+// number of non-zero coeffs below which we consider the block very flat
+// (and apply a penalty to complex predictions)
+#define FLATNESS_LIMIT_I16 10      // I16 mode
+#define FLATNESS_LIMIT_I4  3       // I4 mode
+#define FLATNESS_LIMIT_UV  2       // UV mode
+#define FLATNESS_PENALTY   140     // roughly ~1bit per block
+
 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+// #define DEBUG_BLOCK
+
+//------------------------------------------------------------------------------
+
+#if defined(DEBUG_BLOCK)
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static void PrintBlockInfo(const VP8EncIterator* const it,
+                           const VP8ModeScore* const rd) {
+  int i, j;   // i: column / coefficient index, j: row / block index
+  const int is_i16 = (it->mb_->type_ == 1);   // presumably 1 means intra16x16
+  printf("SOURCE / OUTPUT / ABS DELTA\n");
+  for (j = 0; j < 24; ++j) {   // 16 rows, then 8 rows for the U/V block
+    if (j == 16) printf("\n");   // newline before the U/V block
+    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
+    printf("     ");
+    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
+    printf("     ");
+    for (i = 0; i < 16; ++i) {
+      printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
+    }
+    printf("\n");
+  }
+  printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
+    (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
+    (int)rd->score);   // all score fields are narrowed to int for printing
+  if (is_i16) {
+    printf("Mode: %d\n", rd->mode_i16);
+    printf("y_dc_levels:");
+    for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
+    printf("\n");
+  } else {
+    printf("Modes[16]: ");
+    for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
+    printf("\n");
+  }
+  printf("y_ac_levels:\n");
+  for (j = 0; j < 16; ++j) {
+    for (i = is_i16 ? 1 : 0; i < 16; ++i) {   // i16: coeff #0 is skipped here
+      printf("%4d ", rd->y_ac_levels[j][i]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+  printf("uv_levels (mode=%d):\n", rd->mode_uv);
+  for (j = 0; j < 8; ++j) {
+    for (i = 0; i < 16; ++i) {
+      printf("%4d ", rd->uv_levels[j][i]);
+    }
+    printf("\n");
+  }
+}
+
+#endif   // DEBUG_BLOCK
 
 //------------------------------------------------------------------------------
 
@@ -100,31 +165,13 @@ static const uint16_t kAcTable2[128] = {
   385, 393, 401, 409, 416, 424, 432, 440
 };
 
-static const uint16_t kCoeffThresh[16] = {
-  0,  10, 20, 30,
-  10, 20, 30, 30,
-  20, 30, 30, 30,
-  30, 30, 30, 30
-};
-
-// TODO(skal): tune more. Coeff thresholding?
-static const uint8_t kBiasMatrices[3][16] = {  // [3] = [luma-ac,luma-dc,chroma]
-  { 96, 96, 96, 96,
-    96, 96, 96, 96,
-    96, 96, 96, 96,
-    96, 96, 96, 96 },
-  { 96, 96, 96, 96,
-    96, 96, 96, 96,
-    96, 96, 96, 96,
-    96, 96, 96, 96 },
-  { 96, 96, 96, 96,
-    96, 96, 96, 96,
-    96, 96, 96, 96,
-    96, 96, 96, 96 }
+static const uint8_t kBiasMatrices[3][2] = {  // [luma-ac,luma-dc,chroma][dc,ac]
+  { 96, 110 }, { 96, 108 }, { 110, 115 }
 };
 
-// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).
+// Sharpening by (slightly) raising the hi-frequency coeffs.
 // Hack-ish but helpful for mid-bitrate range. Use with care.
+#define SHARPEN_BITS 11  // number of descaling bits for sharpening bias
 static const uint8_t kFreqSharpening[16] = {
   0,  30, 60, 90,
   30, 60, 90, 90,
@@ -137,20 +184,30 @@ static const uint8_t kFreqSharpening[16] = {
 
 // Returns the average quantizer
 static int ExpandMatrix(VP8Matrix* const m, int type) {
-  int i;
-  int sum = 0;
+  int i, sum;
+  for (i = 0; i < 2; ++i) {
+    const int is_ac_coeff = (i > 0);
+    const int bias = kBiasMatrices[type][is_ac_coeff];
+    m->iq_[i] = (1 << QFIX) / m->q_[i];
+    m->bias_[i] = BIAS(bias);
+    // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
+    //   * zero if coeff <= zthresh
+    //   * non-zero if coeff > zthresh
+    m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
+  }
   for (i = 2; i < 16; ++i) {
     m->q_[i] = m->q_[1];
+    m->iq_[i] = m->iq_[1];
+    m->bias_[i] = m->bias_[1];
+    m->zthresh_[i] = m->zthresh_[1];
   }
-  for (i = 0; i < 16; ++i) {
-    const int j = kZigzag[i];
-    const int bias = kBiasMatrices[type][j];
-    m->iq_[j] = (1 << QFIX) / m->q_[j];
-    m->bias_[j] = BIAS(bias);
-    // TODO(skal): tune kCoeffThresh[]
-    m->zthresh_[j] = ((256 /*+ kCoeffThresh[j]*/ - bias) * m->q_[j] + 127) >> 8;
-    m->sharpen_[j] = (kFreqSharpening[j] * m->q_[j]) >> 11;
-    sum += m->q_[j];
+  for (sum = 0, i = 0; i < 16; ++i) {
+    if (type == 0) {  // we only use sharpening for AC luma coeffs
+      m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
+    } else {
+      m->sharpen_[i] = 0;
+    }
+    sum += m->q_[i];
   }
   return (sum + 8) >> 4;
 }
@@ -178,17 +235,17 @@ static void SetupMatrices(VP8Encoder* enc) {
     q16 = ExpandMatrix(&m->y2_, 1);
     quv = ExpandMatrix(&m->uv_, 2);
 
-    // TODO: Switch to kLambda*[] tables?
-    {
-      m->lambda_i4_  = (3 * q4 * q4) >> 7;
-      m->lambda_i16_ = (3 * q16 * q16);
-      m->lambda_uv_  = (3 * quv * quv) >> 6;
-      m->lambda_mode_    = (1 * q4 * q4) >> 7;
-      m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
-      m->lambda_trellis_i16_ = (q16 * q16) >> 2;
-      m->lambda_trellis_uv_  = (quv *quv) << 1;
-      m->tlambda_            = (tlambda_scale * q4) >> 5;
-    }
+    m->lambda_i4_          = (3 * q4 * q4) >> 7;
+    m->lambda_i16_         = (3 * q16 * q16);
+    m->lambda_uv_          = (3 * quv * quv) >> 6;
+    m->lambda_mode_        = (1 * q4 * q4) >> 7;
+    m->lambda_trellis_i4_  = (7 * q4 * q4) >> 3;
+    m->lambda_trellis_i16_ = (q16 * q16) >> 2;
+    m->lambda_trellis_uv_  = (quv *quv) << 1;
+    m->tlambda_            = (tlambda_scale * q4) >> 5;
+
+    m->min_disto_ = 10 * m->y1_.q_[0];   // quantization-aware min disto
+    m->max_edge_  = 0;
   }
 }
 
@@ -197,16 +254,21 @@ static void SetupMatrices(VP8Encoder* enc) {
 
 // Very small filter-strength values have close to no visual effect. So we can
 // save a little decoding-CPU by turning filtering off for these.
-#define FSTRENGTH_CUTOFF 3
+#define FSTRENGTH_CUTOFF 2
 
 static void SetupFilterStrength(VP8Encoder* const enc) {
   int i;
-  const int level0 = enc->config_->filter_strength;
+  // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
+  const int level0 = 5 * enc->config_->filter_strength;
   for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
-    // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)
-    const int level = level0 * 256 * enc->dqm_[i].quant_ / 128;
-    const int f = level / (256 + enc->dqm_[i].beta_);
-    enc->dqm_[i].fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
+    VP8SegmentInfo* const m = &enc->dqm_[i];
+    // We focus on the quantization of AC coeffs.
+    const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
+    const int base_strength =
+        VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
+    // Segments with lower complexity ('beta') will be less filtered.
+    const int f = base_strength * level0 / (256 + m->beta_);
+    m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
   }
   // We record the initial strength (mainly for the case of 1-segment only).
   enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
@@ -224,28 +286,90 @@ static void SetupFilterStrength(VP8Encoder* const enc) {
 // We want to emulate jpeg-like behaviour where the expected "good" quality
 // is around q=75. Internally, our "good" middle is around c=50. So we
 // map accordingly using linear piece-wise function
-static double QualityToCompression(double q) {
-  const double c = q / 100.;
-  return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
+static double QualityToCompression(double c) {
+  const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
+  // The file size roughly scales as pow(quantizer, 3.). Actually, the
+  // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
+  // in the mid-quant range. So we scale the compressibility inversely to
+  // this power-law: quant ~= compression ^ 1/3. This law holds well for
+  // low quant. Finer modeling for high-quant would make use of kAcTable[]
+  // more explicitly.
+  const double v = pow(linear_c, 1 / 3.);
+  return v;
+}
+
+static double QualityToJPEGCompression(double c, double alpha) {
+  // Returns pow(c, expn) for quality 'c' and complexity 'alpha'. The exponent
+  // was empirically matched to the compression curve of libjpeg6b so that, on
+  // average, the WebP size is roughly that of a JPEG of same quality factor.
+  // 'expn' is expn_hi below alpha_lo, expn_lo above alpha_hi, linear between.
+  const double alpha_lo = 0.30, alpha_hi = 0.85;
+  const double expn_lo = 0.4, expn_hi = 0.9;
+  const double slope = (expn_lo - expn_hi) / (alpha_hi - alpha_lo);
+  double expn;
+  if (alpha > alpha_hi) {
+    expn = expn_lo;
+  } else if (alpha < alpha_lo) {
+    expn = expn_hi;
+  } else {
+    expn = expn_hi + slope * (alpha - alpha_lo);
+  }
+  return pow(c, expn);
+}
+
+static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
+                                 const VP8SegmentInfo* const S2) {
+  return !(S1->quant_ != S2->quant_ || S1->fstrength_ != S2->fstrength_);
+}
+
+static void SimplifySegments(VP8Encoder* const enc) {
+  // Merge segments that are equivalent (see SegmentsAreEquivalent()): the
+  // dqm_[] array is compacted in place, and every macroblock's segment_ is
+  // remapped to the index of the segment that was kept.
+  int remap[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
+  const int num_segments = enc->segment_hdr_.num_segments_;
+  int num_kept = 1;   // segment #0 is always kept
+  int s, k;
+  for (s = 1; s < num_segments; ++s) {
+    // Search the segments kept so far for one equivalent to segment #s.
+    for (k = 0; k < num_kept; ++k) {
+      if (SegmentsAreEquivalent(&enc->dqm_[s], &enc->dqm_[k])) break;
+    }
+    remap[s] = k;   // k == num_kept when nothing matched
+    if (k == num_kept) {
+      // New distinct segment: slide it down to the next free slot if needed.
+      if (num_kept != s) enc->dqm_[num_kept] = enc->dqm_[s];
+      ++num_kept;
+    }
+  }
+  if (num_kept < num_segments) {
+    const int num_mbs = enc->mb_w_ * enc->mb_h_;
+    int i;
+    // Re-label every macroblock, then publish the reduced segment count.
+    for (i = num_mbs - 1; i >= 0; --i) {
+      enc->mb_info_[i].segment_ = remap[enc->mb_info_[i].segment_];
+    }
+    enc->segment_hdr_.num_segments_ = num_kept;
+    // Replicate the trailing segment infos (it's mostly cosmetics)
+    for (i = num_kept; i < num_segments; ++i) {
+      enc->dqm_[i] = enc->dqm_[num_kept - 1];
+    }
+  }
+}
 
 void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
   int i;
   int dq_uv_ac, dq_uv_dc;
-  const int num_segments = enc->config_->segments;
+  const int num_segments = enc->segment_hdr_.num_segments_;
   const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
-  const double c_base = QualityToCompression(quality);
+  const double Q = quality / 100.;
+  const double c_base = enc->config_->emulate_jpeg_size ?
+      QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
+      QualityToCompression(Q);
   for (i = 0; i < num_segments; ++i) {
-    // The file size roughly scales as pow(quantizer, 3.). Actually, the
-    // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
-    // in the mid-quant range. So we scale the compressibility inversely to
-    // this power-law: quant ~= compression ^ 1/3. This law holds well for
-    // low quant. Finer modelling for high-quant would make use of kAcTable[]
-    // more explicitely.
-    // Additionally, we modulate the base exponent 1/3 to accommodate for the
-    // quantization susceptibility and allow denser segments to be quantized
-    // more.
-    const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.;
+    // We modulate the base coefficient to accommodate for the quantization
+    // susceptibility and allow denser segments to be quantized more.
+    const double expn = 1. - amp * enc->dqm_[i].alpha_;
     const double c = pow(c_base, expn);
     const int q = (int)(127. * (1. - c));
     assert(expn > 0.);
@@ -271,7 +395,7 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
   dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
   // We also boost the dc-uv-quant a little, based on sns-strength, since
   // U/V channels are quite more reactive to high quants (flat DC-blocks
-  // tend to appear, and are displeasant).
+  // tend to appear, and are unpleasant).
   dq_uv_dc = -4 * enc->config_->sns_strength / 100;
   dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed
 
@@ -281,9 +405,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
   enc->dq_uv_dc_ = dq_uv_dc;
   enc->dq_uv_ac_ = dq_uv_ac;
 
-  SetupMatrices(enc);
-
   SetupFilterStrength(enc);   // initialize segments' filtering, eventually
+
+  if (num_segments > 1) SimplifySegments(enc);
+
+  SetupMatrices(enc);         // finalize quantization matrices
 }
 
 //------------------------------------------------------------------------------
@@ -299,16 +425,14 @@ const int VP8I4ModeOffsets[NUM_BMODES] = {
 };
 
 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
-  const uint8_t* const left = it->x_ ? enc->y_left_ : NULL;
-  const uint8_t* const top = it->y_ ? enc->y_top_ + it->x_ * 16 : NULL;
+  const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
+  const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
   VP8EncPredLuma16(it->yuv_p_, left, top);
 }
 
 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
-  const VP8Encoder* const enc = it->enc_;
-  const uint8_t* const left = it->x_ ? enc->u_left_ : NULL;
-  const uint8_t* const top = it->y_ ? enc->uv_top_ + it->x_ * 16 : NULL;
+  const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
+  const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
   VP8EncPredChroma8(it->yuv_p_, left, top);
 }
 
@@ -320,23 +444,21 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
 // Quantize
 
 // Layout:
-// +----+
-// |YYYY| 0
-// |YYYY| 4
-// |YYYY| 8
-// |YYYY| 12
-// +----+
-// |UUVV| 16
-// |UUVV| 20
-// +----+
-
-const int VP8Scan[16 + 4 + 4] = {
-  // Luma
+// +----+----+
+// |YYYY|UUVV| 0
+// |YYYY|UUVV| 4
+// |YYYY|....| 8
+// |YYYY|....| 12
+// +----+----+
+
+const int VP8Scan[16] = {  // Luma
   0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
   0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
   0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
   0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+};
 
+static const int VP8ScanUV[4 + 4] = {
   0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
   8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
 };
@@ -364,6 +486,7 @@ static void InitScore(VP8ModeScore* const rd) {
   rd->D  = 0;
   rd->SD = 0;
   rd->R  = 0;
+  rd->H  = 0;
   rd->nz = 0;
   rd->score = MAX_COST;
 }
@@ -372,6 +495,7 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
   dst->D  = src->D;
   dst->SD = src->SD;
   dst->R  = src->R;
+  dst->H  = src->H;
   dst->nz = src->nz;      // note that nz is not accumulated, but just copied.
   dst->score = src->score;
 }
@@ -380,6 +504,7 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
   dst->D  += src->D;
   dst->SD += src->SD;
   dst->R  += src->R;
+  dst->H  += src->H;
   dst->nz |= src->nz;     // here, new nz bits are accumulated.
   dst->score += src->score;
 }
@@ -387,28 +512,31 @@ static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
 //------------------------------------------------------------------------------
 // Performs trellis-optimized quantization.
 
-// Trellis
-
+// Trellis node
 typedef struct {
-  int prev;        // best previous
-  int level;       // level
-  int sign;        // sign of coeff_i
-  score_t cost;    // bit cost
-  score_t error;   // distortion = sum of (|coeff_i| - level_i * Q_i)^2
-  int ctx;         // context (only depends on 'level'. Could be spared.)
+  int8_t prev;            // best previous node
+  int8_t sign;            // sign of coeff_i
+  int16_t level;          // level
 } Node;
 
+// Score state
+typedef struct {
+  score_t score;          // partial RD score
+  const uint16_t* costs;  // shortcut to cost tables
+} ScoreState;
+
 // If a coefficient was quantized to a value Q (using a neutral bias),
 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
 // We don't test negative values though.
 #define MIN_DELTA 0   // how much lower level to try
 #define MAX_DELTA 1   // how much higher
 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
-#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])
+#define NODE(n, l) (nodes[(n)][(l) + MIN_DELTA])
+#define SCORE_STATE(n, l) (score_states[n][(l) + MIN_DELTA])
 
 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
   // TODO: incorporate the "* 256" in the tables?
-  rd->score = rd->R * lambda + 256 * (rd->D + rd->SD);
+  rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
 }
 
 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
@@ -416,34 +544,37 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
   return rate * lambda + 256 * distortion;
 }
 
-static int TrellisQuantizeBlock(const VP8EncIterator* const it,
+static int TrellisQuantizeBlock(const VP8Encoder* const enc,
                                 int16_t in[16], int16_t out[16],
                                 int ctx0, int coeff_type,
                                 const VP8Matrix* const mtx,
                                 int lambda) {
-  ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type];
-  CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
+  const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
+  CostArrayPtr const costs =
+      (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
   const int first = (coeff_type == 0) ? 1 : 0;
-  Node nodes[17][NUM_NODES];
+  Node nodes[16][NUM_NODES];
+  ScoreState score_states[2][NUM_NODES];
+  ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
+  ScoreState* ss_prev = &SCORE_STATE(1, MIN_DELTA);
   int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
   score_t best_score;
-  int best_node;
-  int last = first - 1;
-  int n, m, p, nz;
+  int n, m, p, last;
 
   {
     score_t cost;
-    score_t max_error;
     const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
-    const int last_proba = last_costs[VP8EncBands[first]][ctx0][0];
+    const int last_proba = probas[VP8EncBands[first]][ctx0][0];
 
-    // compute maximal distortion.
-    max_error = 0;
-    for (n = first; n < 16; ++n) {
-      const int j  = kZigzag[n];
+    // compute the position of the last interesting coefficient
+    last = first - 1;
+    for (n = 15; n >= first; --n) {
+      const int j = kZigzag[n];
       const int err = in[j] * in[j];
-      max_error += kWeightTrellis[j] * err;
-      if (err > thresh) last = n;
+      if (err > thresh) {
+        last = n;
+        break;
+      }
     }
     // we don't need to go inspect up to n = 16 coeffs. We can just go up
     // to last + 1 (inclusive) without losing much.
@@ -451,93 +582,95 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
 
     // compute 'skip' score. This is the max score one can do.
     cost = VP8BitCost(0, last_proba);
-    best_score = RDScoreTrellis(lambda, cost, max_error);
+    best_score = RDScoreTrellis(lambda, cost, 0);
 
     // initialize source node.
-    n = first - 1;
     for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
-      NODE(n, m).cost = 0;
-      NODE(n, m).error = max_error;
-      NODE(n, m).ctx = ctx0;
+      const score_t rate = (ctx0 == 0) ? VP8BitCost(1, last_proba) : 0;
+      ss_cur[m].score = RDScoreTrellis(lambda, rate, 0);
+      ss_cur[m].costs = costs[first][ctx0];
     }
   }
 
   // traverse trellis.
   for (n = first; n <= last; ++n) {
-    const int j  = kZigzag[n];
-    const int Q  = mtx->q_[j];
-    const int iQ = mtx->iq_[j];
-    const int B = BIAS(0x00);     // neutral bias
+    const int j = kZigzag[n];
+    const uint32_t Q  = mtx->q_[j];
+    const uint32_t iQ = mtx->iq_[j];
+    const uint32_t B = BIAS(0x00);     // neutral bias
     // note: it's important to take sign of the _original_ coeff,
     // so we don't have to consider level < 0 afterward.
     const int sign = (in[j] < 0);
-    int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
-    int level0;
-    if (coeff0 > 2047) coeff0 = 2047;
+    const uint32_t coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    int level0 = QUANTDIV(coeff0, iQ, B);
+    if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
+
+    {   // Swap current and previous score states
+      ScoreState* const tmp = ss_cur;
+      ss_cur = ss_prev;
+      ss_prev = tmp;
+    }
 
-    level0 = QUANTDIV(coeff0, iQ, B);
     // test all alternate level values around level0.
     for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
       Node* const cur = &NODE(n, m);
-      int delta_error, new_error;
-      score_t cur_score = MAX_COST;
       int level = level0 + m;
-      int last_proba;
-
-      cur->sign = sign;
-      cur->level = level;
-      cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
-      if (level >= 2048 || level < 0) {   // node is dead?
-        cur->cost = MAX_COST;
+      const int ctx = (level > 2) ? 2 : level;
+      const int band = VP8EncBands[n + 1];
+      score_t base_score, last_pos_score;
+      score_t best_cur_score = MAX_COST;
+      int best_prev = 0;   // default, in case
+
+      ss_cur[m].score = MAX_COST;
+      ss_cur[m].costs = costs[n + 1][ctx];
+      if (level > MAX_LEVEL || level < 0) {   // node is dead?
         continue;
       }
-      last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];
 
-      // Compute delta_error = how much coding this level will
-      // subtract as distortion to max_error
-      new_error = coeff0 - level * Q;
-      delta_error =
-        kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);
+      // Compute extra rate cost if last coeff's position is < 15
+      {
+        const score_t last_pos_cost =
+            (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
+        last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
+      }
+
+      {
+        // Compute delta_error = the weighted change in distortion obtained by
+        // coding this level rather than dropping the coefficient (level 0).
+        // Here, distortion = sum of (|coeff_i| - level_i * Q_i)^2
+        const int new_error = coeff0 - level * Q;
+        const int delta_error =
+            kWeightTrellis[j] * (new_error * new_error - coeff0 * coeff0);
+        base_score = RDScoreTrellis(lambda, 0, delta_error);
+      }
 
       // Inspect all possible non-dead predecessors. Retain only the best one.
       for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
-        const Node* const prev = &NODE(n - 1, p);
-        const int prev_ctx = prev->ctx;
-        const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
-        const score_t total_error = prev->error - delta_error;
-        score_t cost, base_cost, score;
-
-        if (prev->cost >= MAX_COST) {   // dead node?
-          continue;
-        }
-
-        // Base cost of both terminal/non-terminal
-        base_cost = prev->cost + VP8LevelCost(tcost, level);
-
+        // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
+        // eliminated since their score can't be better than the current best.
+        const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
         // Examine node assuming it's a non-terminal one.
-        cost = base_cost;
-        if (level && n < 15) {
-          cost += VP8BitCost(1, last_proba);
+        const score_t score =
+            base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
+        if (score < best_cur_score) {
+          best_cur_score = score;
+          best_prev = p;
         }
-        score = RDScoreTrellis(lambda, cost, total_error);
-        if (score < cur_score) {
-          cur_score = score;
-          cur->cost  = cost;
-          cur->error = total_error;
-          cur->prev  = p;
-        }
-
-        // Now, record best terminal node (and thus best entry in the graph).
-        if (level) {
-          cost = base_cost;
-          if (n < 15) cost += VP8BitCost(0, last_proba);
-          score = RDScoreTrellis(lambda, cost, total_error);
-          if (score < best_score) {
-            best_score = score;
-            best_path[0] = n;   // best eob position
-            best_path[1] = m;   // best level
-            best_path[2] = p;   // best predecessor
-          }
+      }
+      // Store best finding in current node.
+      cur->sign = sign;
+      cur->level = level;
+      cur->prev = best_prev;
+      ss_cur[m].score = best_cur_score;
+
+      // Now, record best terminal node (and thus best entry in the graph).
+      if (level != 0) {
+        const score_t score = best_cur_score + last_pos_score;
+        if (score < best_score) {
+          best_score = score;
+          best_path[0] = n;                     // best eob position
+          best_path[1] = m;                     // best node index
+          best_path[2] = best_prev;             // best predecessor
         }
       }
     }
@@ -550,23 +683,25 @@ static int TrellisQuantizeBlock(const VP8EncIterator* const it,
     return 0;   // skip!
   }
 
-  // Unwind the best path.
-  // Note: best-prev on terminal node is not necessarily equal to the
-  // best_prev for non-terminal. So we patch best_path[2] in.
-  n = best_path[0];
-  best_node = best_path[1];
-  NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
-  nz = 0;
-
-  for (; n >= first; --n) {
-    const Node* const node = &NODE(n, best_node);
-    const int j = kZigzag[n];
-    out[n] = node->sign ? -node->level : node->level;
-    nz |= (node->level != 0);
-    in[j] = out[n] * mtx->q_[j];
-    best_node = node->prev;
+  {
+    // Unwind the best path.
+    // Note: best-prev on terminal node is not necessarily equal to the
+    // best_prev for non-terminal. So we patch best_path[2] in.
+    int nz = 0;
+    int best_node = best_path[1];
+    n = best_path[0];
+    NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
+
+    for (; n >= first; --n) {
+      const Node* const node = &NODE(n, best_node);
+      const int j = kZigzag[n];
+      out[n] = node->sign ? -node->level : node->level;
+      nz |= node->level;
+      in[j] = out[n] * mtx->q_[j];
+      best_node = node->prev;
+    }
+    return (nz != 0);
   }
-  return nz;
 }
 
 #undef NODE
@@ -582,17 +717,17 @@ static int ReconstructIntra16(VP8EncIterator* const it,
                               int mode) {
   const VP8Encoder* const enc = it->enc_;
   const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
-  const uint8_t* const src = it->yuv_in_ + Y_OFF;
+  const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
   const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
   int nz = 0;
   int n;
   int16_t tmp[16][16], dc_tmp[16];
 
-  for (n = 0; n < 16; ++n) {
-    VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
+  for (n = 0; n < 16; n += 2) {
+    VP8FTransform2(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
   }
   VP8FTransformWHT(tmp[0], dc_tmp);
-  nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;
+  nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
 
   if (DO_TRELLIS_I16 && it->do_trellis_) {
     int x, y;
@@ -601,20 +736,26 @@ static int ReconstructIntra16(VP8EncIterator* const it,
       for (x = 0; x < 4; ++x, ++n) {
         const int ctx = it->top_nz_[x] + it->left_nz_[y];
         const int non_zero =
-           TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,
-                                &dqm->y1_, dqm->lambda_trellis_i16_);
+            TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
+                                 &dqm->y1_, dqm->lambda_trellis_i16_);
         it->top_nz_[x] = it->left_nz_[y] = non_zero;
+        rd->y_ac_levels[n][0] = 0;
         nz |= non_zero << n;
       }
     }
   } else {
-    for (n = 0; n < 16; ++n) {
-      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n;
+    for (n = 0; n < 16; n += 2) {
+      // Zero-out the first coeff, so that: a) nz is correct below, and
+      // b) finding 'last' non-zero coeffs in SetResidualCoeffs() is simplified.
+      tmp[n][0] = tmp[n + 1][0] = 0;
+      nz |= VP8EncQuantize2Blocks(tmp[n], rd->y_ac_levels[n], &dqm->y1_) << n;
+      assert(rd->y_ac_levels[n + 0][0] == 0);
+      assert(rd->y_ac_levels[n + 1][0] == 0);
     }
   }
 
   // Transform back
-  VP8ITransformWHT(dc_tmp, tmp[0]);
+  VP8TransformWHT(dc_tmp, tmp[0]);
   for (n = 0; n < 16; n += 2) {
     VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
   }
@@ -637,10 +778,10 @@ static int ReconstructIntra4(VP8EncIterator* const it,
   if (DO_TRELLIS_I4 && it->do_trellis_) {
     const int x = it->i4_ & 3, y = it->i4_ >> 2;
     const int ctx = it->top_nz_[x] + it->left_nz_[y];
-    nz = TrellisQuantizeBlock(it, tmp, levels, ctx, 3, &dqm->y1_,
+    nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
                               dqm->lambda_trellis_i4_);
   } else {
-    nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_);
+    nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
   }
   VP8ITransform(ref, tmp, yuv_out, 0);
   return nz;
@@ -650,14 +791,14 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
                          uint8_t* const yuv_out, int mode) {
   const VP8Encoder* const enc = it->enc_;
   const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
-  const uint8_t* const src = it->yuv_in_ + U_OFF;
+  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
   const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
   int nz = 0;
   int n;
   int16_t tmp[8][16];
 
-  for (n = 0; n < 8; ++n) {
-    VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]);
+  for (n = 0; n < 8; n += 2) {
+    VP8FTransform2(src + VP8ScanUV[n], ref + VP8ScanUV[n], tmp[n]);
   }
   if (DO_TRELLIS_UV && it->do_trellis_) {
     int ch, x, y;
@@ -666,28 +807,45 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
         for (x = 0; x < 2; ++x, ++n) {
           const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
           const int non_zero =
-            TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2,
-                                 &dqm->uv_, dqm->lambda_trellis_uv_);
+              TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
+                                   &dqm->uv_, dqm->lambda_trellis_uv_);
           it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
           nz |= non_zero << n;
         }
       }
     }
   } else {
-    for (n = 0; n < 8; ++n) {
-      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n;
+    for (n = 0; n < 8; n += 2) {
+      nz |= VP8EncQuantize2Blocks(tmp[n], rd->uv_levels[n], &dqm->uv_) << n;
     }
   }
 
   for (n = 0; n < 8; n += 2) {
-    VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
+    VP8ITransform(ref + VP8ScanUV[n], tmp[n], yuv_out + VP8ScanUV[n], 1);
   }
   return (nz << 16);
 }
 
 //------------------------------------------------------------------------------
 // RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
-// Pick the mode is lower RD-cost = Rate + lamba * Distortion.
+// Pick the mode with the lowest RD-cost = lambda * Rate + 256 * Distortion.
+
+static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
+  // Track in dqm->max_edge_ the largest magnitude seen among DCs[1], DCs[4]
+  // and DCs[5], used as an estimate of the delta between sub-4x4 blocks.
+  const int v0 = abs(DCs[1]);
+  const int v1 = abs(DCs[4]);
+  const int v2 = abs(DCs[5]);
+  int max_v = (v0 > v1) ? v0 : v1;   // larger of the two (was the smaller)
+  max_v = (v2 > max_v) ? v2 : max_v;
+  if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
+}
+
+static void SwapModeScore(VP8ModeScore** a, VP8ModeScore** b) {
+  VP8ModeScore* const saved_a = *a;   // exchange the two pointers in place
+  *a = *b;
+  *b = saved_a;
+}
 
 static void SwapPtr(uint8_t** a, uint8_t** b) {
   uint8_t* const tmp = *a;
@@ -699,43 +857,69 @@ static void SwapOut(VP8EncIterator* const it) {
   SwapPtr(&it->yuv_out_, &it->yuv_out2_);
 }
 
-static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
-  const VP8Encoder* const enc = it->enc_;
-  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
+  // Flat (1) unless more than 'thresh' AC levels are non-zero; DC is skipped.
+  score_t num_nz = 0;      // TODO(skal): refine positional scoring?
+  int blk, k;
+  for (blk = 0; blk < num_blocks; ++blk, levels += 16) {
+    for (k = 1; k < 16; ++k) {
+      num_nz += (levels[k] != 0);
+      if (num_nz > thresh) return 0;
+    }
+  }
+  return 1;
+}
+
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
+  const int kNumBlocks = 16;
+  VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
   const int lambda = dqm->lambda_i16_;
   const int tlambda = dqm->tlambda_;
-  const uint8_t* const src = it->yuv_in_ + Y_OFF;
-  VP8ModeScore rd16;
+  const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+  VP8ModeScore rd_tmp;
+  VP8ModeScore* rd_cur = &rd_tmp;
+  VP8ModeScore* rd_best = rd;
   int mode;
 
   rd->mode_i16 = -1;
-  for (mode = 0; mode < 4; ++mode) {
-    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF;  // scratch buffer
-    int nz;
+  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF_ENC;  // scratch buffer
+    rd_cur->mode_i16 = mode;
 
     // Reconstruct
-    nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
+    rd_cur->nz = ReconstructIntra16(it, rd_cur, tmp_dst, mode);
 
     // Measure RD-score
-    rd16.D = VP8SSE16x16(src, tmp_dst);
-    rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
-            : 0;
-    rd16.R = VP8GetCostLuma16(it, &rd16);
-    rd16.R += VP8FixedCostsI16[mode];
+    rd_cur->D = VP8SSE16x16(src, tmp_dst);
+    rd_cur->SD =
+        tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
+    rd_cur->H = VP8FixedCostsI16[mode];
+    rd_cur->R = VP8GetCostLuma16(it, rd_cur);
+    if (mode > 0 &&
+        IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
+      // penalty to avoid flat area to be mispredicted by complex mode
+      rd_cur->R += FLATNESS_PENALTY * kNumBlocks;
+    }
 
     // Since we always examine Intra16 first, we can overwrite *rd directly.
-    SetRDScore(lambda, &rd16);
-    if (mode == 0 || rd16.score < rd->score) {
-      CopyScore(rd, &rd16);
-      rd->mode_i16 = mode;
-      rd->nz = nz;
-      memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
-      memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
+    SetRDScore(lambda, rd_cur);
+    if (mode == 0 || rd_cur->score < rd_best->score) {
+      SwapModeScore(&rd_cur, &rd_best);
       SwapOut(it);
     }
   }
+  if (rd_best != rd) {
+    memcpy(rd, rd_best, sizeof(*rd));
+  }
   SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
   VP8SetIntra16Mode(it, rd->mode_i16);
+
+  // we have a blocky macroblock (only DCs are non-zero) with fairly high
+  // distortion, record max delta so we can later adjust the minimal filtering
+  // strength needed to smooth these blocks out.
+  if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
+    StoreMaxDelta(dqm, rd->y_dc_levels);
+  }
 }
 
 //------------------------------------------------------------------------------
@@ -755,8 +939,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
   const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
   const int lambda = dqm->lambda_i4_;
   const int tlambda = dqm->tlambda_;
-  const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
-  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
+  const uint8_t* const src0 = it->yuv_in_ + Y_OFF_ENC;
+  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF_ENC;
   int total_header_bits = 0;
   VP8ModeScore rd_best;
 
@@ -765,9 +949,11 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
   }
 
   InitScore(&rd_best);
-  rd_best.score = 211;  // '211' is the value of VP8BitCost(0, 145)
+  rd_best.H = 211;  // '211' is the value of VP8BitCost(0, 145)
+  SetRDScore(dqm->lambda_mode_, &rd_best);
   VP8IteratorStartI4(it);
   do {
+    const int kNumBlocks = 1;
     VP8ModeScore rd_i4;
     int mode;
     int best_mode = -1;
@@ -791,27 +977,44 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
       rd_tmp.SD =
           tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
                   : 0;
-      rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
-      rd_tmp.R += mode_costs[mode];
+      rd_tmp.H = mode_costs[mode];
+
+      // Add flatness penalty
+      if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
+        rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
+      } else {
+        rd_tmp.R = 0;
+      }
 
+      // early-out check
       SetRDScore(lambda, &rd_tmp);
+      if (best_mode >= 0 && rd_tmp.score >= rd_i4.score) continue;
+
+      // finish computing score
+      rd_tmp.R += VP8GetCostLuma4(it, tmp_levels);
+      SetRDScore(lambda, &rd_tmp);
+
       if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
         CopyScore(&rd_i4, &rd_tmp);
         best_mode = mode;
         SwapPtr(&tmp_dst, &best_block);
-        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
+        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels,
+               sizeof(rd_best.y_ac_levels[it->i4_]));
       }
     }
     SetRDScore(dqm->lambda_mode_, &rd_i4);
     AddScore(&rd_best, &rd_i4);
-    total_header_bits += mode_costs[best_mode];
-    if (rd_best.score >= rd->score ||
-        total_header_bits > enc->max_i4_header_bits_) {
+    if (rd_best.score >= rd->score) {
+      return 0;
+    }
+    total_header_bits += (int)rd_i4.H;   // <- equal to mode_costs[best_mode];
+    if (total_header_bits > enc->max_i4_header_bits_) {
       return 0;
     }
     // Copy selected samples if not in the right place already.
-    if (best_block != best_blocks + VP8Scan[it->i4_])
+    if (best_block != best_blocks + VP8Scan[it->i4_]) {
       VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
+    }
     rd->modes_i4[it->i4_] = best_mode;
     it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
   } while (VP8IteratorRotateI4(it, best_blocks));
@@ -827,18 +1030,19 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
 //------------------------------------------------------------------------------
 
 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
-  const VP8Encoder* const enc = it->enc_;
-  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const int kNumBlocks = 8;
+  const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];
   const int lambda = dqm->lambda_uv_;
-  const uint8_t* const src = it->yuv_in_ + U_OFF;
-  uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF;  // scratch buffer
-  uint8_t* const dst0 = it->yuv_out_ + U_OFF;
+  const uint8_t* const src = it->yuv_in_ + U_OFF_ENC;
+  uint8_t* tmp_dst = it->yuv_out2_ + U_OFF_ENC;  // scratch buffer
+  uint8_t* dst0 = it->yuv_out_ + U_OFF_ENC;
+  uint8_t* dst = dst0;
   VP8ModeScore rd_best;
   int mode;
 
   rd->mode_uv = -1;
   InitScore(&rd_best);
-  for (mode = 0; mode < 4; ++mode) {
+  for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
     VP8ModeScore rd_uv;
 
     // Reconstruct
@@ -847,19 +1051,25 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
     // Compute RD-score
     rd_uv.D  = VP8SSE16x8(src, tmp_dst);
     rd_uv.SD = 0;    // TODO: should we call TDisto? it tends to flatten areas.
+    rd_uv.H  = VP8FixedCostsUV[mode];
     rd_uv.R  = VP8GetCostUV(it, &rd_uv);
-    rd_uv.R += VP8FixedCostsUV[mode];
+    if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
+      rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
+    }
 
     SetRDScore(lambda, &rd_uv);
     if (mode == 0 || rd_uv.score < rd_best.score) {
       CopyScore(&rd_best, &rd_uv);
       rd->mode_uv = mode;
       memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
-      memcpy(dst0, tmp_dst, UV_SIZE);   //  TODO: SwapUVOut() ?
+      SwapPtr(&dst, &tmp_dst);
     }
   }
   VP8SetIntraUVMode(it, rd->mode_uv);
   AddScore(rd, &rd_best);
+  if (dst != dst0) {   // copy 16x8 block if needed
+    VP8Copy16x8(dst, dst0);
+  }
 }
 
 //------------------------------------------------------------------------------
@@ -867,33 +1077,88 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
 
 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
   const VP8Encoder* const enc = it->enc_;
-  const int i16 = (it->mb_->type_ == 1);
+  const int is_i16 = (it->mb_->type_ == 1);
   int nz = 0;
 
-  if (i16) {
-    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
+  if (is_i16) {
+    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF_ENC, it->preds_[0]);
   } else {
     VP8IteratorStartI4(it);
     do {
       const int mode =
           it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
-      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
-      uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
+      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+      uint8_t* const dst = it->yuv_out_ + Y_OFF_ENC + VP8Scan[it->i4_];
       VP8MakeIntra4Preds(it);
       nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
                               src, dst, mode) << it->i4_;
-    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
+    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF_ENC));
   }
 
-  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
+  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF_ENC, it->mb_->uv_mode_);
   rd->nz = nz;
 }
 
+// Refine intra16/intra4 sub-modes based on distortion (SSE) only, not rate.
+static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
+  const int is_i16 = (it->mb_->type_ == 1);
+  score_t best_score = MAX_COST;   // lowest i16 SSE; MAX_COST if i16 untried
+  // i16 pass: select the prediction mode with the smallest SSE vs. source.
+  if (try_both_i4_i16 || is_i16) {
+    int mode;
+    int best_mode = -1;
+    for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
+      const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC;
+      const score_t score = VP8SSE16x16(src, ref);
+      if (score < best_score) {
+        best_mode = mode;
+        best_score = score;
+      }
+    }
+    VP8SetIntra16Mode(it, best_mode);
+  }
+  if (try_both_i4_i16 || !is_i16) {
+    uint8_t modes_i4[16];
+    // We don't evaluate the rate here, but just account for it through a
+    // constant penalty (i4 mode usually needs more bits compared to i16).
+    score_t score_i4 = (score_t)I4_PENALTY;
+    // i4 pass: per sub-block, select the mode with the smallest 4x4 SSE.
+    VP8IteratorStartI4(it);
+    do {
+      int mode;
+      int best_sub_mode = -1;
+      score_t best_sub_score = MAX_COST;
+      const uint8_t* const src = it->yuv_in_ + Y_OFF_ENC + VP8Scan[it->i4_];
+
+      // TODO(skal): we don't really need the prediction pixels here,
+      // but just the distortion against 'src'.
+      VP8MakeIntra4Preds(it);
+      for (mode = 0; mode < NUM_BMODES; ++mode) {
+        const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
+        const score_t score = VP8SSE4x4(src, ref);
+        if (score < best_sub_score) {
+          best_sub_mode = mode;
+          best_sub_score = score;
+        }
+      }
+      modes_i4[it->i4_] = best_sub_mode;
+      score_i4 += best_sub_score;
+      if (score_i4 >= best_score) break;   // i4 total reached best_score
+    } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF_ENC));
+    if (score_i4 < best_score) {   // i4 wins: commit the chosen sub-modes
+      VP8SetIntra4Mode(it, modes_i4);
+    }
+  }
+}
+
 //------------------------------------------------------------------------------
 // Entry point
 
-int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
+                VP8RDLevel rd_opt) {
   int is_skipped;
+  const int method = it->enc_->method_;
 
   InitScore(rd);
 
@@ -902,22 +1167,21 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
   VP8MakeLuma16Preds(it);
   VP8MakeChroma8Preds(it);
 
-  // for rd_opt = 2, we perform trellis-quant on the final decision only.
-  // for rd_opt > 2, we use it for every scoring (=much slower).
-  if (rd_opt > 0) {
-    it->do_trellis_ = (rd_opt > 2);
+  if (rd_opt > RD_OPT_NONE) {
+    it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
     PickBestIntra16(it, rd);
-    if (it->enc_->method_ >= 2) {
+    if (method >= 2) {
       PickBestIntra4(it, rd);
     }
     PickBestUV(it, rd);
-    if (rd_opt == 2) {
+    if (rd_opt == RD_OPT_TRELLIS) {   // finish off with trellis-optim now
       it->do_trellis_ = 1;
       SimpleQuantize(it, rd);
     }
   } else {
-    // TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
-    it->do_trellis_ = (it->enc_->method_ == 2);
+    // For method >= 2, pick the best intra4/intra16 based on SSE (~tad slower).
+    // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
+    DistoRefine(it, (method >= 2));
     SimpleQuantize(it, rd);
   }
   is_skipped = (rd->nz == 0);
@@ -925,6 +1189,3 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
   return is_skipped;
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/syntax.c b/drivers/webp/enc/syntax.c
index 4221436ff9..2b65f15ca1 100644
--- a/drivers/webp/enc/syntax.c
+++ b/drivers/webp/enc/syntax.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Header syntax writing
@@ -11,35 +13,20 @@
 
 #include <assert.h>
 
-#include "../format_constants.h"
+#include "../utils/utils.h"
+#include "webp/format_constants.h"  // RIFF constants
+#include "webp/mux_types.h"         // ALPHA_FLAG
 #include "./vp8enci.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // Helper functions
 
-// TODO(later): Move to webp/format_constants.h?
-static void PutLE24(uint8_t* const data, uint32_t val) {
-  data[0] = (val >>  0) & 0xff;
-  data[1] = (val >>  8) & 0xff;
-  data[2] = (val >> 16) & 0xff;
-}
-
-static void PutLE32(uint8_t* const data, uint32_t val) {
-  PutLE24(data, val);
-  data[3] = (val >> 24) & 0xff;
-}
-
 static int IsVP8XNeeded(const VP8Encoder* const enc) {
   return !!enc->has_alpha_;  // Currently the only case when VP8X is needed.
                              // This could change in the future.
 }
 
 static int PutPaddingByte(const WebPPicture* const pic) {
-
   const uint8_t pad_byte[1] = { 0 };
   return !!pic->writer(pad_byte, 1, pic);
 }
@@ -73,14 +60,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
   assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
 
   if (enc->has_alpha_) {
-    flags |= ALPHA_FLAG_BIT;
+    flags |= ALPHA_FLAG;
   }
 
   PutLE32(vp8x + TAG_SIZE,              VP8X_CHUNK_SIZE);
   PutLE32(vp8x + CHUNK_HEADER_SIZE,     flags);
   PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
   PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
-  if(!pic->writer(vp8x, sizeof(vp8x), pic)) {
+  if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
     return VP8_ENC_ERROR_BAD_WRITE;
   }
   return VP8_ENC_OK;
@@ -199,8 +186,8 @@ static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
 // Segmentation header
 static void PutSegmentHeader(VP8BitWriter* const bw,
                              const VP8Encoder* const enc) {
-  const VP8SegmentHeader* const hdr = &enc->segment_hdr_;
-  const VP8Proba* const proba = &enc->proba_;
+  const VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
+  const VP8EncProba* const proba = &enc->proba_;
   if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
     // We always 'update' the quant and filter strength values
     const int update_data = 1;
@@ -210,16 +197,16 @@ static void PutSegmentHeader(VP8BitWriter* const bw,
       // we always use absolute values, not relative ones
       VP8PutBitUniform(bw, 1);   // (segment_feature_mode = 1. Paragraph 9.3.)
       for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        VP8PutSignedValue(bw, enc->dqm_[s].quant_, 7);
+        VP8PutSignedBits(bw, enc->dqm_[s].quant_, 7);
       }
       for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
-        VP8PutSignedValue(bw, enc->dqm_[s].fstrength_, 6);
+        VP8PutSignedBits(bw, enc->dqm_[s].fstrength_, 6);
       }
     }
     if (hdr->update_map_) {
       for (s = 0; s < 3; ++s) {
         if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) {
-          VP8PutValue(bw, proba->segments_[s], 8);
+          VP8PutBits(bw, proba->segments_[s], 8);
         }
       }
     }
@@ -228,20 +215,20 @@ static void PutSegmentHeader(VP8BitWriter* const bw,
 
 // Filtering parameters header
 static void PutFilterHeader(VP8BitWriter* const bw,
-                            const VP8FilterHeader* const hdr) {
+                            const VP8EncFilterHeader* const hdr) {
   const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0);
   VP8PutBitUniform(bw, hdr->simple_);
-  VP8PutValue(bw, hdr->level_, 6);
-  VP8PutValue(bw, hdr->sharpness_, 3);
+  VP8PutBits(bw, hdr->level_, 6);
+  VP8PutBits(bw, hdr->sharpness_, 3);
   if (VP8PutBitUniform(bw, use_lf_delta)) {
     // '0' is the default value for i4x4_lf_delta_ at frame #0.
     const int need_update = (hdr->i4x4_lf_delta_ != 0);
     if (VP8PutBitUniform(bw, need_update)) {
       // we don't use ref_lf_delta => emit four 0 bits
-      VP8PutValue(bw, 0, 4);
+      VP8PutBits(bw, 0, 4);
       // we use mode_lf_delta for i4x4
-      VP8PutSignedValue(bw, hdr->i4x4_lf_delta_, 6);
-      VP8PutValue(bw, 0, 3);    // all others unused
+      VP8PutSignedBits(bw, hdr->i4x4_lf_delta_, 6);
+      VP8PutBits(bw, 0, 3);    // all others unused
     }
   }
 }
@@ -249,12 +236,12 @@ static void PutFilterHeader(VP8BitWriter* const bw,
 // Nominal quantization parameters
 static void PutQuant(VP8BitWriter* const bw,
                      const VP8Encoder* const enc) {
-  VP8PutValue(bw, enc->base_quant_, 7);
-  VP8PutSignedValue(bw, enc->dq_y1_dc_, 4);
-  VP8PutSignedValue(bw, enc->dq_y2_dc_, 4);
-  VP8PutSignedValue(bw, enc->dq_y2_ac_, 4);
-  VP8PutSignedValue(bw, enc->dq_uv_dc_, 4);
-  VP8PutSignedValue(bw, enc->dq_uv_ac_, 4);
+  VP8PutBits(bw, enc->base_quant_, 7);
+  VP8PutSignedBits(bw, enc->dq_y1_dc_, 4);
+  VP8PutSignedBits(bw, enc->dq_y2_dc_, 4);
+  VP8PutSignedBits(bw, enc->dq_y2_ac_, 4);
+  VP8PutSignedBits(bw, enc->dq_uv_dc_, 4);
+  VP8PutSignedBits(bw, enc->dq_uv_ac_, 4);
 }
 
 // Partition sizes
@@ -276,58 +263,23 @@ static int EmitPartitionsSize(const VP8Encoder* const enc,
 
 //------------------------------------------------------------------------------
 
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-
-#define KTRAILER_SIZE 8
-
-static int WriteExtensions(VP8Encoder* const enc) {
-  uint8_t buffer[KTRAILER_SIZE];
-  VP8BitWriter* const bw = &enc->bw_;
-  WebPPicture* const pic = enc->pic_;
-
-  // Layer (bytes 0..3)
-  PutLE24(buffer + 0, enc->layer_data_size_);
-  buffer[3] = enc->pic_->colorspace & WEBP_CSP_UV_MASK;
-  if (enc->layer_data_size_ > 0) {
-    assert(enc->use_layer_);
-    // append layer data to last partition
-    if (!VP8BitWriterAppend(&enc->parts_[enc->num_parts_ - 1],
-                            enc->layer_data_, enc->layer_data_size_)) {
-      return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY);
-    }
-  }
-
-  buffer[KTRAILER_SIZE - 1] = 0x01;  // marker
-  if (!VP8BitWriterAppend(bw, buffer, KTRAILER_SIZE)) {
-    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY);
-  }
-  return 1;
-}
-
-#endif    /* WEBP_EXPERIMENTAL_FEATURES */
-
-//------------------------------------------------------------------------------
-
-static size_t GeneratePartition0(VP8Encoder* const enc) {
+static int GeneratePartition0(VP8Encoder* const enc) {
   VP8BitWriter* const bw = &enc->bw_;
   const int mb_size = enc->mb_w_ * enc->mb_h_;
   uint64_t pos1, pos2, pos3;
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  const int need_extensions = enc->use_layer_;
-#endif
 
   pos1 = VP8BitWriterPos(bw);
-  VP8BitWriterInit(bw, mb_size * 7 / 8);        // ~7 bits per macroblock
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  VP8PutBitUniform(bw, need_extensions);   // extensions
-#else
+  if (!VP8BitWriterInit(bw, mb_size * 7 / 8)) {        // ~7 bits per macroblock
+    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
   VP8PutBitUniform(bw, 0);   // colorspace
-#endif
   VP8PutBitUniform(bw, 0);   // clamp type
 
   PutSegmentHeader(bw, enc);
   PutFilterHeader(bw, &enc->filter_hdr_);
-  VP8PutValue(bw, enc->config_->partitions, 2);
+  VP8PutBits(bw, enc->num_parts_ == 8 ? 3 :
+                 enc->num_parts_ == 4 ? 2 :
+                 enc->num_parts_ == 2 ? 1 : 0, 2);
   PutQuant(bw, enc);
   VP8PutBitUniform(bw, 0);   // no proba update
   VP8WriteProbas(bw, &enc->proba_);
@@ -335,21 +287,17 @@ static size_t GeneratePartition0(VP8Encoder* const enc) {
   VP8CodeIntraModes(enc);
   VP8BitWriterFinish(bw);
 
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  if (need_extensions && !WriteExtensions(enc)) {
-    return 0;
-  }
-#endif
-
   pos3 = VP8BitWriterPos(bw);
 
   if (enc->pic_->stats) {
     enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
     enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
     enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
-    enc->pic_->stats->layer_data_size = (int)enc->layer_data_size_;
   }
-  return !bw->error_;
+  if (bw->error_) {
+    return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  return 1;
 }
 
 void VP8EncFreeBitWriters(VP8Encoder* const enc) {
@@ -371,7 +319,8 @@ int VP8EncWrite(VP8Encoder* const enc) {
   int p;
 
   // Partition #0 with header and partition sizes
-  ok = !!GeneratePartition0(enc);
+  ok = GeneratePartition0(enc);
+  if (!ok) return 0;
 
   // Compute VP8 size
   vp8_size = VP8_FRAME_HEADER_SIZE +
@@ -432,6 +381,3 @@ int VP8EncWrite(VP8Encoder* const enc) {
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/token.c b/drivers/webp/enc/token.c
new file mode 100644
index 0000000000..e73256b37e
--- /dev/null
+++ b/drivers/webp/enc/token.c
@@ -0,0 +1,285 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Paginated token buffer
+//
+// A 'token' is a bit value associated with a probability that is either fixed
+// or determined later, after statistics have been collected.
+// For dynamic probability, we just record the slot id (idx) for the probability
+// value in the final probability array (uint8_t* probas in VP8EmitTokens).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "./cost.h"
+#include "./vp8enci.h"
+#include "../utils/utils.h"
+
+#if !defined(DISABLE_TOKEN_BUFFER)
+
+// we use pages to reduce the number of memcpy()
+#define MIN_PAGE_SIZE 8192          // minimum number of tokens per page
+#define FIXED_PROBA_BIT (1u << 14)  // set when a token holds a constant proba
+
+typedef uint16_t token_t;  // bit #15: bit value
+                           // bit #14: flags for constant proba or idx
+                           // bits #0..13: slot or constant proba
+struct VP8Tokens {
+  VP8Tokens* next_;        // pointer to next page
+};
+// Token data is located in memory just after the next_ field.
+// This macro is used to return their address and hide the trick.
+#define TOKEN_DATA(p) ((const token_t*)&(p)[1])
+
+//------------------------------------------------------------------------------
+
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) {  // empty the buffer
+  b->tokens_ = NULL;
+  b->pages_ = NULL;            // note: existing pages are not freed here
+  b->last_page_ = &b->pages_;  // where TBufferNewPage() links the next page
+  b->left_ = 0;                // no free slot: first token allocates a page
+  b->page_size_ = (page_size < MIN_PAGE_SIZE) ? MIN_PAGE_SIZE : page_size;
+  b->error_ = 0;
+}
+
+void VP8TBufferClear(VP8TBuffer* const b) {   // frees all pages; NULL is a no-op
+  if (b != NULL) {
+    VP8Tokens* p = b->pages_;
+    while (p != NULL) {
+      VP8Tokens* const next = p->next_;   // read the link before freeing 'p'
+      WebPSafeFree(p);
+      p = next;
+    }
+    VP8TBufferInit(b, b->page_size_);     // back to empty, current page size
+  }
+}
+
+static int TBufferNewPage(VP8TBuffer* const b) {   // returns 0 on failure
+  VP8Tokens* page = NULL;
+  if (!b->error_) {   // once in error state, no allocation is attempted
+    const size_t size = sizeof(*page) + b->page_size_ * sizeof(token_t);
+    page = (VP8Tokens*)WebPSafeMalloc(1ULL, size);
+  }
+  if (page == NULL) {
+    b->error_ = 1;    // allocation failed (or was skipped above)
+    return 0;
+  }
+  page->next_ = NULL;
+
+  *b->last_page_ = page;          // link the page at the tail of the list
+  b->last_page_ = &page->next_;
+  b->left_ = b->page_size_;       // all slots free; AddToken() counts down
+  b->tokens_ = (token_t*)TOKEN_DATA(page);
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#define TOKEN_ID(t, b, ctx) \
+    (NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
+
+static WEBP_INLINE uint32_t AddToken(VP8TBuffer* const b,
+                                     uint32_t bit, uint32_t proba_idx) {
+  assert(proba_idx < FIXED_PROBA_BIT);   // index must fit in bits #0..13
+  assert(bit <= 1);
+  if (b->left_ > 0 || TBufferNewPage(b)) {   // else: token is not stored
+    const int slot = --b->left_;         // slots are used from the last down
+    b->tokens_[slot] = (bit << 15) | proba_idx;
+  }
+  return bit;   // returned even when the token could not be stored
+}
+
+static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b,
+                                         uint32_t bit, uint32_t proba) {
+  assert(proba < 256);   // the readers extract it with a 0xff mask
+  assert(bit <= 1);
+  if (b->left_ > 0 || TBufferNewPage(b)) {   // else: token is not stored
+    const int slot = --b->left_;   // slots are used from the last down
+    b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba;
+  }
+}
+
+int VP8RecordCoeffTokens(const int ctx, const int coeff_type,
+                         int first, int last,
+                         const int16_t* const coeffs,
+                         VP8TBuffer* const tokens) {
+  int n = first;   // index of the next coefficient to record
+  uint32_t base_id = TOKEN_ID(coeff_type, n, ctx);
+  if (!AddToken(tokens, last >= 0, base_id + 0)) {   // bit is 0 when last < 0
+    return 0;      // only the 'no coefficient' bit was recorded
+  }
+
+  while (n < 16) {
+    const int c = coeffs[n++];   // 'n' now indexes the following coefficient
+    const int sign = c < 0;
+    const uint32_t v = sign ? -c : c;   // magnitude; sign is recorded apart
+    if (!AddToken(tokens, v != 0, base_id + 1)) {   // zero coefficient
+      base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 0);  // ctx=0
+      continue;
+    }
+    if (!AddToken(tokens, v > 1, base_id + 2)) {    // v == 1
+      base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 1);  // ctx=1
+    } else {
+      if (!AddToken(tokens, v > 4, base_id + 3)) {  // v in [2..4]
+        if (AddToken(tokens, v != 2, base_id + 4))
+          AddToken(tokens, v == 4, base_id + 5);
+      } else if (!AddToken(tokens, v > 10, base_id + 6)) {   // v in [5..10]
+        if (!AddToken(tokens, v > 6, base_id + 7)) {
+          AddConstantToken(tokens, v == 6, 159);
+        } else {
+          AddConstantToken(tokens, v >= 9, 165);
+          AddConstantToken(tokens, !(v & 1), 145);
+        }
+      } else {
+        int mask;
+        const uint8_t* tab;
+        uint32_t residue = v - 3;   // here v > 10, hence residue >= 8
+        if (residue < (8 << 1)) {          // VP8Cat3  (3b)
+          AddToken(tokens, 0, base_id + 8);
+          AddToken(tokens, 0, base_id + 9);
+          residue -= (8 << 0);
+          mask = 1 << 2;
+          tab = VP8Cat3;
+        } else if (residue < (8 << 2)) {   // VP8Cat4  (4b)
+          AddToken(tokens, 0, base_id + 8);
+          AddToken(tokens, 1, base_id + 9);
+          residue -= (8 << 1);
+          mask = 1 << 3;
+          tab = VP8Cat4;
+        } else if (residue < (8 << 3)) {   // VP8Cat5  (5b)
+          AddToken(tokens, 1, base_id + 8);
+          AddToken(tokens, 0, base_id + 10);
+          residue -= (8 << 2);
+          mask = 1 << 4;
+          tab = VP8Cat5;
+        } else {                         // VP8Cat6 (11b)
+          AddToken(tokens, 1, base_id + 8);
+          AddToken(tokens, 1, base_id + 10);
+          residue -= (8 << 3);
+          mask = 1 << 10;
+          tab = VP8Cat6;
+        }
+        while (mask) {   // residue bits, MSB first, with fixed probas from 'tab'
+          AddConstantToken(tokens, !!(residue & mask), *tab++);
+          mask >>= 1;
+        }
+      }
+      base_id = TOKEN_ID(coeff_type, VP8EncBands[n], 2);  // ctx=2
+    }
+    AddConstantToken(tokens, sign, 128);   // sign bit, constant proba 128
+    if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
+      return 1;   // EOB
+    }
+  }
+  return 1;
+}
+
+#undef TOKEN_ID
+
+//------------------------------------------------------------------------------
+// This function works, but isn't currently used. Saved for later.
+
+#if 0
+
+static void Record(int bit, proba_t* const stats) {   // updates one stats slot
+  proba_t p = *stats;
+  if (p >= 0xffff0000u) {               // an overflow is inbound.
+    p = ((p + 1u) >> 1) & 0x7fff7fffu;  // -> divide the stats by 2.
+  }
+  // record bit count (lower 16 bits) and increment total count (upper 16 bits).
+  p += 0x00010000u + bit;
+  *stats = p;
+}
+
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
+  const VP8Tokens* p = b->pages_;
+  while (p != NULL) {
+    const int N = (p->next_ == NULL) ? b->left_ : 0;   // last page is partial
+    int n = b->page_size_;    // pages hold page_size_ tokens, filled downward
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      if (!(token & FIXED_PROBA_BIT)) {   // constant-proba tokens have no slot
+        Record((token >> 15) & 1, stats + (token & 0x3fffu));
+      }
+    }
+    p = p->next_;
+  }
+}
+
+#endif   // 0
+
+//------------------------------------------------------------------------------
+// Final coding pass, with known probabilities
+
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
+                  const uint8_t* const probas, int final_pass) {
+  const VP8Tokens* p = b->pages_;
+  assert(!b->error_);
+  while (p != NULL) {
+    const VP8Tokens* const next = p->next_;   // saved: 'p' may be freed below
+    const int N = (next == NULL) ? b->left_ : 0;   // only last page is partial
+    int n = b->page_size_;   // slots were filled downward: replay in that order
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      const int bit = (token >> 15) & 1;
+      if (token & FIXED_PROBA_BIT) {
+        VP8PutBit(bw, bit, token & 0xffu);  // constant proba
+      } else {
+        VP8PutBit(bw, bit, probas[token & 0x3fffu]);  // proba looked up by slot
+      }
+    }
+    if (final_pass) WebPSafeFree((void*)p);
+    p = next;
+  }
+  if (final_pass) b->pages_ = NULL;   // note: other fields of 'b' are untouched
+  return 1;   // unconditional
+}
+
+// Size estimation
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas) {
+  size_t size = 0;   // accumulated VP8BitCost() of all recorded tokens
+  const VP8Tokens* p = b->pages_;
+  assert(!b->error_);
+  while (p != NULL) {
+    const VP8Tokens* const next = p->next_;
+    const int N = (next == NULL) ? b->left_ : 0;   // only last page is partial
+    int n = b->page_size_;   // slots were filled downward: same walk order
+    const token_t* const tokens = TOKEN_DATA(p);
+    while (n-- > N) {
+      const token_t token = tokens[n];
+      const int bit = token & (1 << 15);   // 0 or 0x8000 (not reduced to 0/1)
+      if (token & FIXED_PROBA_BIT) {
+        size += VP8BitCost(bit, token & 0xffu);   // constant proba
+      } else {
+        size += VP8BitCost(bit, probas[token & 0x3fffu]);   // proba from slot
+      }
+    }
+    p = next;   // pages are only read, never freed, by this function
+  }
+  return size;
+}
+
+//------------------------------------------------------------------------------
+
+#else     // DISABLE_TOKEN_BUFFER
+
+void VP8TBufferInit(VP8TBuffer* const b, int page_size) {  // no-op stub
+  (void)b; (void)page_size;   // same parameters as the token-buffer build
+}
+void VP8TBufferClear(VP8TBuffer* const b) {   // no-op stub: nothing to free
+  (void)b;
+}
+
+#endif    // !DISABLE_TOKEN_BUFFER
+
diff --git a/drivers/webp/enc/tree.c b/drivers/webp/enc/tree.c
index 8b25e5e488..f141006d19 100644
--- a/drivers/webp/enc/tree.c
+++ b/drivers/webp/enc/tree.c
@@ -1,27 +1,24 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-// Token probabilities
+// Coding of token probabilities, intra modes and segments.
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include "./vp8enci.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // Default probabilities
 
 // Paragraph 13.5
 const uint8_t
   VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
-  // genereated using vp8_default_coef_probs() in entropy.c:129
   { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
       { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
       { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
@@ -157,7 +154,7 @@ const uint8_t
 };
 
 void VP8DefaultProbas(VP8Encoder* const enc) {
-  VP8Proba* const probas = &enc->proba_;
+  VP8EncProba* const probas = &enc->proba_;
   probas->use_skip_proba_ = 0;
   memset(probas->segments_, 255u, sizeof(probas->segments_));
   memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0));
@@ -318,7 +315,7 @@ void VP8CodeIntraModes(VP8Encoder* const enc) {
   VP8EncIterator it;
   VP8IteratorInit(enc, &it);
   do {
-    const VP8MBInfo* mb = it.mb_;
+    const VP8MBInfo* const mb = it.mb_;
     const uint8_t* preds = it.preds_;
     if (enc->segment_hdr_.update_map_) {
       PutSegment(bw, mb->segment_, enc->proba_.segments_);
@@ -343,7 +340,7 @@ void VP8CodeIntraModes(VP8Encoder* const enc) {
       }
     }
     PutUVMode(bw, mb->uv_mode_);
-  } while (VP8IteratorNext(&it, 0));
+  } while (VP8IteratorNext(&it));
 }
 
 //------------------------------------------------------------------------------
@@ -485,7 +482,7 @@ const uint8_t
   }
 };
 
-void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) {
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas) {
   int t, b, c, p;
   for (t = 0; t < NUM_TYPES; ++t) {
     for (b = 0; b < NUM_BANDS; ++b) {
@@ -494,17 +491,14 @@ void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) {
           const uint8_t p0 = probas->coeffs_[t][b][c][p];
           const int update = (p0 != VP8CoeffsProba0[t][b][c][p]);
           if (VP8PutBit(bw, update, VP8CoeffsUpdateProba[t][b][c][p])) {
-            VP8PutValue(bw, p0, 8);
+            VP8PutBits(bw, p0, 8);
           }
         }
       }
     }
   }
   if (VP8PutBitUniform(bw, probas->use_skip_proba_)) {
-    VP8PutValue(bw, probas->skip_proba_, 8);
+    VP8PutBits(bw, probas->skip_proba_, 8);
   }
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/vp8enci.h b/drivers/webp/enc/vp8enci.h
index 936e1c18ce..0cb2ccc353 100644
--- a/drivers/webp/enc/vp8enci.h
+++ b/drivers/webp/enc/vp8enci.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   WebP encoder: internal header.
@@ -13,11 +15,18 @@
 #define WEBP_ENC_VP8ENCI_H_
 
 #include <string.h>     // for memcpy()
-#include "../encode.h"
+#include "../dec/common.h"
 #include "../dsp/dsp.h"
 #include "../utils/bit_writer.h"
+#include "../utils/thread.h"
+#include "../utils/utils.h"
+#include "webp/encode.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+#include "./vp8li.h"
+#endif  // WEBP_EXPERIMENTAL_FEATURES
+
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -26,141 +35,94 @@ extern "C" {
 
 // version numbers
 #define ENC_MAJ_VERSION 0
-#define ENC_MIN_VERSION 2
-#define ENC_REV_VERSION 0
-
-// size of histogram used by CollectHistogram.
-#define MAX_COEFF_THRESH   64
-
-// intra prediction modes
-enum { B_DC_PRED = 0,   // 4x4 modes
-       B_TM_PRED = 1,
-       B_VE_PRED = 2,
-       B_HE_PRED = 3,
-       B_RD_PRED = 4,
-       B_VR_PRED = 5,
-       B_LD_PRED = 6,
-       B_VL_PRED = 7,
-       B_HD_PRED = 8,
-       B_HU_PRED = 9,
-       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
-
-       // Luma16 or UV modes
-       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
-       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
-     };
+#define ENC_MIN_VERSION 4
+#define ENC_REV_VERSION 4
 
-enum { NUM_MB_SEGMENTS = 4,
-       MAX_NUM_PARTITIONS = 8,
-       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
-       NUM_BANDS = 8,
-       NUM_CTX = 3,
-       NUM_PROBAS = 11,
-       MAX_LF_LEVELS = 64,      // Maximum loop filter level
-       MAX_VARIABLE_LEVEL = 67  // last (inclusive) level with variable cost
+enum { MAX_LF_LEVELS = 64,       // Maximum loop filter level
+       MAX_VARIABLE_LEVEL = 67,  // last (inclusive) level with variable cost
+       MAX_LEVEL = 2047          // max level (note: max codable is 2047 + 67)
      };
 
-// YUV-cache parameters. Cache is 16-pixels wide.
-// The original or reconstructed samples can be accessed using VP8Scan[]
+typedef enum {   // Rate-distortion optimization levels
+  RD_OPT_NONE        = 0,  // no rd-opt
+  RD_OPT_BASIC       = 1,  // basic scoring (no trellis)
+  RD_OPT_TRELLIS     = 2,  // perform trellis-quant on the final decision only
+  RD_OPT_TRELLIS_ALL = 3   // trellis-quant for every scoring (much slower)
+} VP8RDLevel;
+
+// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
+// The original or reconstructed samples can be accessed using VP8Scan[].
 // The predicted blocks can be accessed using offsets to yuv_p_ and
-// the arrays VP8*ModeOffsets[];
-//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
-//  Y_OFF  |YYYY| <- original samples  (enc->yuv_in_)
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
-//         |UUVV|
-//         +----+
-//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
-//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
-//         |YYYY|
-//         |YYYY|
-//  U_OFF  |UUVV| V_OFF
-//         |UUVV|
-//          x2 (for yuv_out2_)
-//         +----+     Prediction area ('yuv_p_', size = PRED_SIZE)
-// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-// I16TM16 |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-// I16VE16 |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-// I16HE16 |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         |YYYY|
-//         +----+  Chroma U/V predictions (16x8 block each)
-// C8DC8   |UUVV|
-//         |UUVV|
-// C8TM8   |UUVV|
-//         |UUVV|
-// C8VE8   |UUVV|
-//         |UUVV|
-// C8HE8   |UUVV|
-//         |UUVV|
-//         +----+  Intra 4x4 predictions (4x4 block each)
-//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
-//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
-//         |YY..| I4HD4 I4HU4 I4TMP
-//         +----+
-#define BPS       16   // this is the common stride
-#define Y_SIZE   (BPS * 16)
-#define UV_SIZE  (BPS * 8)
-#define YUV_SIZE (Y_SIZE + UV_SIZE)
-#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
-#define Y_OFF    (0)
-#define U_OFF    (Y_SIZE)
-#define V_OFF    (U_OFF + 8)
-#define ALIGN_CST 15
-#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
-
-extern const int VP8Scan[16 + 4 + 4];           // in quant.c
-extern const int VP8UVModeOffsets[4];           // in analyze.c
+// the arrays VP8*ModeOffsets[].
+// * YUV Samples area (yuv_in_/yuv_out_/yuv_out2_)
+//   (see VP8Scan[] for accessing the blocks, along with
+//   Y_OFF_ENC/U_OFF_ENC/V_OFF_ENC):
+//             +----+----+
+//  Y_OFF_ENC  |YYYY|UUVV|
+//  U_OFF_ENC  |YYYY|UUVV|
+//  V_OFF_ENC  |YYYY|....| <- 25% wasted U/V area
+//             |YYYY|....|
+//             +----+----+
+// * Prediction area ('yuv_p_', size = PRED_SIZE_ENC)
+//   Intra16 predictions (16x16 block each, two per row):
+//         |I16DC16|I16TM16|
+//         |I16VE16|I16HE16|
+//   Chroma U/V predictions (16x8 block each, two per row):
+//         |C8DC8|C8TM8|
+//         |C8VE8|C8HE8|
+//   Intra 4x4 predictions (4x4 block each)
+//         |I4DC4 I4TM4 I4VE4 I4HE4|I4RD4 I4VR4 I4LD4 I4VL4|
+//         |I4HD4 I4HU4 I4TMP .....|.......................| <- ~31% wasted
+#define YUV_SIZE_ENC (BPS * 16)
+#define PRED_SIZE_ENC (32 * BPS + 16 * BPS + 8 * BPS)   // I16+Chroma+I4 preds
+#define Y_OFF_ENC    (0)
+#define U_OFF_ENC    (16)
+#define V_OFF_ENC    (16 + 8)
+
+extern const int VP8Scan[16];           // in quant.c
+extern const int VP8UVModeOffsets[4];   // in analyze.c
 extern const int VP8I16ModeOffsets[4];
 extern const int VP8I4ModeOffsets[NUM_BMODES];
 
 // Layout of prediction blocks
 // intra 16x16
 #define I16DC16 (0 * 16 * BPS)
-#define I16TM16 (1 * 16 * BPS)
-#define I16VE16 (2 * 16 * BPS)
-#define I16HE16 (3 * 16 * BPS)
+#define I16TM16 (I16DC16 + 16)
+#define I16VE16 (1 * 16 * BPS)
+#define I16HE16 (I16VE16 + 16)
 // chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
-#define C8DC8 (4 * 16 * BPS)
-#define C8TM8 (4 * 16 * BPS + 8 * BPS)
-#define C8VE8 (5 * 16 * BPS)
-#define C8HE8 (5 * 16 * BPS + 8 * BPS)
+#define C8DC8 (2 * 16 * BPS)
+#define C8TM8 (C8DC8 + 1 * 16)
+#define C8VE8 (2 * 16 * BPS + 8 * BPS)
+#define C8HE8 (C8VE8 + 1 * 16)
 // intra 4x4
-#define I4DC4 (6 * 16 * BPS +  0)
-#define I4TM4 (6 * 16 * BPS +  4)
-#define I4VE4 (6 * 16 * BPS +  8)
-#define I4HE4 (6 * 16 * BPS + 12)
-#define I4RD4 (6 * 16 * BPS + 4 * BPS +  0)
-#define I4VR4 (6 * 16 * BPS + 4 * BPS +  4)
-#define I4LD4 (6 * 16 * BPS + 4 * BPS +  8)
-#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
-#define I4HD4 (6 * 16 * BPS + 8 * BPS +  0)
-#define I4HU4 (6 * 16 * BPS + 8 * BPS +  4)
-#define I4TMP (6 * 16 * BPS + 8 * BPS +  8)
+#define I4DC4 (3 * 16 * BPS +  0)
+#define I4TM4 (I4DC4 +  4)
+#define I4VE4 (I4DC4 +  8)
+#define I4HE4 (I4DC4 + 12)
+#define I4RD4 (I4DC4 + 16)
+#define I4VR4 (I4DC4 + 20)
+#define I4LD4 (I4DC4 + 24)
+#define I4VL4 (I4DC4 + 28)
+#define I4HD4 (3 * 16 * BPS + 4 * BPS)
+#define I4HU4 (I4HD4 + 4)
+#define I4TMP (I4HD4 + 8)
 
 typedef int64_t score_t;     // type used for scores, rate, distortion
+// Note that MAX_COST is not the maximum allowed by sizeof(score_t),
+// in order to allow overflowing computations.
 #define MAX_COST ((score_t)0x7fffffffffffffLL)
 
 #define QFIX 17
 #define BIAS(b)  ((b) << (QFIX - 8))
 // Fun fact: this is the _only_ line where we're actually being lossy and
 // discarding bits.
-static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
-  return (n * iQ + B) >> QFIX;
+static WEBP_INLINE int QUANTDIV(uint32_t n, uint32_t iQ, uint32_t B) {
+  return (int)((n * iQ + B) >> QFIX);
 }
-extern const uint8_t VP8Zigzag[16];
+
+// Uncomment the following to remove token-buffer code:
+// #define DISABLE_TOKEN_BUFFER
 
 //------------------------------------------------------------------------------
 // Headers
@@ -169,6 +131,8 @@ typedef uint32_t proba_t;   // 16b + 16b
 typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
 typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
 typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
+typedef const uint16_t* (*CostArrayPtr)[NUM_CTX];   // for easy casting
+typedef const uint16_t* CostArrayMap[16][NUM_CTX];
 typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS];  // filter stats
 
 typedef struct VP8Encoder VP8Encoder;
@@ -179,19 +143,20 @@ typedef struct {
   int update_map_;        // whether to update the segment map or not.
                           // must be 0 if there's only 1 segment.
   int size_;              // bit-cost for transmitting the segment map
-} VP8SegmentHeader;
+} VP8EncSegmentHeader;
 
 // Struct collecting all frame-persistent probabilities.
 typedef struct {
   uint8_t segments_[3];     // probabilities for segment tree
   uint8_t skip_proba_;      // final probability of being skipped.
-  ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 924 bytes
+  ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 1056 bytes
   StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 4224 bytes
-  CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 11.4k
+  CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 13056 bytes
+  CostArrayMap remapped_costs_[NUM_TYPES];       // 1536 bytes
   int dirty_;               // if true, need to call VP8CalculateLevelCosts()
   int use_skip_proba_;      // Note: we always use skip_proba for now.
   int nb_skip_;             // number of skipped blocks
-} VP8Proba;
+} VP8EncProba;
 
 // Filter parameters. Not actually used in the code (we don't perform
 // the in-loop filtering), but filled from user's config
@@ -200,7 +165,7 @@ typedef struct {
   int level_;              // base filter level [0..63]
   int sharpness_;          // [0..7]
   int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
-} VP8FilterHeader;
+} VP8EncFilterHeader;
 
 //------------------------------------------------------------------------------
 // Informations about the macroblocks.
@@ -217,8 +182,8 @@ typedef struct {
 typedef struct VP8Matrix {
   uint16_t q_[16];        // quantizer steps
   uint16_t iq_[16];       // reciprocals, fixed point.
-  uint16_t bias_[16];     // rounding bias
-  uint16_t zthresh_[16];  // value under which a coefficient is zeroed
+  uint32_t bias_[16];     // rounding bias
+  uint32_t zthresh_[16];  // value below which a coefficient is zeroed
   uint16_t sharpen_[16];  // frequency boosters for slight sharpening
 } VP8Matrix;
 
@@ -229,16 +194,19 @@ typedef struct {
   int beta_;       // filter-susceptibility, range [0,255].
   int quant_;      // final segment quantizer.
   int fstrength_;  // final in-loop filtering strength
+  int max_edge_;   // max edge delta (for filtering strength)
+  int min_disto_;  // minimum distortion required to trigger filtering record
   // reactivities
   int lambda_i16_, lambda_i4_, lambda_uv_;
   int lambda_mode_, lambda_trellis_, tlambda_;
   int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
 } VP8SegmentInfo;
 
-// Handy transcient struct to accumulate score and info during RD-optimization
+// Handy transient struct to accumulate score and info during RD-optimization
 // and mode evaluation.
 typedef struct {
-  score_t D, SD, R, score;    // Distortion, spectral distortion, rate, score.
+  score_t D, SD;              // Distortion, spectral distortion
+  score_t H, R, score;        // header bits, rate, score.
   int16_t y_dc_levels[16];    // Quantized levels for luma-DC, luma-AC, chroma.
   int16_t y_ac_levels[16][16];
   int16_t uv_levels[4 + 4][16];
@@ -252,12 +220,11 @@ typedef struct {
 // right neighbouring data (samples, predictions, contexts, ...)
 typedef struct {
   int x_, y_;                      // current macroblock
-  int y_offset_, uv_offset_;       // offset to the luma / chroma planes
   int y_stride_, uv_stride_;       // respective strides
-  uint8_t*      yuv_in_;           // borrowed from enc_ (for now)
-  uint8_t*      yuv_out_;          // ''
-  uint8_t*      yuv_out2_;         // ''
-  uint8_t*      yuv_p_;            // ''
+  uint8_t*      yuv_in_;           // input samples
+  uint8_t*      yuv_out_;          // output samples
+  uint8_t*      yuv_out2_;         // secondary buffer swapped with yuv_out_.
+  uint8_t*      yuv_p_;            // scratch buffer for prediction
   VP8Encoder*   enc_;              // back-pointer
   VP8MBInfo*    mb_;               // current macroblock
   VP8BitWriter* bw_;               // current bit-writer
@@ -273,24 +240,44 @@ typedef struct {
   uint64_t      uv_bits_;          // macroblock bit-cost for chroma
   LFStats*      lf_stats_;         // filter stats (borrowed from enc_)
   int           do_trellis_;       // if true, perform extra level optimisation
-  int           done_;             // true when scan is finished
+  int           count_down_;       // number of mb still to be processed
+  int           count_down0_;      // starting counter value (for progress)
   int           percent0_;         // saved initial progress percent
+
+  uint8_t* y_left_;    // left luma samples (addressable from index -1 to 15).
+  uint8_t* u_left_;    // left u samples (addressable from index -1 to 7)
+  uint8_t* v_left_;    // left v samples (addressable from index -1 to 7)
+
+  uint8_t* y_top_;     // top luma samples at position 'x_'
+  uint8_t* uv_top_;    // top u/v samples at position 'x_', packed as 16 bytes
+
+  // memory for storing y/u/v_left_
+  uint8_t yuv_left_mem_[17 + 16 + 16 + 8 + WEBP_ALIGN_CST];
+  // memory for yuv_*
+  uint8_t yuv_mem_[3 * YUV_SIZE_ENC + PRED_SIZE_ENC + WEBP_ALIGN_CST];
 } VP8EncIterator;
 
   // in iterator.c
-// must be called first.
+// must be called first
 void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
-// restart a scan.
+// restart a scan
 void VP8IteratorReset(VP8EncIterator* const it);
-// import samples from source
-void VP8IteratorImport(const VP8EncIterator* const it);
+// reset iterator position to row 'y'
+void VP8IteratorSetRow(VP8EncIterator* const it, int y);
+// set count down (=number of iterations to go)
+void VP8IteratorSetCountDown(VP8EncIterator* const it, int count_down);
+// return true if iteration is finished
+int VP8IteratorIsDone(const VP8EncIterator* const it);
+// Import uncompressed samples from source.
+// If tmp_32 is not NULL, import boundary samples too.
+// tmp_32 is a 32-byte scratch buffer that must be aligned in memory.
+void VP8IteratorImport(VP8EncIterator* const it, uint8_t* tmp_32);
 // export decimated samples
 void VP8IteratorExport(const VP8EncIterator* const it);
-// go to next macroblock. Returns !done_. If *block_to_save is non-null, will
-// save the boundary values to top_/left_ arrays. block_to_save can be
-// it->yuv_out_ or it->yuv_in_.
-int VP8IteratorNext(VP8EncIterator* const it,
-                    const uint8_t* const block_to_save);
+// go to next macroblock. Returns false when the scan is finished.
+int VP8IteratorNext(VP8EncIterator* const it);
+// save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
+void VP8IteratorSaveBoundary(VP8EncIterator* const it);
 // Report progression based on macroblock rows. Return 0 for user-abort request.
 int VP8IteratorProgress(const VP8EncIterator* const it,
                         int final_delta_percent);
@@ -314,44 +301,43 @@ void VP8SetSegment(const VP8EncIterator* const it, int segment);
 //------------------------------------------------------------------------------
 // Paginated token buffer
 
-// WIP: #define USE_TOKEN_BUFFER
+typedef struct VP8Tokens VP8Tokens;  // struct details in token.c
 
-#ifdef USE_TOKEN_BUFFER
+typedef struct {
+#if !defined(DISABLE_TOKEN_BUFFER)
+  VP8Tokens* pages_;        // first page
+  VP8Tokens** last_page_;   // last page
+  uint16_t* tokens_;        // set to (*last_page_)->tokens_
+  int left_;                // how many free tokens left before the page is full
+  int page_size_;           // number of tokens per page
+#endif
+  int error_;         // true in case of malloc error
+} VP8TBuffer;
 
-#define MAX_NUM_TOKEN 2048
+// initialize an empty buffer
+void VP8TBufferInit(VP8TBuffer* const b, int page_size);
+void VP8TBufferClear(VP8TBuffer* const b);   // de-allocate pages memory
 
-typedef struct VP8Tokens VP8Tokens;
-struct VP8Tokens {
-  uint16_t tokens_[MAX_NUM_TOKEN];  // bit#15: bit, bits 0..14: slot
-  int left_;
-  VP8Tokens* next_;
-};
+#if !defined(DISABLE_TOKEN_BUFFER)
 
-typedef struct {
-  VP8Tokens* rows_;
-  uint16_t* tokens_;    // set to (*last_)->tokens_
-  VP8Tokens** last_;
-  int left_;
-  int error_;  // true in case of malloc error
-} VP8TBuffer;
+// Finalizes bitstream when probabilities are known.
+// Deletes the allocated token memory if final_pass is true.
+int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
+                  const uint8_t* const probas, int final_pass);
 
-void VP8TBufferInit(VP8TBuffer* const b);    // initialize an empty buffer
-int VP8TBufferNewPage(VP8TBuffer* const b);  // allocate a new page
-void VP8TBufferClear(VP8TBuffer* const b);   // de-allocate memory
+// record the coding of coefficients without knowing the probabilities yet
+int VP8RecordCoeffTokens(const int ctx, const int coeff_type,
+                         int first, int last,
+                         const int16_t* const coeffs,
+                         VP8TBuffer* const tokens);
 
-int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
-                  const uint8_t* const probas);
+// Estimate the final coded size given a set of 'probas'.
+size_t VP8EstimateTokenSize(VP8TBuffer* const b, const uint8_t* const probas);
 
-static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
-                                   int bit, int proba_idx) {
-  if (b->left_ > 0 || VP8TBufferNewPage(b)) {
-    const int slot = --b->left_;
-    b->tokens_[slot] = (bit << 15) | proba_idx;
-  }
-  return bit;
-}
+// unused for now
+void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
 
-#endif  // USE_TOKEN_BUFFER
+#endif  // !DISABLE_TOKEN_BUFFER
 
 //------------------------------------------------------------------------------
 // VP8Encoder
@@ -361,8 +347,8 @@ struct VP8Encoder {
   WebPPicture* pic_;            // input / output picture
 
   // headers
-  VP8FilterHeader   filter_hdr_;     // filtering information
-  VP8SegmentHeader  segment_hdr_;    // segment information
+  VP8EncFilterHeader   filter_hdr_;     // filtering information
+  VP8EncSegmentHeader  segment_hdr_;    // segment information
 
   int profile_;                      // VP8's profile, deduced from Config.
 
@@ -376,6 +362,7 @@ struct VP8Encoder {
   // per-partition boolean decoders.
   VP8BitWriter bw_;                         // part0
   VP8BitWriter parts_[MAX_NUM_PARTITIONS];  // token partitions
+  VP8TBuffer tokens_;                       // token buffer
 
   int percent_;                             // for progress
 
@@ -383,17 +370,13 @@ struct VP8Encoder {
   int has_alpha_;
   uint8_t* alpha_data_;       // non-NULL if transparency is present
   uint32_t alpha_data_size_;
-
-  // enhancement layer
-  int use_layer_;
-  VP8BitWriter layer_bw_;
-  uint8_t* layer_data_;
-  size_t layer_data_size_;
+  WebPWorker alpha_worker_;
 
   // quantization info (one set of DC/AC dequant factor per segment)
   VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
   int base_quant_;                 // nominal quantizer value. Only used
                                    // for relative coding of segments' quant.
+  int alpha_;                      // global susceptibility (<=> complexity)
   int uv_alpha_;                   // U/V quantization susceptibility
   // global offset of quantizers, shared by all segments
   int dq_y1_dc_;
@@ -401,34 +384,29 @@ struct VP8Encoder {
   int dq_uv_dc_, dq_uv_ac_;
 
   // probabilities and statistics
-  VP8Proba proba_;
-  uint64_t sse_[4];        // sum of Y/U/V/A squared errors for all macroblocks
-  uint64_t sse_count_;     // pixel count for the sse_[] stats
-  int      coded_size_;
-  int      residual_bytes_[3][4];
-  int      block_count_[3];
+  VP8EncProba proba_;
+  uint64_t    sse_[4];      // sum of Y/U/V/A squared errors for all macroblocks
+  uint64_t    sse_count_;   // pixel count for the sse_[] stats
+  int         coded_size_;
+  int         residual_bytes_[3][4];
+  int         block_count_[3];
 
   // quality/speed settings
-  int method_;              // 0=fastest, 6=best/slowest.
-  int rd_opt_level_;        // Deduced from method_.
-  int max_i4_header_bits_;  // partition #0 safeness factor
+  int method_;               // 0=fastest, 6=best/slowest.
+  VP8RDLevel rd_opt_level_;  // Deduced from method_.
+  int max_i4_header_bits_;   // partition #0 safeness factor
+  int thread_level_;         // derived from config->thread_level
+  int do_search_;            // derived from config->target_XXX
+  int use_tokens_;           // if true, use token buffer
 
   // Memory
   VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
   uint8_t*   preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
   uint32_t*  nz_;        // non-zero bit context: mb_w+1
-  uint8_t*   yuv_in_;    // input samples
-  uint8_t*   yuv_out_;   // output samples
-  uint8_t*   yuv_out2_;  // secondary scratch out-buffer. swapped with yuv_out_.
-  uint8_t*   yuv_p_;     // scratch buffer for prediction
-  uint8_t   *y_top_;     // top luma samples.
-  uint8_t   *uv_top_;    // top u/v samples.
-                         // U and V are packed into 16 pixels (8 U + 8 V)
-  uint8_t   *y_left_;    // left luma samples (adressable from index -1 to 15).
-  uint8_t   *u_left_;    // left u samples (adressable from index -1 to 7)
-  uint8_t   *v_left_;    // left v samples (adressable from index -1 to 7)
-
-  LFStats   *lf_stats_;  // autofilter stats (if NULL, autofilter is off)
+  uint8_t*   y_top_;     // top luma samples.
+  uint8_t*   uv_top_;    // top u/v samples.
+                         // U and V are packed into 16 bytes (8 U + 8 V)
+  LFStats*   lf_stats_;  // autofilter stats (if NULL, autofilter is off)
 };
 
 //------------------------------------------------------------------------------
@@ -441,7 +419,7 @@ extern const uint8_t
 // Reset the token probabilities to their initial (default) values
 void VP8DefaultProbas(VP8Encoder* const enc);
 // Write the token probabilities
-void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8EncProba* const probas);
 // Writes the partition #0 modes (that is: all intra modes)
 void VP8CodeIntraModes(VP8Encoder* const enc);
 
@@ -454,7 +432,11 @@ int VP8EncWrite(VP8Encoder* const enc);
 void VP8EncFreeBitWriters(VP8Encoder* const enc);
 
   // in frame.c
-extern const uint8_t VP8EncBands[16 + 1];
+extern const uint8_t VP8Cat3[];
+extern const uint8_t VP8Cat4[];
+extern const uint8_t VP8Cat5[];
+extern const uint8_t VP8Cat6[];
+
 // Form all the four Intra16x16 predictions in the yuv_p_ cache
 void VP8MakeLuma16Preds(const VP8EncIterator* const it);
 // Form all the four Chroma8x8 predictions in the yuv_p_ cache
@@ -466,9 +448,9 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it);
 int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
 int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
 int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
-// Main stat / coding passes
+// Main coding calls
 int VP8EncLoop(VP8Encoder* const enc);
-int VP8StatLoop(VP8Encoder* const enc);
+int VP8EncTokenLoop(VP8Encoder* const enc);
 
   // in webpenc.c
 // Assign an error code to a picture. Return false for convenience.
@@ -485,18 +467,14 @@ int VP8EncAnalyze(VP8Encoder* const enc);
 // Sets up segment's quantization values, base_quant_ and filter strengths.
 void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
 // Pick best modes and fills the levels. Returns true if skipped.
-int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
+                VP8RDLevel rd_opt);
 
   // in alpha.c
 void VP8EncInitAlpha(VP8Encoder* const enc);    // initialize alpha compression
+int VP8EncStartAlpha(VP8Encoder* const enc);    // start alpha coding process
 int VP8EncFinishAlpha(VP8Encoder* const enc);   // finalize compressed data
-void VP8EncDeleteAlpha(VP8Encoder* const enc);  // delete compressed data
-
-  // in layer.c
-void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
-void VP8EncCodeLayerBlock(VP8EncIterator* it);   // code one more macroblock
-int VP8EncFinishLayer(VP8Encoder* const enc);    // finalize coding
-void VP8EncDeleteLayer(VP8Encoder* enc);         // reclaim memory
+int VP8EncDeleteAlpha(VP8Encoder* const enc);   // delete compressed data
 
   // in filter.c
 
@@ -516,9 +494,38 @@ void VP8InitFilter(VP8EncIterator* const it);
 void VP8StoreFilterStats(VP8EncIterator* const it);
 void VP8AdjustFilterStrength(VP8EncIterator* const it);
 
+// returns the approximate filtering strength needed to smooth an edge
+// step of 'delta', given a sharpness parameter 'sharpness'.
+int VP8FilterStrengthFromDelta(int sharpness, int delta);
+
+  // misc utils for picture_*.c:
+
+// Remove reference to the ARGB/YUVA buffer (doesn't free anything).
+void WebPPictureResetBuffers(WebPPicture* const picture);
+
+// Allocates ARGB buffer of given dimension (previous one is always free'd).
+// Preserves the YUV(A) buffer. Returns false in case of error (invalid param,
+// out-of-memory).
+int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height);
+
+// Allocates YUVA buffer of given dimension (previous one is always free'd).
+// Uses picture->csp to determine whether an alpha buffer is needed.
+// Preserves the ARGB buffer.
+// Returns false in case of error (invalid param, out-of-memory).
+int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height);
+
+  // in near_lossless.c
+// Near lossless preprocessing in RGB color-space.
+int VP8ApplyNearLossless(int xsize, int ysize, uint32_t* argb, int quality);
+// Near lossless adjustment for predictors.
+void VP8ApplyNearLosslessPredict(int xsize, int ysize, int pred_bits,
+                                 const uint32_t* argb_orig,
+                                 uint32_t* argb, uint32_t* argb_scratch,
+                                 const uint32_t* const transform_data,
+                                 int quality, int subtract_green);
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/enc/vp8l.c b/drivers/webp/enc/vp8l.c
index f4eb6e783f..284995e830 100644
--- a/drivers/webp/enc/vp8l.c
+++ b/drivers/webp/enc/vp8l.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // main entry for the lossless encoder.
@@ -11,7 +13,6 @@
 //
 
 #include <assert.h>
-#include <stdio.h>
 #include <stdlib.h>
 
 #include "./backward_references.h"
@@ -21,28 +22,107 @@
 #include "../utils/bit_writer.h"
 #include "../utils/huffman_encode.h"
 #include "../utils/utils.h"
-#include "../format_constants.h"
+#include "webp/format_constants.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "./delta_palettization.h"
 
 #define PALETTE_KEY_RIGHT_SHIFT   22  // Key for 1K buffer.
-#define MAX_HUFF_IMAGE_SIZE       (16 * 1024 * 1024)
-#define MAX_COLORS_FOR_GRAPH      64
+// Maximum number of histogram images (sub-blocks).
+#define MAX_HUFF_IMAGE_SIZE       2600
 
-// -----------------------------------------------------------------------------
-// Palette
+// Palette reordering for smaller sum of deltas (and for smaller storage).
 
-static int CompareColors(const void* p1, const void* p2) {
+static int PaletteCompareColorsForQsort(const void* p1, const void* p2) {
   const uint32_t a = *(const uint32_t*)p1;
   const uint32_t b = *(const uint32_t*)p2;
-  return (a < b) ? -1 : (a > b) ? 1 : 0;
+  assert(a != b);
+  return (a < b) ? -1 : 1;
+}
+
+static WEBP_INLINE uint32_t PaletteComponentDistance(uint32_t v) {
+  return (v <= 128) ? v : (256 - v);
+}
+
+// Computes a value that is related to the entropy created by the
+// palette entry diff.
+//
+// Note that the last & 0xff is a no-operation in the next statement; it is
+// removed by most compilers and is here only for regularity of the code.
+static WEBP_INLINE uint32_t PaletteColorDistance(uint32_t col1, uint32_t col2) {
+  const uint32_t diff = VP8LSubPixels(col1, col2);
+  const int kMoreWeightForRGBThanForAlpha = 9;
+  uint32_t score;
+  score =  PaletteComponentDistance((diff >>  0) & 0xff);
+  score += PaletteComponentDistance((diff >>  8) & 0xff);
+  score += PaletteComponentDistance((diff >> 16) & 0xff);
+  score *= kMoreWeightForRGBThanForAlpha;
+  score += PaletteComponentDistance((diff >> 24) & 0xff);
+  return score;
+}
+
+static WEBP_INLINE void SwapColor(uint32_t* const col1, uint32_t* const col2) {
+  const uint32_t tmp = *col1;
+  *col1 = *col2;
+  *col2 = tmp;
+}
+
+static void GreedyMinimizeDeltas(uint32_t palette[], int num_colors) {
+  // Greedily pick, at each step, the color closest to the predicted color to
+  // minimize deltas in the palette. This reduces storage needs since the
+  // palette is stored with delta encoding.
+  uint32_t predict = 0x00000000;
+  int i, k;
+  for (i = 0; i < num_colors; ++i) {
+    int best_ix = i;
+    uint32_t best_score = ~0U;
+    for (k = i; k < num_colors; ++k) {
+      const uint32_t cur_score = PaletteColorDistance(palette[k], predict);
+      if (best_score > cur_score) {
+        best_score = cur_score;
+        best_ix = k;
+      }
+    }
+    SwapColor(&palette[best_ix], &palette[i]);
+    predict = palette[i];
+  }
+}
+
+// The palette has been sorted by alpha. This function checks if the other
+// components of the palette have a monotonic development with regard to
+// position in the palette. If all have monotonic development, there is
+// no benefit to re-organize them greedily. A monotonic development
+// would be spotted in green-only situations (like lossy alpha) or gray-scale
+// images.
+static int PaletteHasNonMonotonousDeltas(uint32_t palette[], int num_colors) {
+  uint32_t predict = 0x000000;
+  int i;
+  uint8_t sign_found = 0x00;
+  for (i = 0; i < num_colors; ++i) {
+    const uint32_t diff = VP8LSubPixels(palette[i], predict);
+    const uint8_t rd = (diff >> 16) & 0xff;
+    const uint8_t gd = (diff >>  8) & 0xff;
+    const uint8_t bd = (diff >>  0) & 0xff;
+    if (rd != 0x00) {
+      sign_found |= (rd < 0x80) ? 1 : 2;
+    }
+    if (gd != 0x00) {
+      sign_found |= (gd < 0x80) ? 8 : 16;
+    }
+    if (bd != 0x00) {
+      sign_found |= (bd < 0x80) ? 64 : 128;
+    }
+    predict = palette[i];
+  }
+  return (sign_found & (sign_found << 1)) != 0;  // both signs in one channel.
 }
 
+// -----------------------------------------------------------------------------
+// Palette
+
 // If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
 // creates a palette and returns true, else returns false.
 static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
+                                   int low_effort,
                                    uint32_t palette[MAX_PALETTE_SIZE],
                                    int* const palette_size) {
   int i, x, y, key;
@@ -85,7 +165,7 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
     argb += pic->argb_stride;
   }
 
-  // TODO(skal): could we reuse in_use[] to speed up ApplyPalette()?
+  // TODO(skal): could we reuse in_use[] to speed up EncodePalette()?
   num_colors = 0;
   for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) {
     if (in_use[i]) {
@@ -93,106 +173,272 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
       ++num_colors;
     }
   }
-
-  qsort(palette, num_colors, sizeof(*palette), CompareColors);
   *palette_size = num_colors;
+  qsort(palette, num_colors, sizeof(*palette), PaletteCompareColorsForQsort);
+  if (!low_effort && PaletteHasNonMonotonousDeltas(palette, num_colors)) {
+    GreedyMinimizeDeltas(palette, num_colors);
+  }
   return 1;
 }
 
+// These five modes are evaluated and their respective entropy is computed.
+typedef enum {
+  kDirect = 0,
+  kSpatial = 1,
+  kSubGreen = 2,
+  kSpatialSubGreen = 3,
+  kPalette = 4,
+  kNumEntropyIx = 5
+} EntropyIx;
+
+typedef enum {
+  kHistoAlpha = 0,
+  kHistoAlphaPred,
+  kHistoGreen,
+  kHistoGreenPred,
+  kHistoRed,
+  kHistoRedPred,
+  kHistoBlue,
+  kHistoBluePred,
+  kHistoRedSubGreen,
+  kHistoRedPredSubGreen,
+  kHistoBlueSubGreen,
+  kHistoBluePredSubGreen,
+  kHistoPalette,
+  kHistoTotal  // Must be last.
+} HistoIx;
+
+static void AddSingleSubGreen(uint32_t p, uint32_t* r, uint32_t* b) {
+  const uint32_t green = p >> 8;  // The upper bits are masked away later.
+  ++r[((p >> 16) - green) & 0xff];
+  ++b[(p - green) & 0xff];
+}
+
+static void AddSingle(uint32_t p,
+                      uint32_t* a, uint32_t* r, uint32_t* g, uint32_t* b) {
+  ++a[p >> 24];
+  ++r[(p >> 16) & 0xff];
+  ++g[(p >> 8) & 0xff];
+  ++b[(p & 0xff)];
+}
+
 static int AnalyzeEntropy(const uint32_t* argb,
                           int width, int height, int argb_stride,
-                          double* const nonpredicted_bits,
-                          double* const predicted_bits) {
-  int x, y;
-  const uint32_t* last_line = NULL;
-  uint32_t last_pix = argb[0];    // so we're sure that pix_diff == 0
-
-  VP8LHistogram* nonpredicted = NULL;
-  VP8LHistogram* predicted =
-      (VP8LHistogram*)malloc(2 * sizeof(*predicted));
-  if (predicted == NULL) return 0;
-  nonpredicted = predicted + 1;
-
-  VP8LHistogramInit(predicted, 0);
-  VP8LHistogramInit(nonpredicted, 0);
-  for (y = 0; y < height; ++y) {
-    for (x = 0; x < width; ++x) {
-      const uint32_t pix = argb[x];
-      const uint32_t pix_diff = VP8LSubPixels(pix, last_pix);
-      if (pix_diff == 0) continue;
-      if (last_line != NULL && pix == last_line[x]) {
-        continue;
+                          int use_palette,
+                          EntropyIx* const min_entropy_ix,
+                          int* const red_and_blue_always_zero) {
+  // Allocate histogram set with cache_bits = 0.
+  uint32_t* const histo =
+      (uint32_t*)WebPSafeCalloc(kHistoTotal, sizeof(*histo) * 256);
+  if (histo != NULL) {
+    int i, x, y;
+    const uint32_t* prev_row = argb;
+    const uint32_t* curr_row = argb + argb_stride;
+    for (y = 1; y < height; ++y) {
+      uint32_t prev_pix = curr_row[0];
+      for (x = 1; x < width; ++x) {
+        const uint32_t pix = curr_row[x];
+        const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix);
+        if ((pix_diff == 0) || (pix == prev_row[x])) continue;
+        prev_pix = pix;
+        AddSingle(pix,
+                  &histo[kHistoAlpha * 256],
+                  &histo[kHistoRed * 256],
+                  &histo[kHistoGreen * 256],
+                  &histo[kHistoBlue * 256]);
+        AddSingle(pix_diff,
+                  &histo[kHistoAlphaPred * 256],
+                  &histo[kHistoRedPred * 256],
+                  &histo[kHistoGreenPred * 256],
+                  &histo[kHistoBluePred * 256]);
+        AddSingleSubGreen(pix,
+                          &histo[kHistoRedSubGreen * 256],
+                          &histo[kHistoBlueSubGreen * 256]);
+        AddSingleSubGreen(pix_diff,
+                          &histo[kHistoRedPredSubGreen * 256],
+                          &histo[kHistoBluePredSubGreen * 256]);
+        {
+          // Approximate the palette by the entropy of the multiplicative hash.
+          const int hash = ((pix + (pix >> 19)) * 0x39c5fba7) >> 24;
+          ++histo[kHistoPalette * 256 + (hash & 0xff)];
+        }
+      }
+      prev_row = curr_row;
+      curr_row += argb_stride;
+    }
+    {
+      double entropy_comp[kHistoTotal];
+      double entropy[kNumEntropyIx];
+      EntropyIx k;
+      EntropyIx last_mode_to_analyze =
+          use_palette ? kPalette : kSpatialSubGreen;
+      int j;
+      // Let's add one zero to the predicted histograms. The zeros are removed
+      // too efficiently by the pix_diff == 0 comparison; at least one of the
+      // zeros is likely to exist.
+      ++histo[kHistoRedPredSubGreen * 256];
+      ++histo[kHistoBluePredSubGreen * 256];
+      ++histo[kHistoRedPred * 256];
+      ++histo[kHistoGreenPred * 256];
+      ++histo[kHistoBluePred * 256];
+      ++histo[kHistoAlphaPred * 256];
+
+      for (j = 0; j < kHistoTotal; ++j) {
+        entropy_comp[j] = VP8LBitsEntropy(&histo[j * 256], 256, NULL);
+      }
+      entropy[kDirect] = entropy_comp[kHistoAlpha] +
+          entropy_comp[kHistoRed] +
+          entropy_comp[kHistoGreen] +
+          entropy_comp[kHistoBlue];
+      entropy[kSpatial] = entropy_comp[kHistoAlphaPred] +
+          entropy_comp[kHistoRedPred] +
+          entropy_comp[kHistoGreenPred] +
+          entropy_comp[kHistoBluePred];
+      entropy[kSubGreen] = entropy_comp[kHistoAlpha] +
+          entropy_comp[kHistoRedSubGreen] +
+          entropy_comp[kHistoGreen] +
+          entropy_comp[kHistoBlueSubGreen];
+      entropy[kSpatialSubGreen] = entropy_comp[kHistoAlphaPred] +
+          entropy_comp[kHistoRedPredSubGreen] +
+          entropy_comp[kHistoGreenPred] +
+          entropy_comp[kHistoBluePredSubGreen];
+      // Palette mode seems more efficient in a breakeven case. Bias with 1.0.
+      entropy[kPalette] = entropy_comp[kHistoPalette] - 1.0;
+
+      *min_entropy_ix = kDirect;
+      for (k = kDirect + 1; k <= last_mode_to_analyze; ++k) {
+        if (entropy[*min_entropy_ix] > entropy[k]) {
+          *min_entropy_ix = k;
+        }
       }
-      last_pix = pix;
+      *red_and_blue_always_zero = 1;
+      // Let's check if the histogram of the chosen entropy mode has
+      // non-zero red and blue values. If all are zero, we can later skip
+      // the cross color optimization.
       {
-        const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix);
-        const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff);
-        VP8LHistogramAddSinglePixOrCopy(nonpredicted, &pix_token);
-        VP8LHistogramAddSinglePixOrCopy(predicted, &pix_diff_token);
+        static const uint8_t kHistoPairs[5][2] = {
+          { kHistoRed, kHistoBlue },
+          { kHistoRedPred, kHistoBluePred },
+          { kHistoRedSubGreen, kHistoBlueSubGreen },
+          { kHistoRedPredSubGreen, kHistoBluePredSubGreen },
+          { kHistoRed, kHistoBlue }
+        };
+        const uint32_t* const red_histo =
+            &histo[256 * kHistoPairs[*min_entropy_ix][0]];
+        const uint32_t* const blue_histo =
+            &histo[256 * kHistoPairs[*min_entropy_ix][1]];
+        for (i = 1; i < 256; ++i) {
+          if ((red_histo[i] | blue_histo[i]) != 0) {
+            *red_and_blue_always_zero = 0;
+            break;
+          }
+        }
       }
     }
-    last_line = argb;
-    argb += argb_stride;
+    free(histo);
+    return 1;
+  } else {
+    return 0;
   }
-  *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(nonpredicted);
-  *predicted_bits = VP8LHistogramEstimateBitsBulk(predicted);
-  free(predicted);
-  return 1;
 }
 
-static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) {
+static int GetHistoBits(int method, int use_palette, int width, int height) {
+  // Make tile size a function of encoding method (Range: 0 to 6).
+  int histo_bits = (use_palette ? 9 : 7) - method;
+  while (1) {
+    const int huff_image_size = VP8LSubSampleSize(width, histo_bits) *
+                                VP8LSubSampleSize(height, histo_bits);
+    if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
+    ++histo_bits;
+  }
+  return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
+         (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
+}
+
+static int GetTransformBits(int method, int histo_bits) {
+  const int max_transform_bits = (method < 4) ? 6 : (method > 4) ? 4 : 5;
+  return (histo_bits > max_transform_bits) ? max_transform_bits : histo_bits;
+}
+
+static int AnalyzeAndInit(VP8LEncoder* const enc) {
   const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+  const int pix_cnt = width * height;
+  const WebPConfig* const config = enc->config_;
+  const int method = config->method;
+  const int low_effort = (config->method == 0);
+  // We round the block size up, so we're guaranteed to use
+  // at most MAX_REFS_BLOCK_PER_IMAGE blocks:
+  int refs_block_size = (pix_cnt - 1) / MAX_REFS_BLOCK_PER_IMAGE + 1;
   assert(pic != NULL && pic->argb != NULL);
 
+  enc->use_cross_color_ = 0;
+  enc->use_predict_ = 0;
+  enc->use_subtract_green_ = 0;
   enc->use_palette_ =
-      AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
-
-  if (image_hint == WEBP_HINT_GRAPH) {
-    if (enc->use_palette_ && enc->palette_size_ < MAX_COLORS_FOR_GRAPH) {
-      enc->use_palette_ = 0;
+      AnalyzeAndCreatePalette(pic, low_effort,
+                              enc->palette_, &enc->palette_size_);
+
+  // TODO(jyrki): replace the decision to be based on an actual estimate
+  // of entropy, or even spatial variance of entropy.
+  enc->histo_bits_ = GetHistoBits(method, enc->use_palette_,
+                                  pic->width, pic->height);
+  enc->transform_bits_ = GetTransformBits(method, enc->histo_bits_);
+
+  if (low_effort) {
+    // AnalyzeEntropy is somewhat slow.
+    enc->use_predict_ = !enc->use_palette_;
+    enc->use_subtract_green_ = !enc->use_palette_;
+    enc->use_cross_color_ = 0;
+  } else {
+    int red_and_blue_always_zero;
+    EntropyIx min_entropy_ix;
+    if (!AnalyzeEntropy(pic->argb, width, height, pic->argb_stride,
+                        enc->use_palette_, &min_entropy_ix,
+                        &red_and_blue_always_zero)) {
+      return 0;
     }
+    enc->use_palette_ = (min_entropy_ix == kPalette);
+    enc->use_subtract_green_ =
+        (min_entropy_ix == kSubGreen) || (min_entropy_ix == kSpatialSubGreen);
+    enc->use_predict_ =
+        (min_entropy_ix == kSpatial) || (min_entropy_ix == kSpatialSubGreen);
+    enc->use_cross_color_ = red_and_blue_always_zero ? 0 : enc->use_predict_;
   }
 
-  if (!enc->use_palette_) {
-    if (image_hint == WEBP_HINT_PHOTO) {
-      enc->use_predict_ = 1;
-      enc->use_cross_color_ = 1;
-    } else {
-      double non_pred_entropy, pred_entropy;
-      if (!AnalyzeEntropy(pic->argb, pic->width, pic->height, pic->argb_stride,
-                          &non_pred_entropy, &pred_entropy)) {
-        return 0;
-      }
-      if (pred_entropy < 0.95 * non_pred_entropy) {
-        enc->use_predict_ = 1;
-        // TODO(vikasa): Observed some correlation of cross_color transform with
-        // predict. Need to investigate this further and add separate heuristic
-        // for setting use_cross_color flag.
-        enc->use_cross_color_ = 1;
-      }
-    }
-  }
+  if (!VP8LHashChainInit(&enc->hash_chain_, pix_cnt)) return 0;
+
+  // Palette-friendly input typically uses fewer literals
+  //  -> reduce block size a bit.
+  if (enc->use_palette_) refs_block_size /= 2;
+  VP8LBackwardRefsInit(&enc->refs_[0], refs_block_size);
+  VP8LBackwardRefsInit(&enc->refs_[1], refs_block_size);
 
   return 1;
 }
 
+// Returns false in case of memory error.
 static int GetHuffBitLengthsAndCodes(
     const VP8LHistogramSet* const histogram_image,
     HuffmanTreeCode* const huffman_codes) {
   int i, k;
-  int ok = 1;
+  int ok = 0;
   uint64_t total_length_size = 0;
   uint8_t* mem_buf = NULL;
   const int histogram_image_size = histogram_image->size;
+  int max_num_symbols = 0;
+  uint8_t* buf_rle = NULL;
+  HuffmanTree* huff_tree = NULL;
 
   // Iterate over all histograms and get the aggregate number of codes used.
   for (i = 0; i < histogram_image_size; ++i) {
     const VP8LHistogram* const histo = histogram_image->histograms[i];
     HuffmanTreeCode* const codes = &huffman_codes[5 * i];
     for (k = 0; k < 5; ++k) {
-      const int num_symbols = (k == 0) ? VP8LHistogramNumCodes(histo)
-                            : (k == 4) ? NUM_DISTANCE_CODES
-                            : 256;
+      const int num_symbols =
+          (k == 0) ? VP8LHistogramNumCodes(histo->palette_code_bits_) :
+          (k == 4) ? NUM_DISTANCE_CODES : 256;
       codes[k].num_symbols = num_symbols;
       total_length_size += num_symbols;
     }
@@ -204,10 +450,8 @@ static int GetHuffBitLengthsAndCodes(
     uint8_t* lengths;
     mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size,
                                        sizeof(*lengths) + sizeof(*codes));
-    if (mem_buf == NULL) {
-      ok = 0;
-      goto End;
-    }
+    if (mem_buf == NULL) goto End;
+
     codes = (uint16_t*)mem_buf;
     lengths = (uint8_t*)&codes[total_length_size];
     for (i = 0; i < 5 * histogram_image_size; ++i) {
@@ -216,22 +460,35 @@ static int GetHuffBitLengthsAndCodes(
       huffman_codes[i].code_lengths = lengths;
       codes += bit_length;
       lengths += bit_length;
+      if (max_num_symbols < bit_length) {
+        max_num_symbols = bit_length;
+      }
     }
   }
 
+  buf_rle = (uint8_t*)WebPSafeMalloc(1ULL, max_num_symbols);
+  huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * max_num_symbols,
+                                           sizeof(*huff_tree));
+  if (buf_rle == NULL || huff_tree == NULL) goto End;
+
   // Create Huffman trees.
   for (i = 0; i < histogram_image_size; ++i) {
     HuffmanTreeCode* const codes = &huffman_codes[5 * i];
     VP8LHistogram* const histo = histogram_image->histograms[i];
-    ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0);
-    ok = ok && VP8LCreateHuffmanTree(histo->red_, 15, codes + 1);
-    ok = ok && VP8LCreateHuffmanTree(histo->blue_, 15, codes + 2);
-    ok = ok && VP8LCreateHuffmanTree(histo->alpha_, 15, codes + 3);
-    ok = ok && VP8LCreateHuffmanTree(histo->distance_, 15, codes + 4);
+    VP8LCreateHuffmanTree(histo->literal_, 15, buf_rle, huff_tree, codes + 0);
+    VP8LCreateHuffmanTree(histo->red_, 15, buf_rle, huff_tree, codes + 1);
+    VP8LCreateHuffmanTree(histo->blue_, 15, buf_rle, huff_tree, codes + 2);
+    VP8LCreateHuffmanTree(histo->alpha_, 15, buf_rle, huff_tree, codes + 3);
+    VP8LCreateHuffmanTree(histo->distance_, 15, buf_rle, huff_tree, codes + 4);
   }
-
+  ok = 1;
  End:
-  if (!ok) free(mem_buf);
+  WebPSafeFree(huff_tree);
+  WebPSafeFree(buf_rle);
+  if (!ok) {
+    WebPSafeFree(mem_buf);
+    memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes));
+  }
   return ok;
 }
 
@@ -251,9 +508,9 @@ static void StoreHuffmanTreeOfHuffmanTreeToBitMask(
       break;
     }
   }
-  VP8LWriteBits(bw, 4, codes_to_store - 4);
+  VP8LPutBits(bw, codes_to_store - 4, 4);
   for (i = 0; i < codes_to_store; ++i) {
-    VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[i]]);
+    VP8LPutBits(bw, code_length_bitdepth[kStorageOrder[i]], 3);
   }
 }
 
@@ -281,49 +538,46 @@ static void StoreHuffmanTreeToBitMask(
   for (i = 0; i < num_tokens; ++i) {
     const int ix = tokens[i].code;
     const int extra_bits = tokens[i].extra_bits;
-    VP8LWriteBits(bw, huffman_code->code_lengths[ix], huffman_code->codes[ix]);
+    VP8LPutBits(bw, huffman_code->codes[ix], huffman_code->code_lengths[ix]);
     switch (ix) {
       case 16:
-        VP8LWriteBits(bw, 2, extra_bits);
+        VP8LPutBits(bw, extra_bits, 2);
         break;
       case 17:
-        VP8LWriteBits(bw, 3, extra_bits);
+        VP8LPutBits(bw, extra_bits, 3);
         break;
       case 18:
-        VP8LWriteBits(bw, 7, extra_bits);
+        VP8LPutBits(bw, extra_bits, 7);
         break;
     }
   }
 }
 
-static int StoreFullHuffmanCode(VP8LBitWriter* const bw,
-                                const HuffmanTreeCode* const tree) {
-  int ok = 0;
+// 'huff_tree' and 'tokens' are pre-allocated buffers.
+static void StoreFullHuffmanCode(VP8LBitWriter* const bw,
+                                 HuffmanTree* const huff_tree,
+                                 HuffmanTreeToken* const tokens,
+                                 const HuffmanTreeCode* const tree) {
   uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 };
   uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 };
   const int max_tokens = tree->num_symbols;
   int num_tokens;
   HuffmanTreeCode huffman_code;
-  HuffmanTreeToken* const tokens =
-      (HuffmanTreeToken*)WebPSafeMalloc((uint64_t)max_tokens, sizeof(*tokens));
-  if (tokens == NULL) return 0;
-
   huffman_code.num_symbols = CODE_LENGTH_CODES;
   huffman_code.code_lengths = code_length_bitdepth;
   huffman_code.codes = code_length_bitdepth_symbols;
 
-  VP8LWriteBits(bw, 1, 0);
+  VP8LPutBits(bw, 0, 1);
   num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
   {
-    int histogram[CODE_LENGTH_CODES] = { 0 };
+    uint32_t histogram[CODE_LENGTH_CODES] = { 0 };
+    uint8_t buf_rle[CODE_LENGTH_CODES] = { 0 };
     int i;
     for (i = 0; i < num_tokens; ++i) {
       ++histogram[tokens[i].code];
     }
 
-    if (!VP8LCreateHuffmanTree(histogram, 7, &huffman_code)) {
-      goto End;
-    }
+    VP8LCreateHuffmanTree(histogram, 7, buf_rle, huff_tree, &huffman_code);
   }
 
   StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth);
@@ -350,24 +604,23 @@ static int StoreFullHuffmanCode(VP8LBitWriter* const bw,
     }
     write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12);
     length = write_trimmed_length ? trimmed_length : num_tokens;
-    VP8LWriteBits(bw, 1, write_trimmed_length);
+    VP8LPutBits(bw, write_trimmed_length, 1);
     if (write_trimmed_length) {
       const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
       const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
-      VP8LWriteBits(bw, 3, nbitpairs - 1);
+      VP8LPutBits(bw, nbitpairs - 1, 3);
       assert(trimmed_length >= 2);
-      VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2);
+      VP8LPutBits(bw, trimmed_length - 2, nbitpairs * 2);
     }
     StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
   }
-  ok = 1;
- End:
-  free(tokens);
-  return ok;
 }
 
-static int StoreHuffmanCode(VP8LBitWriter* const bw,
-                            const HuffmanTreeCode* const huffman_code) {
+// 'huff_tree' and 'tokens' are pre-allocated buffers.
+static void StoreHuffmanCode(VP8LBitWriter* const bw,
+                             HuffmanTree* const huff_tree,
+                             HuffmanTreeToken* const tokens,
+                             const HuffmanTreeCode* const huffman_code) {
   int i;
   int count = 0;
   int symbols[2] = { 0, 0 };
@@ -384,163 +637,248 @@ static int StoreHuffmanCode(VP8LBitWriter* const bw,
 
   if (count == 0) {   // emit minimal tree for empty cases
     // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
-    VP8LWriteBits(bw, 4, 0x01);
-    return 1;
+    VP8LPutBits(bw, 0x01, 4);
   } else if (count <= 2 && symbols[0] < kMaxSymbol && symbols[1] < kMaxSymbol) {
-    VP8LWriteBits(bw, 1, 1);  // Small tree marker to encode 1 or 2 symbols.
-    VP8LWriteBits(bw, 1, count - 1);
+    VP8LPutBits(bw, 1, 1);  // Small tree marker to encode 1 or 2 symbols.
+    VP8LPutBits(bw, count - 1, 1);
     if (symbols[0] <= 1) {
-      VP8LWriteBits(bw, 1, 0);  // Code bit for small (1 bit) symbol value.
-      VP8LWriteBits(bw, 1, symbols[0]);
+      VP8LPutBits(bw, 0, 1);  // Code bit for small (1 bit) symbol value.
+      VP8LPutBits(bw, symbols[0], 1);
     } else {
-      VP8LWriteBits(bw, 1, 1);
-      VP8LWriteBits(bw, 8, symbols[0]);
+      VP8LPutBits(bw, 1, 1);
+      VP8LPutBits(bw, symbols[0], 8);
     }
     if (count == 2) {
-      VP8LWriteBits(bw, 8, symbols[1]);
+      VP8LPutBits(bw, symbols[1], 8);
     }
-    return 1;
   } else {
-    return StoreFullHuffmanCode(bw, huffman_code);
+    StoreFullHuffmanCode(bw, huff_tree, tokens, huffman_code);
   }
 }
 
-static void WriteHuffmanCode(VP8LBitWriter* const bw,
-                             const HuffmanTreeCode* const code, int index) {
-  const int depth = code->code_lengths[index];
-  const int symbol = code->codes[index];
-  VP8LWriteBits(bw, depth, symbol);
+static WEBP_INLINE void WriteHuffmanCode(VP8LBitWriter* const bw,
+                             const HuffmanTreeCode* const code,
+                             int code_index) {
+  const int depth = code->code_lengths[code_index];
+  const int symbol = code->codes[code_index];
+  VP8LPutBits(bw, symbol, depth);
 }
 
-static void StoreImageToBitMask(
+static WEBP_INLINE void WriteHuffmanCodeWithExtraBits(
+    VP8LBitWriter* const bw,
+    const HuffmanTreeCode* const code,
+    int code_index,
+    int bits,
+    int n_bits) {
+  const int depth = code->code_lengths[code_index];
+  const int symbol = code->codes[code_index];
+  VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits);
+}
+
+static WebPEncodingError StoreImageToBitMask(
     VP8LBitWriter* const bw, int width, int histo_bits,
-    const VP8LBackwardRefs* const refs,
+    VP8LBackwardRefs* const refs,
     const uint16_t* histogram_symbols,
     const HuffmanTreeCode* const huffman_codes) {
+  const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+  const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits);
   // x and y trace the position in the image.
   int x = 0;
   int y = 0;
-  const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
-  int i;
-  for (i = 0; i < refs->size; ++i) {
-    const PixOrCopy* const v = &refs->refs[i];
-    const int histogram_ix = histogram_symbols[histo_bits ?
-                                               (y >> histo_bits) * histo_xsize +
-                                               (x >> histo_bits) : 0];
-    const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix;
-    if (PixOrCopyIsCacheIdx(v)) {
-      const int code = PixOrCopyCacheIdx(v);
-      const int literal_ix = 256 + NUM_LENGTH_CODES + code;
-      WriteHuffmanCode(bw, codes, literal_ix);
-    } else if (PixOrCopyIsLiteral(v)) {
+  int tile_x = x & tile_mask;
+  int tile_y = y & tile_mask;
+  int histogram_ix = histogram_symbols[0];
+  const HuffmanTreeCode* codes = huffman_codes + 5 * histogram_ix;
+  VP8LRefsCursor c = VP8LRefsCursorInit(refs);
+  while (VP8LRefsCursorOk(&c)) {
+    const PixOrCopy* const v = c.cur_pos;
+    if ((tile_x != (x & tile_mask)) || (tile_y != (y & tile_mask))) {
+      tile_x = x & tile_mask;
+      tile_y = y & tile_mask;
+      histogram_ix = histogram_symbols[(y >> histo_bits) * histo_xsize +
+                                       (x >> histo_bits)];
+      codes = huffman_codes + 5 * histogram_ix;
+    }
+    if (PixOrCopyIsLiteral(v)) {
       static const int order[] = { 1, 2, 0, 3 };
       int k;
       for (k = 0; k < 4; ++k) {
         const int code = PixOrCopyLiteral(v, order[k]);
         WriteHuffmanCode(bw, codes + k, code);
       }
+    } else if (PixOrCopyIsCacheIdx(v)) {
+      const int code = PixOrCopyCacheIdx(v);
+      const int literal_ix = 256 + NUM_LENGTH_CODES + code;
+      WriteHuffmanCode(bw, codes, literal_ix);
     } else {
       int bits, n_bits;
-      int code, distance;
+      int code;
 
-      PrefixEncode(v->len, &code, &n_bits, &bits);
-      WriteHuffmanCode(bw, codes, 256 + code);
-      VP8LWriteBits(bw, n_bits, bits);
+      const int distance = PixOrCopyDistance(v);
+      VP8LPrefixEncode(v->len, &code, &n_bits, &bits);
+      WriteHuffmanCodeWithExtraBits(bw, codes, 256 + code, bits, n_bits);
 
-      distance = PixOrCopyDistance(v);
-      PrefixEncode(distance, &code, &n_bits, &bits);
+      // Don't combine the distance prefix code and its extra bits into one
+      // write: the distance can carry up to 18 extra bits and the prefix up
+      // to 15 bits, totaling 33, while PutBits only supports up to 32 bits.
+      // TODO(jyrki): optimize this further.
+      VP8LPrefixEncode(distance, &code, &n_bits, &bits);
       WriteHuffmanCode(bw, codes + 4, code);
-      VP8LWriteBits(bw, n_bits, bits);
+      VP8LPutBits(bw, bits, n_bits);
     }
     x += PixOrCopyLength(v);
     while (x >= width) {
       x -= width;
       ++y;
     }
+    VP8LRefsCursorNext(&c);
   }
+  return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK;
 }
 
 // Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
-static int EncodeImageNoHuffman(VP8LBitWriter* const bw,
-                                const uint32_t* const argb,
-                                int width, int height, int quality) {
+static WebPEncodingError EncodeImageNoHuffman(VP8LBitWriter* const bw,
+                                              const uint32_t* const argb,
+                                              VP8LHashChain* const hash_chain,
+                                              VP8LBackwardRefs refs_array[2],
+                                              int width, int height,
+                                              int quality) {
   int i;
-  int ok = 0;
-  VP8LBackwardRefs refs;
+  int max_tokens = 0;
+  WebPEncodingError err = VP8_ENC_OK;
+  VP8LBackwardRefs* refs;
+  HuffmanTreeToken* tokens = NULL;
   HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } };
   const uint16_t histogram_symbols[1] = { 0 };    // only one tree, one symbol
-  VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0);
-  if (histogram_image == NULL) return 0;
+  int cache_bits = 0;
+  VP8LHistogramSet* histogram_image = NULL;
+  HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc(
+        3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree));
+  if (huff_tree == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
 
   // Calculate backward references from ARGB image.
-  if (!VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, &refs)) {
+  refs = VP8LGetBackwardReferences(width, height, argb, quality, 0, &cache_bits,
+                                   hash_chain, refs_array);
+  if (refs == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
+  histogram_image = VP8LAllocateHistogramSet(1, cache_bits);
+  if (histogram_image == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
   // Build histogram image and symbols from backward references.
-  VP8LHistogramStoreRefs(&refs, histogram_image->histograms[0]);
+  VP8LHistogramStoreRefs(refs, histogram_image->histograms[0]);
 
   // Create Huffman bit lengths and codes for each histogram image.
   assert(histogram_image->size == 1);
   if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
 
   // No color cache, no Huffman image.
-  VP8LWriteBits(bw, 1, 0);
+  VP8LPutBits(bw, 0, 1);
 
-  // Store Huffman codes.
+  // Find maximum number of symbols for the huffman tree-set.
   for (i = 0; i < 5; ++i) {
     HuffmanTreeCode* const codes = &huffman_codes[i];
-    if (!StoreHuffmanCode(bw, codes)) {
-      goto Error;
+    if (max_tokens < codes->num_symbols) {
+      max_tokens = codes->num_symbols;
     }
+  }
+
+  tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens));
+  if (tokens == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  // Store Huffman codes.
+  for (i = 0; i < 5; ++i) {
+    HuffmanTreeCode* const codes = &huffman_codes[i];
+    StoreHuffmanCode(bw, huff_tree, tokens, codes);
     ClearHuffmanTreeIfOnlyOneSymbol(codes);
   }
 
   // Store actual literals.
-  StoreImageToBitMask(bw, width, 0, &refs, histogram_symbols, huffman_codes);
-  ok = 1;
+  err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols,
+                            huffman_codes);
 
  Error:
-  free(histogram_image);
-  VP8LClearBackwardRefs(&refs);
-  free(huffman_codes[0].codes);
-  return ok;
+  WebPSafeFree(tokens);
+  WebPSafeFree(huff_tree);
+  VP8LFreeHistogramSet(histogram_image);
+  WebPSafeFree(huffman_codes[0].codes);
+  return err;
 }
 
-static int EncodeImageInternal(VP8LBitWriter* const bw,
-                               const uint32_t* const argb,
-                               int width, int height, int quality,
-                               int cache_bits, int histogram_bits) {
-  int ok = 0;
-  const int use_2d_locality = 1;
-  const int use_color_cache = (cache_bits > 0);
+static WebPEncodingError EncodeImageInternal(VP8LBitWriter* const bw,
+                                             const uint32_t* const argb,
+                                             VP8LHashChain* const hash_chain,
+                                             VP8LBackwardRefs refs_array[2],
+                                             int width, int height, int quality,
+                                             int low_effort, int* cache_bits,
+                                             int histogram_bits,
+                                             size_t init_byte_position,
+                                             int* const hdr_size,
+                                             int* const data_size) {
+  WebPEncodingError err = VP8_ENC_OK;
   const uint32_t histogram_image_xysize =
       VP8LSubSampleSize(width, histogram_bits) *
       VP8LSubSampleSize(height, histogram_bits);
-  VP8LHistogramSet* histogram_image =
-      VP8LAllocateHistogramSet(histogram_image_xysize, 0);
+  VP8LHistogramSet* histogram_image = NULL;
+  VP8LHistogramSet* tmp_histos = NULL;
   int histogram_image_size = 0;
   size_t bit_array_size = 0;
+  HuffmanTree* huff_tree = NULL;
+  HuffmanTreeToken* tokens = NULL;
   HuffmanTreeCode* huffman_codes = NULL;
   VP8LBackwardRefs refs;
+  VP8LBackwardRefs* best_refs;
   uint16_t* const histogram_symbols =
-      (uint16_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+      (uint16_t*)WebPSafeMalloc(histogram_image_xysize,
                                 sizeof(*histogram_symbols));
   assert(histogram_bits >= MIN_HUFFMAN_BITS);
   assert(histogram_bits <= MAX_HUFFMAN_BITS);
-  if (histogram_image == NULL || histogram_symbols == NULL) goto Error;
+  assert(hdr_size != NULL);
+  assert(data_size != NULL);
 
+  VP8LBackwardRefsInit(&refs, refs_array[0].block_size_);
+  if (histogram_symbols == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  *cache_bits = MAX_COLOR_CACHE_BITS;
+  // 'best_refs' is the reference to the best backward refs and points to one
+  // of refs_array[0] or refs_array[1].
   // Calculate backward references from ARGB image.
-  if (!VP8LGetBackwardReferences(width, height, argb, quality, cache_bits,
-                                 use_2d_locality, &refs)) {
+  best_refs = VP8LGetBackwardReferences(width, height, argb, quality,
+                                        low_effort, cache_bits, hash_chain,
+                                        refs_array);
+  if (best_refs == NULL || !VP8LBackwardRefsCopy(best_refs, &refs)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
+  histogram_image =
+      VP8LAllocateHistogramSet(histogram_image_xysize, *cache_bits);
+  tmp_histos = VP8LAllocateHistogramSet(2, *cache_bits);
+  if (histogram_image == NULL || tmp_histos == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
   // Build histogram image and symbols from backward references.
-  if (!VP8LGetHistoImageSymbols(width, height, &refs,
-                                quality, histogram_bits, cache_bits,
-                                histogram_image,
-                                histogram_symbols)) {
+  if (!VP8LGetHistoImageSymbols(width, height, &refs, quality, low_effort,
+                                histogram_bits, *cache_bits, histogram_image,
+                                tmp_histos, histogram_symbols)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
   // Create Huffman bit lengths and codes for each histogram image.
@@ -548,171 +886,167 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
   bit_array_size = 5 * histogram_image_size;
   huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
                                                    sizeof(*huffman_codes));
+  // Note: some histogram_image entries may point to tmp_histos[], so the latter
+  // need to outlive the following call to GetHuffBitLengthsAndCodes().
   if (huffman_codes == NULL ||
       !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
+  // Free combined histograms.
+  VP8LFreeHistogramSet(histogram_image);
+  histogram_image = NULL;
+
+  // Free scratch histograms.
+  VP8LFreeHistogramSet(tmp_histos);
+  tmp_histos = NULL;
 
   // Color Cache parameters.
-  VP8LWriteBits(bw, 1, use_color_cache);
-  if (use_color_cache) {
-    VP8LWriteBits(bw, 4, cache_bits);
+  if (*cache_bits > 0) {
+    VP8LPutBits(bw, 1, 1);
+    VP8LPutBits(bw, *cache_bits, 4);
+  } else {
+    VP8LPutBits(bw, 0, 1);
   }
 
   // Huffman image + meta huffman.
   {
     const int write_histogram_image = (histogram_image_size > 1);
-    VP8LWriteBits(bw, 1, write_histogram_image);
+    VP8LPutBits(bw, write_histogram_image, 1);
     if (write_histogram_image) {
       uint32_t* const histogram_argb =
-          (uint32_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+          (uint32_t*)WebPSafeMalloc(histogram_image_xysize,
                                     sizeof(*histogram_argb));
       int max_index = 0;
       uint32_t i;
-      if (histogram_argb == NULL) goto Error;
+      if (histogram_argb == NULL) {
+        err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+        goto Error;
+      }
       for (i = 0; i < histogram_image_xysize; ++i) {
-        const int index = histogram_symbols[i] & 0xffff;
-        histogram_argb[i] = 0xff000000 | (index << 8);
-        if (index >= max_index) {
-          max_index = index + 1;
+        const int symbol_index = histogram_symbols[i] & 0xffff;
+        histogram_argb[i] = (symbol_index << 8);
+        if (symbol_index >= max_index) {
+          max_index = symbol_index + 1;
         }
       }
       histogram_image_size = max_index;
 
-      VP8LWriteBits(bw, 3, histogram_bits - 2);
-      ok = EncodeImageNoHuffman(bw, histogram_argb,
-                                VP8LSubSampleSize(width, histogram_bits),
-                                VP8LSubSampleSize(height, histogram_bits),
-                                quality);
-      free(histogram_argb);
-      if (!ok) goto Error;
+      VP8LPutBits(bw, histogram_bits - 2, 3);
+      err = EncodeImageNoHuffman(bw, histogram_argb, hash_chain, refs_array,
+                                 VP8LSubSampleSize(width, histogram_bits),
+                                 VP8LSubSampleSize(height, histogram_bits),
+                                 quality);
+      WebPSafeFree(histogram_argb);
+      if (err != VP8_ENC_OK) goto Error;
     }
   }
 
   // Store Huffman codes.
   {
     int i;
+    int max_tokens = 0;
+    huff_tree = (HuffmanTree*)WebPSafeMalloc(3ULL * CODE_LENGTH_CODES,
+                                             sizeof(*huff_tree));
+    if (huff_tree == NULL) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+    // Find maximum number of symbols for the huffman tree-set.
     for (i = 0; i < 5 * histogram_image_size; ++i) {
       HuffmanTreeCode* const codes = &huffman_codes[i];
-      if (!StoreHuffmanCode(bw, codes)) goto Error;
+      if (max_tokens < codes->num_symbols) {
+        max_tokens = codes->num_symbols;
+      }
+    }
+    tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens,
+                                               sizeof(*tokens));
+    if (tokens == NULL) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+    for (i = 0; i < 5 * histogram_image_size; ++i) {
+      HuffmanTreeCode* const codes = &huffman_codes[i];
+      StoreHuffmanCode(bw, huff_tree, tokens, codes);
       ClearHuffmanTreeIfOnlyOneSymbol(codes);
     }
   }
-  // Free combined histograms.
-  free(histogram_image);
-  histogram_image = NULL;
 
+  *hdr_size = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position);
   // Store actual literals.
-  StoreImageToBitMask(bw, width, histogram_bits, &refs,
-                      histogram_symbols, huffman_codes);
-  ok = 1;
+  err = StoreImageToBitMask(bw, width, histogram_bits, &refs,
+                            histogram_symbols, huffman_codes);
+  *data_size =
+        (int)(VP8LBitWriterNumBytes(bw) - init_byte_position - *hdr_size);
 
  Error:
-  if (!ok) free(histogram_image);
-
-  VP8LClearBackwardRefs(&refs);
+  WebPSafeFree(tokens);
+  WebPSafeFree(huff_tree);
+  VP8LFreeHistogramSet(histogram_image);
+  VP8LFreeHistogramSet(tmp_histos);
+  VP8LBackwardRefsClear(&refs);
   if (huffman_codes != NULL) {
-    free(huffman_codes->codes);
-    free(huffman_codes);
+    WebPSafeFree(huffman_codes->codes);
+    WebPSafeFree(huffman_codes);
   }
-  free(histogram_symbols);
-  return ok;
+  WebPSafeFree(histogram_symbols);
+  return err;
 }
 
 // -----------------------------------------------------------------------------
 // Transforms
 
-// Check if it would be a good idea to subtract green from red and blue. We
-// only impact entropy in red/blue components, don't bother to look at others.
-static int EvalAndApplySubtractGreen(VP8LEncoder* const enc,
-                                     int width, int height,
-                                     VP8LBitWriter* const bw) {
-  if (!enc->use_palette_) {
-    int i;
-    const uint32_t* const argb = enc->argb_;
-    double bit_cost_before, bit_cost_after;
-    VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo));
-    if (histo == NULL) return 0;
-
-    VP8LHistogramInit(histo, 1);
-    for (i = 0; i < width * height; ++i) {
-      const uint32_t c = argb[i];
-      ++histo->red_[(c >> 16) & 0xff];
-      ++histo->blue_[(c >> 0) & 0xff];
-    }
-    bit_cost_before = VP8LHistogramEstimateBits(histo);
-
-    VP8LHistogramInit(histo, 1);
-    for (i = 0; i < width * height; ++i) {
-      const uint32_t c = argb[i];
-      const int green = (c >> 8) & 0xff;
-      ++histo->red_[((c >> 16) - green) & 0xff];
-      ++histo->blue_[((c >> 0) - green) & 0xff];
-    }
-    bit_cost_after = VP8LHistogramEstimateBits(histo);
-    free(histo);
-
-    // Check if subtracting green yields low entropy.
-    enc->use_subtract_green_ = (bit_cost_after < bit_cost_before);
-    if (enc->use_subtract_green_) {
-      VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
-      VP8LWriteBits(bw, 2, SUBTRACT_GREEN);
-      VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
-    }
-  }
-  return 1;
+static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height,
+                               VP8LBitWriter* const bw) {
+  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+  VP8LPutBits(bw, SUBTRACT_GREEN, 2);
+  VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
 }
 
-static int ApplyPredictFilter(const VP8LEncoder* const enc,
-                              int width, int height, int quality,
-                              VP8LBitWriter* const bw) {
+static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc,
+                                            int width, int height,
+                                            int quality, int low_effort,
+                                            VP8LBitWriter* const bw) {
   const int pred_bits = enc->transform_bits_;
   const int transform_width = VP8LSubSampleSize(width, pred_bits);
   const int transform_height = VP8LSubSampleSize(height, pred_bits);
 
-  VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_,
-                    enc->transform_data_);
-  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
-  VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM);
+  VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_,
+                    enc->argb_scratch_, enc->transform_data_);
+  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+  VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
   assert(pred_bits >= 2);
-  VP8LWriteBits(bw, 3, pred_bits - 2);
-  if (!EncodeImageNoHuffman(bw, enc->transform_data_,
-                            transform_width, transform_height, quality)) {
-    return 0;
-  }
-  return 1;
+  VP8LPutBits(bw, pred_bits - 2, 3);
+  return EncodeImageNoHuffman(bw, enc->transform_data_,
+                              (VP8LHashChain*)&enc->hash_chain_,
+                              (VP8LBackwardRefs*)enc->refs_,  // cast const away
+                              transform_width, transform_height,
+                              quality);
 }
 
-static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
-                                 int width, int height, int quality,
-                                 VP8LBitWriter* const bw) {
+static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc,
+                                               int width, int height,
+                                               int quality,
+                                               VP8LBitWriter* const bw) {
   const int ccolor_transform_bits = enc->transform_bits_;
   const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits);
   const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits);
-  const int step = (quality == 0) ? 32 : 8;
 
-  VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step,
+  VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality,
                           enc->argb_, enc->transform_data_);
-  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
-  VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);
+  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+  VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2);
   assert(ccolor_transform_bits >= 2);
-  VP8LWriteBits(bw, 3, ccolor_transform_bits - 2);
-  if (!EncodeImageNoHuffman(bw, enc->transform_data_,
-                            transform_width, transform_height, quality)) {
-    return 0;
-  }
-  return 1;
+  VP8LPutBits(bw, ccolor_transform_bits - 2, 3);
+  return EncodeImageNoHuffman(bw, enc->transform_data_,
+                              (VP8LHashChain*)&enc->hash_chain_,
+                              (VP8LBackwardRefs*)enc->refs_,  // cast const away
+                              transform_width, transform_height,
+                              quality);
 }
 
 // -----------------------------------------------------------------------------
 
-static void PutLE32(uint8_t* const data, uint32_t val) {
-  data[0] = (val >>  0) & 0xff;
-  data[1] = (val >>  8) & 0xff;
-  data[2] = (val >> 16) & 0xff;
-  data[3] = (val >> 24) & 0xff;
-}
-
 static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
                                          size_t riff_size, size_t vp8l_size) {
   uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
@@ -733,14 +1067,14 @@ static int WriteImageSize(const WebPPicture* const pic,
   const int height = pic->height - 1;
   assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION);
 
-  VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width);
-  VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height);
+  VP8LPutBits(bw, width, VP8L_IMAGE_SIZE_BITS);
+  VP8LPutBits(bw, height, VP8L_IMAGE_SIZE_BITS);
   return !bw->error_;
 }
 
 static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
-  VP8LWriteBits(bw, 1, has_alpha);
-  VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION);
+  VP8LPutBits(bw, has_alpha, 1);
+  VP8LPutBits(bw, VP8L_VERSION, VP8L_VERSION_BITS);
   return !bw->error_;
 }
 
@@ -780,166 +1114,261 @@ static WebPEncodingError WriteImage(const WebPPicture* const pic,
 
 // Allocates the memory for argb (W x H) buffer, 2 rows of context for
 // prediction and transform data.
+// Flags influencing the memory allocated:
+//  enc->transform_bits_
+//  enc->use_predict_, enc->use_cross_color_
 static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
                                                  int width, int height) {
   WebPEncodingError err = VP8_ENC_OK;
-  const int tile_size = 1 << enc->transform_bits_;
-  const uint64_t image_size = width * height;
-  const uint64_t argb_scratch_size = tile_size * width + width;
-  const uint64_t transform_data_size =
-      (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
-      (uint64_t)VP8LSubSampleSize(height, enc->transform_bits_);
-  const uint64_t total_size =
-      image_size + argb_scratch_size + transform_data_size;
-  uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
-  if (mem == NULL) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
+  if (enc->argb_ == NULL) {
+    const int tile_size = 1 << enc->transform_bits_;
+    const uint64_t image_size = width * height;
+    // Ensure enough size for tiles, as well as for two scanlines and two
+    // extra pixels for CopyImageWithPrediction.
+    const uint64_t argb_scratch_size =
+        enc->use_predict_ ? tile_size * width + width + 2 : 0;
+    const int transform_data_size =
+        (enc->use_predict_ || enc->use_cross_color_)
+            ? VP8LSubSampleSize(width, enc->transform_bits_) *
+              VP8LSubSampleSize(height, enc->transform_bits_)
+            : 0;
+    const uint64_t total_size =
+        image_size + WEBP_ALIGN_CST +
+        argb_scratch_size + WEBP_ALIGN_CST +
+        (uint64_t)transform_data_size;
+    uint32_t* mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+    if (mem == NULL) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
+    enc->argb_ = mem;
+    mem = (uint32_t*)WEBP_ALIGN(mem + image_size);
+    enc->argb_scratch_ = mem;
+    mem = (uint32_t*)WEBP_ALIGN(mem + argb_scratch_size);
+    enc->transform_data_ = mem;
+    enc->current_width_ = width;
   }
-  enc->argb_ = mem;
-  mem += image_size;
-  enc->argb_scratch_ = mem;
-  mem += argb_scratch_size;
-  enc->transform_data_ = mem;
-  enc->current_width_ = width;
-
  Error:
   return err;
 }
 
-// Bundles multiple (2, 4 or 8) pixels into a single pixel.
-// Returns the new xsize.
-static void BundleColorMap(const WebPPicture* const pic,
-                           int xbits, uint32_t* bundled_argb, int xs) {
-  int y;
-  const int bit_depth = 1 << (3 - xbits);
-  uint32_t code = 0;
-  const uint32_t* argb = pic->argb;
-  const int width = pic->width;
-  const int height = pic->height;
+static void ClearTransformBuffer(VP8LEncoder* const enc) {
+  WebPSafeFree(enc->argb_);
+  enc->argb_ = NULL;
+}
 
+static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const WebPPicture* const picture = enc->pic_;
+  const int width = picture->width;
+  const int height = picture->height;
+  int y;
+  err = AllocateTransformBuffer(enc, width, height);
+  if (err != VP8_ENC_OK) return err;
   for (y = 0; y < height; ++y) {
-    int x;
-    for (x = 0; x < width; ++x) {
-      const int mask = (1 << xbits) - 1;
-      const int xsub = x & mask;
-      if (xsub == 0) {
-        code = 0;
-      }
-      // TODO(vikasa): simplify the bundling logic.
-      code |= (argb[x] & 0xff00) << (bit_depth * xsub);
-      bundled_argb[y * xs + (x >> xbits)] = 0xff000000 | code;
-    }
-    argb += pic->argb_stride;
+    memcpy(enc->argb_ + y * width,
+           picture->argb + y * picture->argb_stride,
+           width * sizeof(*enc->argb_));
   }
+  assert(enc->current_width_ == width);
+  return VP8_ENC_OK;
 }
 
-// Note: Expects "enc->palette_" to be set properly.
-// Also, "enc->palette_" will be modified after this call and should not be used
-// later.
-static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
-                                      VP8LEncoder* const enc, int quality) {
-  WebPEncodingError err = VP8_ENC_OK;
-  int i, x, y;
-  const WebPPicture* const pic = enc->pic_;
-  uint32_t* argb = pic->argb;
-  const int width = pic->width;
-  const int height = pic->height;
-  uint32_t* const palette = enc->palette_;
-  const int palette_size = enc->palette_size_;
+// -----------------------------------------------------------------------------
 
-  // Replace each input pixel by corresponding palette index.
-  for (y = 0; y < height; ++y) {
-    for (x = 0; x < width; ++x) {
-      const uint32_t pix = argb[x];
-      for (i = 0; i < palette_size; ++i) {
+static void MapToPalette(const uint32_t palette[], int num_colors,
+                         uint32_t* const last_pix, int* const last_idx,
+                         const uint32_t* src, uint8_t* dst, int width) {
+  int x;
+  int prev_idx = *last_idx;
+  uint32_t prev_pix = *last_pix;
+  for (x = 0; x < width; ++x) {
+    const uint32_t pix = src[x];
+    if (pix != prev_pix) {
+      int i;
+      for (i = 0; i < num_colors; ++i) {
         if (pix == palette[i]) {
-          argb[x] = 0xff000000u | (i << 8);
+          prev_idx = i;
+          prev_pix = pix;
           break;
         }
       }
     }
-    argb += pic->argb_stride;
+    dst[x] = prev_idx;
   }
+  *last_idx = prev_idx;
+  *last_pix = prev_pix;
+}
 
-  // Save palette to bitstream.
-  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
-  VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM);
-  assert(palette_size >= 1);
-  VP8LWriteBits(bw, 8, palette_size - 1);
-  for (i = palette_size - 1; i >= 1; --i) {
-    palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
-  }
-  if (!EncodeImageNoHuffman(bw, palette, palette_size, 1, quality)) {
-    err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
-    goto Error;
+// Remap argb values in src[] to packed palette entries in dst[],
+// using an internally allocated temporary row buffer of size 'width'.
+// We assume that all src[] values have a corresponding entry in the palette.
+// Note: src[] can be the same as dst[]
+static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride,
+                                      uint32_t* dst, uint32_t dst_stride,
+                                      const uint32_t* palette, int palette_size,
+                                      int width, int height, int xbits) {
+  // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be
+  // made to work in-place.
+  uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row));
+  int i, x, y;
+  int use_LUT = 1;
+
+  if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+  for (i = 0; i < palette_size; ++i) {
+    if ((palette[i] & 0xffff00ffu) != 0) {
+      use_LUT = 0;
+      break;
+    }
   }
 
-  if (palette_size <= 16) {
-    // Image can be packed (multiple pixels per uint32_t).
-    int xbits = 1;
-    if (palette_size <= 2) {
-      xbits = 3;
-    } else if (palette_size <= 4) {
-      xbits = 2;
+  if (use_LUT) {
+    uint8_t inv_palette[MAX_PALETTE_SIZE] = { 0 };
+    for (i = 0; i < palette_size; ++i) {
+      const int color = (palette[i] >> 8) & 0xff;
+      inv_palette[color] = i;
+    }
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const int color = (src[x] >> 8) & 0xff;
+        tmp_row[x] = inv_palette[color];
+      }
+      VP8LBundleColorMap(tmp_row, width, xbits, dst);
+      src += src_stride;
+      dst += dst_stride;
+    }
+  } else {
+    // Use 1 pixel cache for ARGB pixels.
+    uint32_t last_pix = palette[0];
+    int last_idx = 0;
+    for (y = 0; y < height; ++y) {
+      MapToPalette(palette, palette_size, &last_pix, &last_idx,
+                   src, tmp_row, width);
+      VP8LBundleColorMap(tmp_row, width, xbits, dst);
+      src += src_stride;
+      dst += dst_stride;
     }
-    err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
-    if (err != VP8_ENC_OK) goto Error;
-    BundleColorMap(pic, xbits, enc->argb_, enc->current_width_);
   }
+  WebPSafeFree(tmp_row);
+  return VP8_ENC_OK;
+}
 
- Error:
+// Note: Expects "enc->palette_" to be set properly.
+static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc,
+                                             int in_place) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+  const uint32_t* const palette = enc->palette_;
+  const uint32_t* src = in_place ? enc->argb_ : pic->argb;
+  const int src_stride = in_place ? enc->current_width_ : pic->argb_stride;
+  const int palette_size = enc->palette_size_;
+  int xbits;
+
+  // Replace each input pixel by corresponding palette index.
+  // This is done line by line.
+  if (palette_size <= 4) {
+    xbits = (palette_size <= 2) ? 3 : 2;
+  } else {
+    xbits = (palette_size <= 16) ? 1 : 0;
+  }
+
+  err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
+  if (err != VP8_ENC_OK) return err;
+
+  err = ApplyPalette(src, src_stride,
+                     enc->argb_, enc->current_width_,
+                     palette, palette_size, width, height, xbits);
   return err;
 }
 
-// -----------------------------------------------------------------------------
+// Save palette_[] to bitstream.
+static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
+                                       VP8LEncoder* const enc) {
+  int i;
+  uint32_t tmp_palette[MAX_PALETTE_SIZE];
+  const int palette_size = enc->palette_size_;
+  const uint32_t* const palette = enc->palette_;
+  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+  VP8LPutBits(bw, COLOR_INDEXING_TRANSFORM, 2);
+  assert(palette_size >= 1 && palette_size <= MAX_PALETTE_SIZE);
+  VP8LPutBits(bw, palette_size - 1, 8);
+  for (i = palette_size - 1; i >= 1; --i) {
+    tmp_palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
+  }
+  tmp_palette[0] = palette[0];
+  return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, enc->refs_,
+                              palette_size, 1, 20 /* quality */);
+}
 
-static int GetHistoBits(const WebPConfig* const config,
-                        const WebPPicture* const pic) {
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+
+static WebPEncodingError EncodeDeltaPalettePredictorImage(
+    VP8LBitWriter* const bw, VP8LEncoder* const enc, int quality) {
+  const WebPPicture* const pic = enc->pic_;
   const int width = pic->width;
   const int height = pic->height;
-  const size_t hist_size = sizeof(VP8LHistogram);
-  // Make tile size a function of encoding method (Range: 0 to 6).
-  int histo_bits = 7 - config->method;
-  while (1) {
-    const size_t huff_image_size = VP8LSubSampleSize(width, histo_bits) *
-                                   VP8LSubSampleSize(height, histo_bits) *
-                                   hist_size;
-    if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
-    ++histo_bits;
+
+  const int pred_bits = 5;
+  const int transform_width = VP8LSubSampleSize(width, pred_bits);
+  const int transform_height = VP8LSubSampleSize(height, pred_bits);
+  const int pred = 7;   // default is Predictor7 (Top/Left Average)
+  const int tiles_per_row = VP8LSubSampleSize(width, pred_bits);
+  const int tiles_per_col = VP8LSubSampleSize(height, pred_bits);
+  uint32_t* predictors;
+  int tile_x, tile_y;
+  WebPEncodingError err = VP8_ENC_OK;
+
+  predictors = (uint32_t*)WebPSafeMalloc(tiles_per_col * tiles_per_row,
+                                         sizeof(*predictors));
+  if (predictors == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
+
+  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+      predictors[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
+    }
   }
-  return (histo_bits < MIN_HUFFMAN_BITS) ? MIN_HUFFMAN_BITS :
-         (histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
-}
 
-static void InitEncParams(VP8LEncoder* const enc) {
-  const WebPConfig* const config = enc->config_;
-  const WebPPicture* const picture = enc->pic_;
-  const int method = config->method;
-  const float quality = config->quality;
-  enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4;
-  enc->histo_bits_ = GetHistoBits(config, picture);
-  enc->cache_bits_ = (quality <= 25.f) ? 0 : 7;
+  VP8LPutBits(bw, TRANSFORM_PRESENT, 1);
+  VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2);
+  VP8LPutBits(bw, pred_bits - 2, 3);
+  err = EncodeImageNoHuffman(bw, predictors, &enc->hash_chain_,
+                             (VP8LBackwardRefs*)enc->refs_,  // cast const away
+                             transform_width, transform_height,
+                             quality);
+  WebPSafeFree(predictors);
+  return err;
 }
 
+#endif // WEBP_EXPERIMENTAL_FEATURES
+
 // -----------------------------------------------------------------------------
 // VP8LEncoder
 
 static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
                                    const WebPPicture* const picture) {
-  VP8LEncoder* const enc = (VP8LEncoder*)calloc(1, sizeof(*enc));
+  VP8LEncoder* const enc = (VP8LEncoder*)WebPSafeCalloc(1ULL, sizeof(*enc));
   if (enc == NULL) {
     WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
     return NULL;
   }
   enc->config_ = config;
   enc->pic_ = picture;
+
+  VP8LEncDspInit();
+
   return enc;
 }
 
 static void VP8LEncoderDelete(VP8LEncoder* enc) {
-  free(enc->argb_);
-  free(enc);
+  if (enc != NULL) {
+    VP8LHashChainClear(&enc->hash_chain_);
+    VP8LBackwardRefsClear(&enc->refs_[0]);
+    VP8LBackwardRefsClear(&enc->refs_[1]);
+    ClearTransformBuffer(enc);
+    WebPSafeFree(enc);
+  }
 }
 
 // -----------------------------------------------------------------------------
@@ -950,89 +1379,102 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
                                    VP8LBitWriter* const bw) {
   WebPEncodingError err = VP8_ENC_OK;
   const int quality = (int)config->quality;
+  const int low_effort = (config->method == 0);
   const int width = picture->width;
   const int height = picture->height;
   VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
   const size_t byte_position = VP8LBitWriterNumBytes(bw);
+  int use_near_lossless = 0;
+  int hdr_size = 0;
+  int data_size = 0;
+  int use_delta_palettization = 0;
 
   if (enc == NULL) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
 
-  InitEncParams(enc);
-
   // ---------------------------------------------------------------------------
   // Analyze image (entropy, num_palettes etc)
 
-  if (!VP8LEncAnalyze(enc, config->image_hint)) {
+  if (!AnalyzeAndInit(enc)) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
 
-  if (enc->use_palette_) {
-    err = ApplyPalette(bw, enc, quality);
-    if (err != VP8_ENC_OK) goto Error;
-    // Color cache is disabled for palette.
-    enc->cache_bits_ = 0;
+  // Apply near-lossless preprocessing.
+  use_near_lossless = !enc->use_palette_ && (config->near_lossless < 100);
+  if (use_near_lossless) {
+    if (!VP8ApplyNearLossless(width, height, picture->argb,
+                              config->near_lossless)) {
+      err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+      goto Error;
+    }
   }
 
-  // In case image is not packed.
-  if (enc->argb_ == NULL) {
-    int y;
-    err = AllocateTransformBuffer(enc, width, height);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  if (config->delta_palettization) {
+    enc->use_predict_ = 1;
+    enc->use_cross_color_ = 0;
+    enc->use_subtract_green_ = 0;
+    enc->use_palette_ = 1;
+    err = MakeInputImageCopy(enc);
     if (err != VP8_ENC_OK) goto Error;
-    for (y = 0; y < height; ++y) {
-      memcpy(enc->argb_ + y * width,
-             picture->argb + y * picture->argb_stride,
-             width * sizeof(*enc->argb_));
+    err = WebPSearchOptimalDeltaPalette(enc);
+    if (err != VP8_ENC_OK) goto Error;
+    if (enc->use_palette_) {
+      err = AllocateTransformBuffer(enc, width, height);
+      if (err != VP8_ENC_OK) goto Error;
+      err = EncodeDeltaPalettePredictorImage(bw, enc, quality);
+      if (err != VP8_ENC_OK) goto Error;
+      use_delta_palettization = 1;
     }
-    enc->current_width_ = width;
   }
+#endif  // WEBP_EXPERIMENTAL_FEATURES
 
-  // ---------------------------------------------------------------------------
-  // Apply transforms and write transform data.
-
-  if (!EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw)) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
+  // Encode palette
+  if (enc->use_palette_) {
+    err = EncodePalette(bw, enc);
+    if (err != VP8_ENC_OK) goto Error;
+    err = MapImageFromPalette(enc, use_delta_palettization);
+    if (err != VP8_ENC_OK) goto Error;
   }
-
-  if (enc->use_predict_) {
-    if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, bw)) {
-      err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
-      goto Error;
+  if (!use_delta_palettization) {
+    // In case image is not packed.
+    if (enc->argb_ == NULL) {
+      err = MakeInputImageCopy(enc);
+      if (err != VP8_ENC_OK) goto Error;
     }
-  }
 
-  if (enc->use_cross_color_) {
-    if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw)) {
-      err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
-      goto Error;
-    }
-  }
+    // -------------------------------------------------------------------------
+    // Apply transforms and write transform data.
 
-  VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT);  // No more transforms.
+    if (enc->use_subtract_green_) {
+      ApplySubtractGreen(enc, enc->current_width_, height, bw);
+    }
 
-  // ---------------------------------------------------------------------------
-  // Estimate the color cache size.
+    if (enc->use_predict_) {
+      err = ApplyPredictFilter(enc, enc->current_width_, height, quality,
+                               low_effort, bw);
+      if (err != VP8_ENC_OK) goto Error;
+    }
 
-  if (enc->cache_bits_ > 0) {
-    if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_,
-                                           height, &enc->cache_bits_)) {
-      err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
-      goto Error;
+    if (enc->use_cross_color_) {
+      err = ApplyCrossColorFilter(enc, enc->current_width_,
+                                  height, quality, bw);
+      if (err != VP8_ENC_OK) goto Error;
     }
   }
 
+  VP8LPutBits(bw, !TRANSFORM_PRESENT, 1);  // No more transforms.
+
   // ---------------------------------------------------------------------------
   // Encode and write the transformed image.
-
-  if (!EncodeImageInternal(bw, enc->argb_, enc->current_width_, height,
-                           quality, enc->cache_bits_, enc->histo_bits_)) {
-    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-    goto Error;
-  }
+  err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_,
+                            enc->current_width_, height, quality, low_effort,
+                            &enc->cache_bits_, enc->histo_bits_, byte_position,
+                            &hdr_size, &data_size);
+  if (err != VP8_ENC_OK) goto Error;
 
   if (picture->stats != NULL) {
     WebPAuxStats* const stats = picture->stats;
@@ -1046,6 +1488,8 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
     stats->cache_bits = enc->cache_bits_;
     stats->palette_size = enc->palette_size_;
     stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
+    stats->lossless_hdr_size = hdr_size;
+    stats->lossless_data_size = data_size;
   }
 
  Error:
@@ -1059,6 +1503,7 @@ int VP8LEncodeImage(const WebPConfig* const config,
   int has_alpha;
   size_t coded_size;
   int percent = 0;
+  int initial_size;
   WebPEncodingError err = VP8_ENC_OK;
   VP8LBitWriter bw;
 
@@ -1072,7 +1517,11 @@ int VP8LEncodeImage(const WebPConfig* const config,
 
   width = picture->width;
   height = picture->height;
-  if (!VP8LBitWriterInit(&bw, (width * height) >> 1)) {
+  // Initialize BitWriter with a size corresponding to 16 bpp for photo images
+  // and 8 bpp for graphical images.
+  initial_size = (config->image_hint == WEBP_HINT_GRAPH) ?
+      width * height : width * height * 2;
+  if (!VP8LBitWriterInit(&bw, initial_size)) {
     err = VP8_ENC_ERROR_OUT_OF_MEMORY;
     goto Error;
   }
@@ -1135,7 +1584,7 @@ int VP8LEncodeImage(const WebPConfig* const config,
 
  Error:
   if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY;
-  VP8LBitWriterDestroy(&bw);
+  VP8LBitWriterWipeOut(&bw);
   if (err != VP8_ENC_OK) {
     WebPEncodingSetError(picture, err);
     return 0;
@@ -1144,7 +1593,3 @@ int VP8LEncodeImage(const WebPConfig* const config,
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/enc/vp8li.h b/drivers/webp/enc/vp8li.h
index bb111aec33..4543c3b260 100644
--- a/drivers/webp/enc/vp8li.h
+++ b/drivers/webp/enc/vp8li.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Lossless encoder: internal header.
@@ -12,12 +14,13 @@
 #ifndef WEBP_ENC_VP8LI_H_
 #define WEBP_ENC_VP8LI_H_
 
+#include "./backward_references.h"
 #include "./histogram.h"
 #include "../utils/bit_writer.h"
-#include "../encode.h"
-#include "../format_constants.h"
+#include "webp/encode.h"
+#include "webp/format_constants.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -43,6 +46,12 @@ typedef struct {
   int use_palette_;
   int palette_size_;
   uint32_t palette_[MAX_PALETTE_SIZE];
+
+  // Some 'scratch' (potentially large) objects.
+  struct VP8LBackwardRefs refs_[2];  // Backward Refs array corresponding to
+                                     // LZ77 & RLE coding.
+  VP8LHashChain hash_chain_;         // HashChain data for constructing
+                                     // backward references.
 } VP8LEncoder;
 
 //------------------------------------------------------------------------------
@@ -61,7 +70,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/enc/webpenc.c b/drivers/webp/enc/webpenc.c
index 3c275589fc..8ced07a2a3 100644
--- a/drivers/webp/enc/webpenc.c
+++ b/drivers/webp/enc/webpenc.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // WebP encoder: main entry point
@@ -14,16 +16,13 @@
 #include <string.h>
 #include <math.h>
 
+#include "./cost.h"
 #include "./vp8enci.h"
 #include "./vp8li.h"
 #include "../utils/utils.h"
 
 // #define PRINT_MEMORY_INFO
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 #ifdef PRINT_MEMORY_INFO
 #include <stdio.h>
 #endif
@@ -35,43 +34,18 @@ int WebPGetEncoderVersion(void) {
 }
 
 //------------------------------------------------------------------------------
-// WebPPicture
-//------------------------------------------------------------------------------
-
-static int DummyWriter(const uint8_t* data, size_t data_size,
-                       const WebPPicture* const picture) {
-  // The following are to prevent 'unused variable' error message.
-  (void)data;
-  (void)data_size;
-  (void)picture;
-  return 1;
-}
-
-int WebPPictureInitInternal(WebPPicture* picture, int version) {
-  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
-    return 0;   // caller/system version mismatch!
-  }
-  if (picture != NULL) {
-    memset(picture, 0, sizeof(*picture));
-    picture->writer = DummyWriter;
-    WebPEncodingSetError(picture, VP8_ENC_OK);
-  }
-  return 1;
-}
-
-//------------------------------------------------------------------------------
 // VP8Encoder
 //------------------------------------------------------------------------------
 
 static void ResetSegmentHeader(VP8Encoder* const enc) {
-  VP8SegmentHeader* const hdr = &enc->segment_hdr_;
+  VP8EncSegmentHeader* const hdr = &enc->segment_hdr_;
   hdr->num_segments_ = enc->config_->segments;
   hdr->update_map_  = (hdr->num_segments_ > 1);
   hdr->size_ = 0;
 }
 
 static void ResetFilterHeader(VP8Encoder* const enc) {
-  VP8FilterHeader* const hdr = &enc->filter_hdr_;
+  VP8EncFilterHeader* const hdr = &enc->filter_hdr_;
   hdr->simple_ = 1;
   hdr->level_ = 0;
   hdr->sharpness_ = 0;
@@ -93,56 +67,73 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
   enc->nz_[-1] = 0;   // constant
 }
 
-// Map configured quality level to coding tools used.
-//-------------+---+---+---+---+---+---+
-//   Quality   | 0 | 1 | 2 | 3 | 4 | 5 +
-//-------------+---+---+---+---+---+---+
-// dynamic prob| ~ | x | x | x | x | x |
-//-------------+---+---+---+---+---+---+
-// rd-opt modes|   |   | x | x | x | x |
-//-------------+---+---+---+---+---+---+
-// fast i4/i16 | x | x |   |   |   |   |
-//-------------+---+---+---+---+---+---+
-// rd-opt i4/16|   |   | x | x | x | x |
-//-------------+---+---+---+---+---+---+
-// Trellis     |   | x |   |   | x | x |
-//-------------+---+---+---+---+---+---+
-// full-SNS    |   |   |   |   |   | x |
-//-------------+---+---+---+---+---+---+
+// Mapping from config->method to coding tools used.
+//-------------------+---+---+---+---+---+---+---+
+//   Method          | 0 | 1 | 2 | 3 |(4)| 5 | 6 |
+//-------------------+---+---+---+---+---+---+---+
+// fast probe        | x |   |   | x |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// dynamic proba     | ~ | x | x | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// fast mode analysis|   |   |   |   | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// basic rd-opt      |   |   |   | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// disto-score i4/16 |   |   | x |   |   |   |   |
+//-------------------+---+---+---+---+---+---+---+
+// rd-opt i4/16      |   |   | ~ | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// token buffer (opt)|   |   |   | x | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
+// Trellis           |   |   |   |   |   | x |Ful|
+//-------------------+---+---+---+---+---+---+---+
+// full-SNS          |   |   |   |   | x | x | x |
+//-------------------+---+---+---+---+---+---+---+
 
 static void MapConfigToTools(VP8Encoder* const enc) {
-  const int method = enc->config_->method;
-  const int limit = 100 - enc->config_->partition_limit;
+  const WebPConfig* const config = enc->config_;
+  const int method = config->method;
+  const int limit = 100 - config->partition_limit;
   enc->method_ = method;
-  enc->rd_opt_level_ = (method >= 6) ? 3
-                     : (method >= 5) ? 2
-                     : (method >= 3) ? 1
-                     : 0;
+  enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL
+                     : (method >= 5) ? RD_OPT_TRELLIS
+                     : (method >= 3) ? RD_OPT_BASIC
+                     : RD_OPT_NONE;
   enc->max_i4_header_bits_ =
       256 * 16 * 16 *                 // upper bound: up to 16bit per 4x4 block
       (limit * limit) / (100 * 100);  // ... modulated with a quadratic curve.
+
+  enc->thread_level_ = config->thread_level;
+
+  enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0);
+  if (!config->low_memory) {
+#if !defined(DISABLE_TOKEN_BUFFER)
+    enc->use_tokens_ = (enc->rd_opt_level_ >= RD_OPT_BASIC);  // need rd stats
+#endif
+    if (enc->use_tokens_) {
+      enc->num_parts_ = 1;   // doesn't work with multi-partition
+    }
+  }
 }
 
 // Memory scaling with dimensions:
 //  memory (bytes) ~= 2.25 * w + 0.0625 * w * h
 //
-// Typical memory footprint (768x510 picture)
-// Memory used:
-//              encoder: 33919
-//          block cache: 2880
-//                 info: 3072
-//                preds: 24897
-//          top samples: 1623
-//             non-zero: 196
-//             lf-stats: 2048
-//                total: 68635
-// Transcient object sizes:
-//       VP8EncIterator: 352
-//         VP8ModeScore: 912
-//       VP8SegmentInfo: 532
-//             VP8Proba: 31032
+// Typical memory footprint (614x440 picture)
+//              encoder: 22111
+//                 info: 4368
+//                preds: 17741
+//          top samples: 1263
+//             non-zero: 175
+//             lf-stats: 0
+//                total: 45658
+// Transient object sizes:
+//       VP8EncIterator: 3360
+//         VP8ModeScore: 872
+//       VP8SegmentInfo: 732
+//          VP8EncProba: 18352
 //              LFStats: 2048
-// Picture size (yuv): 589824
+// Picture size (yuv): 419328
 
 static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
                                   WebPPicture* const picture) {
@@ -154,20 +145,16 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
   const int preds_h = 4 * mb_h + 1;
   const size_t preds_size = preds_w * preds_h * sizeof(uint8_t);
   const int top_stride = mb_w * 16;
-  const size_t nz_size = (mb_w + 1) * sizeof(uint32_t);
-  const size_t cache_size = (3 * YUV_SIZE + PRED_SIZE) * sizeof(uint8_t);
+  const size_t nz_size = (mb_w + 1) * sizeof(uint32_t) + WEBP_ALIGN_CST;
   const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo);
-  const size_t samples_size = (2 * top_stride +         // top-luma/u/v
-                               16 + 16 + 16 + 8 + 1 +   // left y/u/v
-                               2 * ALIGN_CST)           // align all
-                               * sizeof(uint8_t);
+  const size_t samples_size = 2 * top_stride * sizeof(uint8_t)  // top-luma/u/v
+                            + WEBP_ALIGN_CST;                   // align all
   const size_t lf_stats_size =
-      config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0;
+      config->autofilter ? sizeof(LFStats) + WEBP_ALIGN_CST : 0;
   VP8Encoder* enc;
   uint8_t* mem;
   const uint64_t size = (uint64_t)sizeof(VP8Encoder)   // main struct
-                      + ALIGN_CST                      // cache alignment
-                      + cache_size                     // working caches
+                      + WEBP_ALIGN_CST                 // cache alignment
                       + info_size                      // modes info
                       + preds_size                     // prediction modes
                       + samples_size                   // top/left samples
@@ -178,23 +165,22 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
   printf("===================================\n");
   printf("Memory used:\n"
          "             encoder: %ld\n"
-         "         block cache: %ld\n"
          "                info: %ld\n"
          "               preds: %ld\n"
          "         top samples: %ld\n"
          "            non-zero: %ld\n"
          "            lf-stats: %ld\n"
          "               total: %ld\n",
-         sizeof(VP8Encoder) + ALIGN_CST, cache_size, info_size,
+         sizeof(VP8Encoder) + WEBP_ALIGN_CST, info_size,
          preds_size, samples_size, nz_size, lf_stats_size, size);
-  printf("Transcient object sizes:\n"
+  printf("Transient object sizes:\n"
          "      VP8EncIterator: %ld\n"
          "        VP8ModeScore: %ld\n"
          "      VP8SegmentInfo: %ld\n"
-         "            VP8Proba: %ld\n"
+         "         VP8EncProba: %ld\n"
          "             LFStats: %ld\n",
          sizeof(VP8EncIterator), sizeof(VP8ModeScore),
-         sizeof(VP8SegmentInfo), sizeof(VP8Proba),
+         sizeof(VP8SegmentInfo), sizeof(VP8EncProba),
          sizeof(LFStats));
   printf("Picture size (yuv): %ld\n",
          mb_w * mb_h * 384 * sizeof(uint8_t));
@@ -206,41 +192,27 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
     return NULL;
   }
   enc = (VP8Encoder*)mem;
-  mem = (uint8_t*)DO_ALIGN(mem + sizeof(*enc));
+  mem = (uint8_t*)WEBP_ALIGN(mem + sizeof(*enc));
   memset(enc, 0, sizeof(*enc));
   enc->num_parts_ = 1 << config->partitions;
   enc->mb_w_ = mb_w;
   enc->mb_h_ = mb_h;
   enc->preds_w_ = preds_w;
-  enc->yuv_in_ = (uint8_t*)mem;
-  mem += YUV_SIZE;
-  enc->yuv_out_ = (uint8_t*)mem;
-  mem += YUV_SIZE;
-  enc->yuv_out2_ = (uint8_t*)mem;
-  mem += YUV_SIZE;
-  enc->yuv_p_ = (uint8_t*)mem;
-  mem += PRED_SIZE;
   enc->mb_info_ = (VP8MBInfo*)mem;
   mem += info_size;
   enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;
   mem += preds_w * preds_h * sizeof(uint8_t);
-  enc->nz_ = 1 + (uint32_t*)mem;
+  enc->nz_ = 1 + (uint32_t*)WEBP_ALIGN(mem);
   mem += nz_size;
-  enc->lf_stats_ = lf_stats_size ? (LFStats*)DO_ALIGN(mem) : NULL;
+  enc->lf_stats_ = lf_stats_size ? (LFStats*)WEBP_ALIGN(mem) : NULL;
   mem += lf_stats_size;
 
   // top samples (all 16-aligned)
-  mem = (uint8_t*)DO_ALIGN(mem);
+  mem = (uint8_t*)WEBP_ALIGN(mem);
   enc->y_top_ = (uint8_t*)mem;
   enc->uv_top_ = enc->y_top_ + top_stride;
   mem += 2 * top_stride;
-  mem = (uint8_t*)DO_ALIGN(mem + 1);
-  enc->y_left_ = (uint8_t*)mem;
-  mem += 16 + 16;
-  enc->u_left_ = (uint8_t*)mem;
-  mem += 16;
-  enc->v_left_ = (uint8_t*)mem;
-  mem += 8;
+  assert(mem <= (uint8_t*)enc + size);
 
   enc->config_ = config;
   enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2;
@@ -253,29 +225,32 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
   ResetSegmentHeader(enc);
   ResetFilterHeader(enc);
   ResetBoundaryPredictions(enc);
-
+  VP8EncDspCostInit();
   VP8EncInitAlpha(enc);
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-  VP8EncInitLayer(enc);
-#endif
 
+  // lower quality means smaller output -> we slightly modulate the page
+  // size based on quality. This is just a crude first-order prediction.
+  {
+    const float scale = 1.f + config->quality * 5.f / 100.f;  // in [1,6]
+    VP8TBufferInit(&enc->tokens_, (int)(mb_w * mb_h * 4 * scale));
+  }
   return enc;
 }
 
-static void DeleteVP8Encoder(VP8Encoder* enc) {
+static int DeleteVP8Encoder(VP8Encoder* enc) {
+  int ok = 1;
   if (enc != NULL) {
-    VP8EncDeleteAlpha(enc);
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-    VP8EncDeleteLayer(enc);
-#endif
-    free(enc);
+    ok = VP8EncDeleteAlpha(enc);
+    VP8TBufferClear(&enc->tokens_);
+    WebPSafeFree(enc);
   }
+  return ok;
 }
 
 //------------------------------------------------------------------------------
 
 static double GetPSNR(uint64_t err, uint64_t size) {
-  return err ? 10. * log10(255. * 255. * size / err) : 99.;
+  return (err > 0 && size > 0) ? 10. * log10(255. * 255. * size / err) : 99.;
 }
 
 static void FinalizePSNR(const VP8Encoder* const enc) {
@@ -332,7 +307,7 @@ int WebPReportProgress(const WebPPicture* const pic,
 //------------------------------------------------------------------------------
 
 int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
-  int ok;
+  int ok = 0;
 
   if (pic == NULL)
     return 0;
@@ -346,44 +321,63 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
   if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION)
     return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
 
+  if (!config->exact) {
+    WebPCleanupTransparentArea(pic);
+  }
+
   if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats));
 
   if (!config->lossless) {
     VP8Encoder* enc = NULL;
-    if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
-      if (pic->argb != NULL) {
-        if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
+    if (pic->use_argb || pic->y == NULL || pic->u == NULL || pic->v == NULL) {
+      // Make sure we have YUVA samples.
+      if (config->preprocessing & 4) {
+        if (!WebPPictureSmartARGBToYUVA(pic)) {
+          return 0;
+        }
       } else {
-        return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+        float dithering = 0.f;
+        if (config->preprocessing & 2) {
+          const float x = config->quality / 100.f;
+          const float x2 = x * x;
+          // slowly decreasing from max dithering at low quality (q->0)
+          // to 0.5 dithering amplitude at high quality (q->100)
+          dithering = 1.0f + (0.5f - 1.0f) * x2 * x2;
+        }
+        if (!WebPPictureARGBToYUVADithered(pic, WEBP_YUV420, dithering)) {
+          return 0;
+        }
       }
     }
 
     enc = InitVP8Encoder(config, pic);
     if (enc == NULL) return 0;  // pic->error is already set.
     // Note: each of the tasks below account for 20% in the progress report.
-    ok = VP8EncAnalyze(enc)
-      && VP8StatLoop(enc)
-      && VP8EncLoop(enc)
-      && VP8EncFinishAlpha(enc)
-#ifdef WEBP_EXPERIMENTAL_FEATURES
-      && VP8EncFinishLayer(enc)
-#endif
-      && VP8EncWrite(enc);
+    ok = VP8EncAnalyze(enc);
+
+    // Analysis is done, proceed to actual coding.
+    ok = ok && VP8EncStartAlpha(enc);   // possibly done in parallel
+    if (!enc->use_tokens_) {
+      ok = ok && VP8EncLoop(enc);
+    } else {
+      ok = ok && VP8EncTokenLoop(enc);
+    }
+    ok = ok && VP8EncFinishAlpha(enc);
+
+    ok = ok && VP8EncWrite(enc);
     StoreStats(enc);
     if (!ok) {
       VP8EncFreeBitWriters(enc);
     }
-    DeleteVP8Encoder(enc);
+    ok &= DeleteVP8Encoder(enc);  // must always be called, even if !ok
   } else {
-    if (pic->argb == NULL)
-      return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+    // Make sure we have ARGB samples.
+    if (pic->argb == NULL && !WebPPictureYUVAToARGB(pic)) {
+      return 0;
+    }
 
     ok = VP8LEncodeImage(config, pic);  // Sets pic->error in case of problem.
   }
 
   return ok;
 }
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/encode.h b/drivers/webp/encode.h
index 2e37cfabe7..c382ea7608 100644
--- a/drivers/webp/encode.h
+++ b/drivers/webp/encode.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //   WebP encoder: main interface
@@ -14,11 +16,22 @@
 
 #include "./types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-#define WEBP_ENCODER_ABI_VERSION 0x0200    // MAJOR(8b) + MINOR(8b)
+#define WEBP_ENCODER_ABI_VERSION 0x0209    // MAJOR(8b) + MINOR(8b)
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPImageHint WebPImageHint;
+// typedef enum WebPEncCSP WebPEncCSP;
+// typedef enum WebPPreset WebPPreset;
+// typedef enum WebPEncodingError WebPEncodingError;
+typedef struct WebPConfig WebPConfig;
+typedef struct WebPPicture WebPPicture;   // main structure for I/O
+typedef struct WebPAuxStats WebPAuxStats;
+typedef struct WebPMemoryWriter WebPMemoryWriter;
 
 // Return the encoder's version number, packed in hexadecimal using 8bits for
 // each of major/minor/revision. E.g: v2.5.7 is 0x020507.
@@ -29,7 +42,7 @@ WEBP_EXTERN(int) WebPGetEncoderVersion(void);
 
 // Returns the size of the compressed data (pointed to by *output), or 0 if
 // an error occurred. The compressed data must be released by the caller
-// using the call 'free(*output)'.
+// using the call 'WebPFree(*output)'.
 // These functions compress using the lossy format, and the quality_factor
 // can go from 0 (smaller output, lower quality) to 100 (best quality,
 // larger output).
@@ -62,11 +75,14 @@ WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
                                            int width, int height, int stride,
                                            uint8_t** output);
 
+// Releases memory returned by the WebPEncode*() functions above.
+WEBP_EXTERN(void) WebPFree(void* ptr);
+
 //------------------------------------------------------------------------------
 // Coding parameters
 
 // Image characteristics hint for the underlying encoder.
-typedef enum {
+typedef enum WebPImageHint {
   WEBP_HINT_DEFAULT = 0,  // default preset.
   WEBP_HINT_PICTURE,      // digital picture, like portrait, inner shot
   WEBP_HINT_PHOTO,        // outdoor photograph, with natural lighting
@@ -74,7 +90,8 @@ typedef enum {
   WEBP_HINT_LAST
 } WebPImageHint;
 
-typedef struct {
+// Compression parameters.
+struct WebPConfig {
   int lossless;           // Lossless encoding (0=lossy(default), 1=lossless).
   float quality;          // between 0 (smallest file) and 100 (biggest)
   int method;             // quality/speed trade-off (0=fast, 6=slower-better)
@@ -103,19 +120,38 @@ typedef struct {
 
   int show_compressed;    // if true, export the compressed picture back.
                           // In-loop filtering is not applied.
-  int preprocessing;      // preprocessing filter (0=none, 1=segment-smooth)
+  int preprocessing;      // preprocessing bits: 0=none, 1=segment-smooth,
+                          // 2=pseudo-random dithering, 4=smart RGB->YUV
   int partitions;         // log2(number of token partitions) in [0..3]. Default
                           // is set to 0 for easier progressive decoding.
   int partition_limit;    // quality degradation allowed to fit the 512k limit
                           // on prediction modes coding (0: no degradation,
                           // 100: maximum possible degradation).
-
-  uint32_t pad[8];        // padding for later use
-} WebPConfig;
+  int emulate_jpeg_size;  // If true, compression parameters will be remapped
+                          // to better match the expected output size from
+                          // JPEG compression. Generally, the output size will
+                          // be similar but the degradation will be lower.
+  int thread_level;       // If non-zero, try and use multi-threaded encoding.
+  int low_memory;         // If set, reduce memory usage (but increase CPU use).
+
+  int near_lossless;      // Near lossless encoding [0 = off(default) .. 100].
+                          // This feature is experimental.
+  int exact;              // if non-zero, preserve the exact RGB values under
+                          // transparent area. Otherwise, discard this invisible
+                          // RGB information for better compression. The default
+                          // value is 0.
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  int delta_palettization;
+  uint32_t pad[2];        // padding for later use
+#else
+  uint32_t pad[3];        // padding for later use
+#endif  // WEBP_EXPERIMENTAL_FEATURES
+};
 
 // Enumerate some predefined settings for WebPConfig, depending on the type
 // of source picture. These presets are used when calling WebPConfigPreset().
-typedef enum {
+typedef enum WebPPreset {
   WEBP_PRESET_DEFAULT = 0,  // default preset.
   WEBP_PRESET_PICTURE,      // digital picture, like portrait, inner shot
   WEBP_PRESET_PHOTO,        // outdoor photograph, with natural lighting
@@ -146,17 +182,23 @@ static WEBP_INLINE int WebPConfigPreset(WebPConfig* config,
                                 WEBP_ENCODER_ABI_VERSION);
 }
 
+// Activate the lossless compression mode with the desired efficiency level
+// between 0 (fastest, lowest compression) and 9 (slower, best compression).
+// A good default level is '6', providing a fair tradeoff between compression
+// speed and final compressed size.
+// This function will overwrite several fields from config: 'method', 'quality'
+// and 'lossless'. Returns false in case of parameter error.
+WEBP_EXTERN(int) WebPConfigLosslessPreset(WebPConfig* config, int level);
+
 // Returns true if 'config' is non-NULL and all configuration parameters are
 // within their valid ranges.
 WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
 
 //------------------------------------------------------------------------------
 // Input / Output
-
-typedef struct WebPPicture WebPPicture;   // main structure for I/O
-
 // Structure for storing auxiliary statistics (mostly for lossy encoding).
-typedef struct {
+
+struct WebPAuxStats {
   int coded_size;         // final size
 
   float PSNR[5];          // peak-signal-to-noise ratio for Y/U/V/All/Alpha
@@ -180,9 +222,11 @@ typedef struct {
   int cache_bits;              // number of bits for color cache lookup
   int palette_size;            // number of color in palette, if used
   int lossless_size;           // final lossless size
+  int lossless_hdr_size;       // lossless header (transform, huffman etc) size
+  int lossless_data_size;      // lossless image data size
 
-  uint32_t pad[4];        // padding for later use
-} WebPAuxStats;
+  uint32_t pad[2];        // padding for later use
+};
 
 // Signature for output function. Should return true if writing was successful.
 // data/data_size is the segment of data to write, and 'picture' is for
@@ -192,18 +236,22 @@ typedef int (*WebPWriterFunction)(const uint8_t* data, size_t data_size,
 
 // WebPMemoryWrite: a special WebPWriterFunction that writes to memory using
 // the following WebPMemoryWriter object (to be set as a custom_ptr).
-typedef struct {
+struct WebPMemoryWriter {
   uint8_t* mem;       // final buffer (of size 'max_size', larger than 'size').
   size_t   size;      // final size
   size_t   max_size;  // total capacity
   uint32_t pad[1];    // padding for later use
-} WebPMemoryWriter;
+};
 
 // The following must be called first before any use.
 WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
 
+// The following must be called to deallocate writer->mem memory. The 'writer'
+// object itself is not deallocated.
+WEBP_EXTERN(void) WebPMemoryWriterClear(WebPMemoryWriter* writer);
 // The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
 // completion, writer.mem and writer.size will hold the coded data.
+// writer.mem must be freed by calling WebPMemoryWriterClear.
 WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
                                  const WebPPicture* picture);
 
@@ -212,23 +260,17 @@ WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
 // everything is OK.
 typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
 
-typedef enum {
+// Color spaces.
+typedef enum WebPEncCSP {
   // chroma sampling
-  WEBP_YUV420 = 0,   // 4:2:0
-  WEBP_YUV422 = 1,   // 4:2:2
-  WEBP_YUV444 = 2,   // 4:4:4
-  WEBP_YUV400 = 3,   // grayscale
-  WEBP_CSP_UV_MASK = 3,   // bit-mask to get the UV sampling factors
-  // alpha channel variants
-  WEBP_YUV420A = 4,
-  WEBP_YUV422A = 5,
-  WEBP_YUV444A = 6,
-  WEBP_YUV400A = 7,   // grayscale + alpha
+  WEBP_YUV420  = 0,        // 4:2:0
+  WEBP_YUV420A = 4,        // alpha channel variant
+  WEBP_CSP_UV_MASK = 3,    // bit-mask to get the UV sampling factors
   WEBP_CSP_ALPHA_BIT = 4   // bit that is set if alpha is present
 } WebPEncCSP;
 
 // Encoding error conditions.
-typedef enum {
+typedef enum WebPEncodingError {
   VP8_ENC_OK = 0,
   VP8_ENC_ERROR_OUT_OF_MEMORY,            // memory error allocating objects
   VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY,  // memory error while flushing bits
@@ -248,7 +290,6 @@ typedef enum {
 
 // Main exchange structure (input samples, output bytes, statistics)
 struct WebPPicture {
-
   //   INPUT
   //////////////
   // Main flag for encoder selecting between ARGB or YUV input.
@@ -303,17 +344,15 @@ struct WebPPicture {
 
   uint32_t pad3[3];       // padding for later use
 
-  // Unused for now: original samples (for non-YUV420 modes)
-  uint8_t *u0, *v0;
-  int uv0_stride;
-
-  uint32_t pad4[7];       // padding for later use
+  // Unused for now
+  uint8_t *pad4, *pad5;
+  uint32_t pad6[8];       // padding for later use
 
   // PRIVATE FIELDS
   ////////////////////
   void* memory_;          // row chunk of memory for yuva planes
   void* memory_argb_;     // and for argb too.
-  void* pad5[2];          // padding for later use
+  void* pad7[2];          // padding for later use
 };
 
 // Internal, version-checked, entry point
@@ -343,18 +382,19 @@ WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
 // preserved.
 WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
 
-// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return,
-// *dst will fully own the copied pixels (this is not a view).
+// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return, *dst
+// will fully own the copied pixels (this is not a view). The 'dst' picture need
+// not be initialized as its content is overwritten.
 // Returns false in case of memory allocation error.
 WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
 
-// Compute PSNR or SSIM distortion between two pictures.
-// Result is in dB, stores in result[] in the Y/U/V/Alpha/All order.
-// Returns false in case of error (pic1 and pic2 don't have same dimension, ...)
+// Compute PSNR, SSIM or LSIM distortion metric between two pictures. Results
+// are in dB, stored in result[] in the Y/U/V/Alpha/All or B/G/R/A/All order.
+// Returns false in case of error (src and ref don't have same dimension, ...)
 // Warning: this function is rather CPU-intensive.
 WEBP_EXTERN(int) WebPPictureDistortion(
-    const WebPPicture* pic1, const WebPPicture* pic2,
-    int metric_type,           // 0 = PSNR, 1 = SSIM
+    const WebPPicture* src, const WebPPicture* ref,
+    int metric_type,           // 0 = PSNR, 1 = SSIM, 2 = LSIM
     float result[5]);
 
 // self-crops a picture to the rectangle defined by top/left/width/height.
@@ -375,7 +415,9 @@ WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
 // the top and left coordinates will be snapped to even values.
 // Picture 'src' must out-live 'dst' picture. Self-extraction of view is allowed
 // ('src' equal to 'dst') as a mean of fast-cropping (but note that doing so,
-// the original dimension will be lost).
+// the original dimension will be lost). Picture 'dst' need not be initialized
+// with WebPPictureInit() if it is different from 'src', since its content will
+// be overwritten.
 // Returns false in case of memory allocation error or invalid parameters.
 WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
                                  int left, int top, int width, int height,
@@ -386,7 +428,9 @@ WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
 WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
 
 // Rescale a picture to new dimension width x height.
-// Now gamma correction is applied.
+// If either 'width' or 'height' (but not both) is 0 the corresponding
+// dimension will be calculated preserving the aspect ratio.
+// No gamma correction is applied.
 // Returns false in case of error (invalid parameter or insufficient memory).
 WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
 
@@ -413,13 +457,28 @@ WEBP_EXTERN(int) WebPPictureImportBGRA(
 WEBP_EXTERN(int) WebPPictureImportBGRX(
     WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
 
-// Converts picture->argb data to the YUVA format specified by 'colorspace'.
+// Converts picture->argb data to the YUV420A format. The 'colorspace'
+// parameter is deprecated and should be equal to WEBP_YUV420.
 // Upon return, picture->use_argb is set to false. The presence of real
 // non-opaque transparent values is detected, and 'colorspace' will be
 // adjusted accordingly. Note that this method is lossy.
 // Returns false in case of error.
 WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
-                                       WebPEncCSP colorspace);
+                                       WebPEncCSP /*colorspace = WEBP_YUV420*/);
+
+// Same as WebPPictureARGBToYUVA(), but the conversion is done using
+// pseudo-random dithering with a strength 'dithering' between
+// 0.0 (no dithering) and 1.0 (maximum dithering). This is useful
+// for photographic pictures.
+WEBP_EXTERN(int) WebPPictureARGBToYUVADithered(
+    WebPPicture* picture, WebPEncCSP colorspace, float dithering);
+
+// Performs 'smart' RGBA->YUVA420 downsampling and colorspace conversion.
+// Downsampling is handled with extra care in case of color clipping. This
+// method is roughly 2x slower than WebPPictureARGBToYUVA() but produces better
+// YUV representation.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureSmartARGBToYUVA(WebPPicture* picture);
 
 // Converts picture->yuv to picture->argb and sets picture->use_argb to true.
 // The input format must be YUV_420 or YUV_420A.
@@ -429,9 +488,9 @@ WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
 // Returns false in case of error.
 WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
 
-// Helper function: given a width x height plane of YUV(A) samples
-// (with stride 'stride'), clean-up the YUV samples under fully transparent
-// area, to help compressibility (no guarantee, though).
+// Helper function: given a width x height plane of RGBA or YUV(A) samples,
+// clean up the YUV or RGB samples under fully transparent areas, to help
+// compressibility (no guarantee, though).
 WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
 
 // Scan the picture 'picture' for the presence of non fully opaque alpha values.
@@ -439,6 +498,11 @@ WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
 // alpha plane can be ignored altogether e.g.).
 WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
 
+// Remove the transparency information (if present) by blending the color with
+// the background color 'background_rgb' (specified as 24bit RGB triplet).
+// After this call, all alpha values are reset to 0xff.
+WEBP_EXTERN(void) WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb);
+
 //------------------------------------------------------------------------------
 // Main call
 
@@ -456,7 +520,7 @@ WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/extras.h b/drivers/webp/extras.h
new file mode 100644
index 0000000000..1c24be2e0c
--- /dev/null
+++ b/drivers/webp/extras.h
@@ -0,0 +1,51 @@
+// Copyright 2015 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+
+#ifndef WEBP_WEBP_EXTRAS_H_
+#define WEBP_WEBP_EXTRAS_H_
+
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "./encode.h"
+
+#define WEBP_EXTRAS_ABI_VERSION 0x0000    // MAJOR(8b) + MINOR(8b)
+
+//------------------------------------------------------------------------------
+
+// Returns the version number of the extras library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetExtrasVersion(void);
+
+//------------------------------------------------------------------------------
+// Ad-hoc colorspace importers.
+
+// Import luma sample (gray scale image) into 'picture'. The 'picture'
+// width and height must be set prior to calling this function.
+WEBP_EXTERN(int) WebPImportGray(const uint8_t* gray, WebPPicture* picture);
+
+// Import rgb sample in RGB565 packed format into 'picture'. The 'picture'
+// width and height must be set prior to calling this function.
+WEBP_EXTERN(int) WebPImportRGB565(const uint8_t* rgb565, WebPPicture* pic);
+
+// Import rgb sample in RGB4444 packed format into 'picture'. The 'picture'
+// width and height must be set prior to calling this function.
+WEBP_EXTERN(int) WebPImportRGB4444(const uint8_t* rgb4444, WebPPicture* pic);
+
+//------------------------------------------------------------------------------
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_EXTRAS_H_ */
diff --git a/drivers/webp/format_constants.h b/drivers/webp/format_constants.h
index 7ce498f672..b6e78a643e 100644
--- a/drivers/webp/format_constants.h
+++ b/drivers/webp/format_constants.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Internal header for constants related to WebP file format.
@@ -12,6 +14,9 @@
 #ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_
 #define WEBP_WEBP_FORMAT_CONSTANTS_H_
 
+// Create fourcc of the chunk from the chunk tag characters.
+#define MKFOURCC(a, b, c, d) ((a) | (b) << 8 | (c) << 16 | (uint32_t)(d) << 24)
+
 // VP8 related constants.
 #define VP8_SIGNATURE 0x9d012a              // Signature in VP8 data.
 #define VP8_MAX_PARTITION0_SIZE (1 << 19)   // max size of mode partition
@@ -65,23 +70,16 @@ typedef enum {
 #define CHUNK_SIZE_BYTES   4     // Size needed to store chunk's size.
 #define CHUNK_HEADER_SIZE  8     // Size of a chunk header.
 #define RIFF_HEADER_SIZE   12    // Size of the RIFF header ("RIFFnnnnWEBP").
-#define FRAME_CHUNK_SIZE   15    // Size of a FRM chunk.
-#define LOOP_CHUNK_SIZE    2     // Size of a LOOP chunk.
-#define TILE_CHUNK_SIZE    6     // Size of a TILE chunk.
+#define ANMF_CHUNK_SIZE    16    // Size of an ANMF chunk.
+#define ANIM_CHUNK_SIZE    6     // Size of an ANIM chunk.
+#define FRGM_CHUNK_SIZE    6     // Size of a FRGM chunk.
 #define VP8X_CHUNK_SIZE    10    // Size of a VP8X chunk.
 
-#define TILING_FLAG_BIT    0x01  // Set if tiles are possibly used.
-#define ANIMATION_FLAG_BIT 0x02  // Set if some animation is expected
-#define ICC_FLAG_BIT       0x04  // Whether ICC is present or not.
-#define METADATA_FLAG_BIT  0x08  // Set if some META chunk is possibly present.
-#define ALPHA_FLAG_BIT     0x10  // Should be same as the ALPHA_FLAG in mux.h
-#define ROTATION_FLAG_BITS 0xe0  // all 3 bits for rotation + symmetry
-
-#define MAX_CANVAS_SIZE     (1 << 24)    // 24-bit max for VP8X width/height.
-#define MAX_IMAGE_AREA      (1ULL << 32) // 32-bit max for width x height.
-#define MAX_LOOP_COUNT      (1 << 16)    // maximum value for loop-count
-#define MAX_DURATION        (1 << 24)    // maximum duration
-#define MAX_POSITION_OFFSET (1 << 24)    // maximum frame/tile x/y offset
+#define MAX_CANVAS_SIZE     (1 << 24)     // 24-bit max for VP8X width/height.
+#define MAX_IMAGE_AREA      (1ULL << 32)  // 32-bit max for width x height.
+#define MAX_LOOP_COUNT      (1 << 16)     // maximum value for loop-count
+#define MAX_DURATION        (1 << 24)     // maximum duration
+#define MAX_POSITION_OFFSET (1 << 24)     // maximum frame/fragment x/y offset
 
 // Maximum chunk payload is such that adding the header and padding won't
 // overflow a uint32_t.
diff --git a/drivers/webp/mux.h b/drivers/webp/mux.h
index 5139af80fa..1fddfb76d4 100644
--- a/drivers/webp/mux.h
+++ b/drivers/webp/mux.h
@@ -1,60 +1,76 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-//  RIFF container manipulation for WEBP images.
+//  RIFF container manipulation and encoding for WebP images.
 //
 // Authors: Urvang (urvang@google.com)
 //          Vikas (vikasa@google.com)
 
-// This API allows manipulation of WebP container images containing features
-// like Color profile, XMP metadata, Animation and Tiling.
-//
-// Code Example#1: Creating a MUX with image data, color profile and XMP
-// metadata.
-//
-//   int copy_data = 0;
-//   WebPMux* mux = WebPMuxNew();
-//   // ... (Prepare image data).
-//   WebPMuxSetImage(mux, &image, copy_data);
-//   // ... (Prepare ICCP color profile data).
-//   WebPMuxSetColorProfile(mux, &icc_profile, copy_data);
-//   // ... (Prepare XMP metadata).
-//   WebPMuxSetMetadata(mux, &xmp, copy_data);
-//   // Get data from mux in WebP RIFF format.
-//   WebPMuxAssemble(mux, &output_data);
-//   WebPMuxDelete(mux);
-//   // ... (Consume output_data; e.g. write output_data.bytes_ to file).
-//   WebPDataClear(&output_data);
-//
-// Code Example#2: Get image and color profile data from a WebP file.
-//
-//   int copy_data = 0;
-//   // ... (Read data from file).
-//   WebPMux* mux = WebPMuxCreate(&data, copy_data);
-//   WebPMuxGetImage(mux, &image);
-//   // ... (Consume image; e.g. call WebPDecode() to decode the data).
-//   WebPMuxGetColorProfile(mux, &icc_profile);
-//   // ... (Consume icc_data).
-//   WebPMuxDelete(mux);
-//   free(data);
-
 #ifndef WEBP_WEBP_MUX_H_
 #define WEBP_WEBP_MUX_H_
 
-#include "./types.h"
+#include "./mux_types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-#define WEBP_MUX_ABI_VERSION 0x0100        // MAJOR(8b) + MINOR(8b)
+#define WEBP_MUX_ABI_VERSION 0x0106        // MAJOR(8b) + MINOR(8b)
+
+//------------------------------------------------------------------------------
+// Mux API
+//
+// This API allows manipulation of WebP container images containing features
+// like color profile, metadata, animation and fragmented images.
+//
+// Code Example#1: Create a WebPMux object with image data, color profile and
+// XMP metadata.
+/*
+  int copy_data = 0;
+  WebPMux* mux = WebPMuxNew();
+  // ... (Prepare image data).
+  WebPMuxSetImage(mux, &image, copy_data);
+  // ... (Prepare ICCP color profile data).
+  WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
+  // ... (Prepare XMP metadata).
+  WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
+  // Get data from mux in WebP RIFF format.
+  WebPMuxAssemble(mux, &output_data);
+  WebPMuxDelete(mux);
+  // ... (Consume output_data; e.g. write output_data.bytes to file).
+  WebPDataClear(&output_data);
+*/
+
+// Code Example#2: Get image and color profile data from a WebP file.
+/*
+  int copy_data = 0;
+  // ... (Read data from file).
+  WebPMux* mux = WebPMuxCreate(&data, copy_data);
+  WebPMuxGetFrame(mux, 1, &image);
+  // ... (Consume image; e.g. call WebPDecode() to decode the data).
+  WebPMuxGetChunk(mux, "ICCP", &icc_profile);
+  // ... (Consume icc_profile).
+  WebPMuxDelete(mux);
+  free(data);
+*/
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// the types are left here for reference.
+// typedef enum WebPMuxError WebPMuxError;
+// typedef enum WebPChunkId WebPChunkId;
+typedef struct WebPMux WebPMux;   // main opaque object.
+typedef struct WebPMuxFrameInfo WebPMuxFrameInfo;
+typedef struct WebPMuxAnimParams WebPMuxAnimParams;
+typedef struct WebPAnimEncoderOptions WebPAnimEncoderOptions;
 
 // Error codes
-typedef enum {
+typedef enum WebPMuxError {
   WEBP_MUX_OK                 =  1,
   WEBP_MUX_NOT_FOUND          =  0,
   WEBP_MUX_INVALID_ARGUMENT   = -1,
@@ -63,51 +79,26 @@ typedef enum {
   WEBP_MUX_NOT_ENOUGH_DATA    = -4
 } WebPMuxError;
 
-// Flag values for different features used in VP8X chunk.
-typedef enum {
-  TILE_FLAG       = 0x00000001,
-  ANIMATION_FLAG  = 0x00000002,
-  ICCP_FLAG       = 0x00000004,
-  META_FLAG       = 0x00000008,
-  ALPHA_FLAG      = 0x00000010
-} WebPFeatureFlags;
-
 // IDs for different types of chunks.
-typedef enum {
+typedef enum WebPChunkId {
   WEBP_CHUNK_VP8X,     // VP8X
   WEBP_CHUNK_ICCP,     // ICCP
-  WEBP_CHUNK_LOOP,     // LOOP
-  WEBP_CHUNK_FRAME,    // FRM
-  WEBP_CHUNK_TILE,     // TILE
+  WEBP_CHUNK_ANIM,     // ANIM
+  WEBP_CHUNK_ANMF,     // ANMF
+  WEBP_CHUNK_FRGM,     // FRGM
   WEBP_CHUNK_ALPHA,    // ALPH
   WEBP_CHUNK_IMAGE,    // VP8/VP8L
-  WEBP_CHUNK_META,     // META
+  WEBP_CHUNK_EXIF,     // EXIF
+  WEBP_CHUNK_XMP,      // XMP
   WEBP_CHUNK_UNKNOWN,  // Other chunks.
   WEBP_CHUNK_NIL
 } WebPChunkId;
 
-typedef struct WebPMux WebPMux;   // main opaque object.
-
-// Data type used to describe 'raw' data, e.g., chunk data
-// (ICC profile, metadata) and WebP compressed image data.
-typedef struct {
-  const uint8_t* bytes_;
-  size_t size_;
-} WebPData;
-
 //------------------------------------------------------------------------------
-// Manipulation of a WebPData object.
-
-// Initializes the contents of the 'webp_data' object with default values.
-WEBP_EXTERN(void) WebPDataInit(WebPData* webp_data);
 
-// Clears the contents of the 'webp_data' object by calling free(). Does not
-// deallocate the object itself.
-WEBP_EXTERN(void) WebPDataClear(WebPData* webp_data);
-
-// Allocates necessary storage for 'dst' and copies the contents of 'src'.
-// Returns true on success.
-WEBP_EXTERN(int) WebPDataCopy(const WebPData* src, WebPData* dst);
+// Returns the version number of the mux library, packed in hexadecimal using
+// 8bits for each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetMuxVersion(void);
 
 //------------------------------------------------------------------------------
 // Life of a Mux object
@@ -118,6 +109,7 @@ WEBP_EXTERN(WebPMux*) WebPNewInternal(int);
 // Creates an empty mux object.
 // Returns:
 //   A pointer to the newly created empty mux object.
+//   Or NULL in case of memory error.
 static WEBP_INLINE WebPMux* WebPMuxNew(void) {
   return WebPNewInternal(WEBP_MUX_ABI_VERSION);
 }
@@ -136,8 +128,8 @@ WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
 // Creates a mux object from raw data given in WebP RIFF format.
 // Parameters:
 //   bitstream - (in) the bitstream data in WebP RIFF format
-//   copy_data - (in) value 1 indicates given data WILL copied to the mux, and
-//               value 0 indicates data will NOT be copied.
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
 // Returns:
 //   A pointer to the mux object created from given data - on success.
 //   NULL - In case of invalid data or memory error.
@@ -147,295 +139,237 @@ static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream,
 }
 
 //------------------------------------------------------------------------------
-// Single Image.
-
-// Sets the image in the mux object. Any existing images (including frame/tile)
-// will be removed.
-// Parameters:
-//   mux - (in/out) object in which the image is to be set
-//   bitstream - (in) can either be a raw VP8/VP8L bitstream or a single-image
-//               WebP file (non-animated and non-tiled)
-//   copy_data - (in) value 1 indicates given data WILL copied to the mux, and
-//               value 0 indicates data will NOT be copied.
-// Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
-//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
-//   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(WebPMux* mux,
-                                          const WebPData* bitstream,
-                                          int copy_data);
-
-// Gets image data from the mux object.
-// The content of 'bitstream' is allocated using malloc(), and NOT
-// owned by the 'mux' object. It MUST be deallocated by the caller by calling
-// WebPDataClear().
-// Parameters:
-//   mux - (in) object from which the image is to be fetched
-//   bitstream - (out) the image data
-// Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux or bitstream is NULL
-//                               OR mux contains animation/tiling.
-//   WEBP_MUX_NOT_FOUND - if image is not present in mux object.
-//   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetImage(const WebPMux* mux,
-                                          WebPData* bitstream);
-
-// Deletes the image in the mux object.
-// Parameters:
-//   mux - (in/out) object from which the image is to be deleted
-// Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
-//                               OR if mux contains animation/tiling.
-//   WEBP_MUX_NOT_FOUND - if image is not present in mux object.
-//   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxDeleteImage(WebPMux* mux);
+// Non-image chunks.
 
-//------------------------------------------------------------------------------
-// XMP Metadata.
+// Note: Only non-image related chunks should be managed through chunk APIs.
+// (Image related chunks are: "ANMF", "FRGM", "VP8 ", "VP8L" and "ALPH").
+// To add, get and delete images, use WebPMuxSetImage(), WebPMuxPushFrame(),
+// WebPMuxGetFrame() and WebPMuxDeleteFrame().
 
-// Sets the XMP metadata in the mux object. Any existing metadata chunk(s) will
-// be removed.
+// Adds a chunk with id 'fourcc' and data 'chunk_data' in the mux object.
+// Any existing chunk(s) with the same id will be removed.
 // Parameters:
-//   mux - (in/out) object to which the XMP metadata is to be added
-//   metadata - (in) the XMP metadata data to be added
-//   copy_data - (in) value 1 indicates given data WILL copied to the mux, and
-//               value 0 indicates data will NOT be copied.
+//   mux - (in/out) object to which the chunk is to be added
+//   fourcc - (in) a character array containing the fourcc of the given chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+//   chunk_data - (in) the chunk data to be added
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux or metadata is NULL.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
+//                               or if fourcc corresponds to an image chunk.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetMetadata(WebPMux* mux,
-                                             const WebPData* metadata,
-                                             int copy_data);
+WEBP_EXTERN(WebPMuxError) WebPMuxSetChunk(
+    WebPMux* mux, const char fourcc[4], const WebPData* chunk_data,
+    int copy_data);
 
-// Gets a reference to the XMP metadata in the mux object.
+// Gets a reference to the data of the chunk with id 'fourcc' in the mux object.
 // The caller should NOT free the returned data.
 // Parameters:
-//   mux - (in) object from which the XMP metadata is to be fetched
-//   metadata - (out) XMP metadata
+//   mux - (in) object from which the chunk data is to be fetched
+//   fourcc - (in) a character array containing the fourcc of the chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
+//   chunk_data - (out) returned chunk data
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux or metadata is NULL.
-//   WEBP_MUX_NOT_FOUND - if metadata is not present in mux object.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, fourcc or chunk_data is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given id.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetMetadata(const WebPMux* mux,
-                                             WebPData* metadata);
+WEBP_EXTERN(WebPMuxError) WebPMuxGetChunk(
+    const WebPMux* mux, const char fourcc[4], WebPData* chunk_data);
 
-// Deletes the XMP metadata in the mux object.
+// Deletes the chunk with the given 'fourcc' from the mux object.
 // Parameters:
-//   mux - (in/out) object from which XMP metadata is to be deleted
+//   mux - (in/out) object from which the chunk is to be deleted
+//   fourcc - (in) a character array containing the fourcc of the chunk;
+//                 e.g., "ICCP", "XMP ", "EXIF" etc.
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
-//   WEBP_MUX_NOT_FOUND - If mux does not contain metadata.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or fourcc is NULL
+//                               or if fourcc corresponds to an image chunk.
+//   WEBP_MUX_NOT_FOUND - If mux does not contain a chunk with the given fourcc.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxDeleteMetadata(WebPMux* mux);
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteChunk(
+    WebPMux* mux, const char fourcc[4]);
 
 //------------------------------------------------------------------------------
-// ICC Color Profile.
-
-// Sets the color profile in the mux object. Any existing color profile chunk(s)
-// will be removed.
+// Images.
+
+// Encapsulates data about a single frame/fragment.
+struct WebPMuxFrameInfo {
+  WebPData    bitstream;  // image data: can be a raw VP8/VP8L bitstream
+                          // or a single-image WebP file.
+  int         x_offset;   // x-offset of the frame.
+  int         y_offset;   // y-offset of the frame.
+  int         duration;   // duration of the frame (in milliseconds).
+
+  WebPChunkId id;         // frame type: should be one of WEBP_CHUNK_ANMF,
+                          // WEBP_CHUNK_FRGM or WEBP_CHUNK_IMAGE
+  WebPMuxAnimDispose dispose_method;  // Disposal method for the frame.
+  WebPMuxAnimBlend   blend_method;    // Blend operation for the frame.
+  uint32_t    pad[1];     // padding for later use
+};
+
+// Sets the (non-animated and non-fragmented) image in the mux object.
+// Note: Any existing images (including frames/fragments) will be removed.
 // Parameters:
-//   mux - (in/out) object to which the color profile is to be added
-//   color_profile - (in) the color profile data to be added
-//   copy_data - (in) value 1 indicates given data WILL copied to the mux, and
-//               value 0 indicates data will NOT be copied.
-// Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux or color_profile is NULL
-//   WEBP_MUX_MEMORY_ERROR - on memory allocation error
-//   WEBP_MUX_OK - on success
-WEBP_EXTERN(WebPMuxError) WebPMuxSetColorProfile(WebPMux* mux,
-                                                 const WebPData* color_profile,
-                                                 int copy_data);
-
-// Gets a reference to the color profile in the mux object.
-// The caller should NOT free the returned data.
-// Parameters:
-//   mux - (in) object from which the color profile data is to be fetched
-//   color_profile - (out) color profile data
+//   mux - (in/out) object in which the image is to be set
+//   bitstream - (in) can be a raw VP8/VP8L bitstream or a single-image
+//               WebP file (non-animated and non-fragmented)
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux or color_profile is NULL.
-//   WEBP_MUX_NOT_FOUND - if color profile is not present in mux object.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetColorProfile(const WebPMux* mux,
-                                                 WebPData* color_profile);
-
-// Deletes the color profile in the mux object.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(
+    WebPMux* mux, const WebPData* bitstream, int copy_data);
+
+// Adds a frame at the end of the mux object.
+// Notes: (1) frame.id should be one of WEBP_CHUNK_ANMF or WEBP_CHUNK_FRGM
+//        (2) For setting a non-animated non-fragmented image, use
+//            WebPMuxSetImage() instead.
+//        (3) Type of frame being pushed must be same as the frames in mux.
+//        (4) As WebP only supports even offsets, any odd offset will be snapped
+//            to an even location using: offset &= ~1
 // Parameters:
-//   mux - (in/out) object from which color profile is to be deleted
+//   mux - (in/out) object to which the frame is to be added
+//   frame - (in) frame data.
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux
+//               object and value 0 indicates data will NOT be copied.
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
-//   WEBP_MUX_NOT_FOUND - If mux does not contain color profile.
-//   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxDeleteColorProfile(WebPMux* mux);
-
-//------------------------------------------------------------------------------
-// Animation.
-
-// Adds an animation frame at the end of the mux object.
-// Note: as WebP only supports even offsets, any odd offset will be snapped to
-// an even location using: offset &= ~1
-// Parameters:
-//   mux - (in/out) object to which an animation frame is to be added
-//   bitstream - (in) the image data corresponding to the frame. It can either
-//               be a raw VP8/VP8L bitstream or a single-image WebP file
-//               (non-animated and non-tiled)
-//   x_offset - (in) x-offset of the frame to be added
-//   y_offset - (in) y-offset of the frame to be added
-//   duration - (in) duration of the frame to be added (in milliseconds)
-//   copy_data - (in) value 1 indicates given data WILL copied to the mux, and
-//               value 0 indicates data will NOT be copied.
-// Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL
+//                               or if content of 'frame' is invalid.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
 WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
-    WebPMux* mux, const WebPData* bitstream,
-    int x_offset, int y_offset, int duration, int copy_data);
-
-// TODO(urvang): Create a struct as follows to reduce argument list size:
-// typedef struct {
-//  WebPData bitstream;
-//  int x_offset, y_offset;
-//  int duration;
-// } FrameInfo;
-
-// Gets the nth animation frame from the mux object.
-// The content of 'bitstream' is allocated using malloc(), and NOT
+    WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
+
+// Gets the nth frame from the mux object.
+// The content of 'frame->bitstream' is allocated using malloc(), and NOT
 // owned by the 'mux' object. It MUST be deallocated by the caller by calling
 // WebPDataClear().
 // nth=0 has a special meaning - last position.
 // Parameters:
 //   mux - (in) object from which the info is to be fetched
 //   nth - (in) index of the frame in the mux object
-//   bitstream - (out) the image data
-//   x_offset - (out) x-offset of the returned frame
-//   y_offset - (out) y-offset of the returned frame
-//   duration - (out) duration of the returned frame (in milliseconds)
+//   frame - (out) data of the returned frame
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux, bitstream, x_offset,
-//                               y_offset, or duration is NULL
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or frame is NULL.
 //   WEBP_MUX_NOT_FOUND - if there are less than nth frames in the mux object.
 //   WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
 WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
-    const WebPMux* mux, uint32_t nth, WebPData* bitstream,
-    int* x_offset, int* y_offset, int* duration);
+    const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame);
 
-// Deletes an animation frame from the mux object.
+// Deletes a frame from the mux object.
 // nth=0 has a special meaning - last position.
 // Parameters:
 //   mux - (in/out) object from which a frame is to be deleted
 //   nth - (in) The position from which the frame is to be deleted
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL.
 //   WEBP_MUX_NOT_FOUND - If there are less than nth frames in the mux object
 //                        before deletion.
 //   WEBP_MUX_OK - on success.
 WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
 
-// Sets the animation loop count in the mux object. Any existing loop count
-// value(s) will be removed.
+//------------------------------------------------------------------------------
+// Animation.
+
+// Animation parameters.
+struct WebPMuxAnimParams {
+  uint32_t bgcolor;  // Background color of the canvas stored (in MSB order) as:
+                     // Bits 00 to 07: Alpha.
+                     // Bits 08 to 15: Red.
+                     // Bits 16 to 23: Green.
+                     // Bits 24 to 31: Blue.
+  int loop_count;    // Number of times to repeat the animation [0 = infinite].
+};
+
+// Sets the animation parameters in the mux object. Any existing ANIM chunks
+// will be removed.
 // Parameters:
-//   mux - (in/out) object in which loop chunk is to be set/added
-//   loop_count - (in) animation loop count value.
-//                Note that loop_count of zero denotes infinite loop.
+//   mux - (in/out) object in which ANIM chunk is to be set/added
+//   params - (in) animation parameters.
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxSetLoopCount(WebPMux* mux, int loop_count);
+WEBP_EXTERN(WebPMuxError) WebPMuxSetAnimationParams(
+    WebPMux* mux, const WebPMuxAnimParams* params);
 
-// Gets the animation loop count from the mux object.
+// Gets the animation parameters from the mux object.
 // Parameters:
-//   mux - (in) object from which the loop count is to be fetched
-//   loop_count - (out) the loop_count value present in the LOOP chunk
+//   mux - (in) object from which the animation parameters are to be fetched
+//   params - (out) animation parameters extracted from the ANIM chunk
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either of mux or loop_count is NULL
-//   WEBP_MUX_NOT_FOUND - if loop chunk is not present in mux object.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or params is NULL.
+//   WEBP_MUX_NOT_FOUND - if ANIM chunk is not present in mux object.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetLoopCount(const WebPMux* mux,
-                                              int* loop_count);
+WEBP_EXTERN(WebPMuxError) WebPMuxGetAnimationParams(
+    const WebPMux* mux, WebPMuxAnimParams* params);
 
 //------------------------------------------------------------------------------
-// Tiling.
-
-// Adds a tile at the end of the mux object.
-// Note: as WebP only supports even offsets, any odd offset will be snapped to
-// an even location using: offset &= ~1
-// Parameters:
-//   mux - (in/out) object to which a tile is to be added.
-//   bitstream - (in) the image data corresponding to the frame. It can either
-//               be a raw VP8/VP8L bitstream or a single-image WebP file
-//               (non-animated and non-tiled)
-//   x_offset - (in) x-offset of the tile to be added
-//   y_offset - (in) y-offset of the tile to be added
-//   copy_data - (in) value 1 indicates given data WILL copied to the mux, and
-//               value 0 indicates data will NOT be copied.
-// Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL
-//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
-//   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxPushTile(
-    WebPMux* mux, const WebPData* bitstream,
-    int x_offset, int y_offset, int copy_data);
+// Misc Utilities.
 
-// Gets the nth tile from the mux object.
-// The content of 'bitstream' is allocated using malloc(), and NOT
-// owned by the 'mux' object. It MUST be deallocated by the caller by calling
-// WebPDataClear().
-// nth=0 has a special meaning - last position.
+// Sets the canvas size for the mux object. The width and height can be
+// specified explicitly or left as zero (0, 0).
+// * When width and height are specified explicitly, then this frame bound is
+//   enforced during subsequent calls to WebPMuxAssemble() and an error is
+//   reported if any animated frame does not completely fit within the canvas.
+// * When unspecified (0, 0), the constructed canvas will get the frame bounds
+//   from the bounding-box over all frames after calling WebPMuxAssemble().
 // Parameters:
-//   mux - (in) object from which the info is to be fetched
-//   nth - (in) index of the tile in the mux object
-//   bitstream - (out) the image data
-//   x_offset - (out) x-offset of the returned tile
-//   y_offset - (out) y-offset of the returned tile
+//   mux - (in/out) object to which the canvas size is to be set
+//   width - (in) canvas width
+//   height - (in) canvas height
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux, bitstream, x_offset or
-//                               y_offset is NULL
-//   WEBP_MUX_NOT_FOUND - if there are less than nth tiles in the mux object.
-//   WEBP_MUX_BAD_DATA - if nth tile chunk in mux is invalid.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL; or
+//                               width or height are invalid or out of bounds
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxGetTile(
-    const WebPMux* mux, uint32_t nth, WebPData* bitstream,
-    int* x_offset, int* y_offset);
+WEBP_EXTERN(WebPMuxError) WebPMuxSetCanvasSize(WebPMux* mux,
+                                               int width, int height);
 
-// Deletes a tile from the mux object.
-// nth=0 has a special meaning - last position
+// Gets the canvas size from the mux object.
+// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
+// That is, the mux object hasn't been modified since the last call to
+// WebPMuxAssemble() or WebPMuxCreate().
 // Parameters:
-//   mux - (in/out) object from which a tile is to be deleted
-//   nth - (in) The position from which the tile is to be deleted
+//   mux - (in) object from which the canvas size is to be fetched
+//   width - (out) canvas width
+//   height - (out) canvas height
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
-//   WEBP_MUX_NOT_FOUND - If there are less than nth tiles in the mux object
-//                        before deletion.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux, width or height is NULL.
+//   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
 //   WEBP_MUX_OK - on success.
-WEBP_EXTERN(WebPMuxError) WebPMuxDeleteTile(WebPMux* mux, uint32_t nth);
-
-//------------------------------------------------------------------------------
-// Misc Utilities.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetCanvasSize(const WebPMux* mux,
+                                               int* width, int* height);
 
 // Gets the feature flags from the mux object.
+// Note: This method assumes that the VP8X chunk, if present, is up-to-date.
+// That is, the mux object hasn't been modified since the last call to
+// WebPMuxAssemble() or WebPMuxCreate().
 // Parameters:
 //   mux - (in) object from which the features are to be fetched
 //   flags - (out) the flags specifying which features are present in the
 //           mux object. This will be an OR of various flag values.
 //           Enum 'WebPFeatureFlags' can be used to test individual flag values.
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL
-//   WEBP_MUX_NOT_FOUND - if VP8X chunk is not present in mux object.
-//   WEBP_MUX_BAD_DATA - if VP8X chunk in mux is invalid.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL.
+//   WEBP_MUX_BAD_DATA - if VP8X/VP8/VP8L chunk or canvas size is invalid.
 //   WEBP_MUX_OK - on success.
 WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
                                              uint32_t* flags);
 
-// Gets number of chunks having tag value tag in the mux object.
+// Gets number of chunks with the given 'id' in the mux object.
 // Parameters:
 //   mux - (in) object from which the info is to be fetched
 //   id - (in) chunk id specifying the type of chunk
 //   num_elements - (out) number of chunks with the given chunk id
 // Returns:
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux, or num_elements is NULL
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or num_elements is NULL.
 //   WEBP_MUX_OK - on success.
 WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
                                            WebPChunkId id, int* num_elements);
@@ -445,159 +379,151 @@ WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
 // Note: The content of 'assembled_data' will be ignored and overwritten.
 // Also, the content of 'assembled_data' is allocated using malloc(), and NOT
 // owned by the 'mux' object. It MUST be deallocated by the caller by calling
-// WebPDataClear().
+// WebPDataClear(). It's always safe to call WebPDataClear() upon return,
+// even in case of error.
 // Parameters:
 //   mux - (in/out) object whose chunks are to be assembled
 //   assembled_data - (out) assembled WebP data
 // Returns:
 //   WEBP_MUX_BAD_DATA - if mux object is invalid.
-//   WEBP_MUX_INVALID_ARGUMENT - if either mux, output_data or output_size is
-//                               NULL.
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or assembled_data is NULL.
 //   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
-//   WEBP_MUX_OK - on success
+//   WEBP_MUX_OK - on success.
 WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux,
                                           WebPData* assembled_data);
 
 //------------------------------------------------------------------------------
-// Demux API.
-// Enables extraction of image and extended format data from WebP files.
-
-#define WEBP_DEMUX_ABI_VERSION 0x0100    // MAJOR(8b) + MINOR(8b)
-
-typedef struct WebPDemuxer WebPDemuxer;
-
-typedef enum {
-  WEBP_DEMUX_PARSING_HEADER,  // Not enough data to parse full header.
-  WEBP_DEMUX_PARSED_HEADER,   // Header parsing complete, data may be available.
-  WEBP_DEMUX_DONE             // Entire file has been parsed.
-} WebPDemuxState;
-
-//------------------------------------------------------------------------------
-// Life of a Demux object
-
-// Internal, version-checked, entry point
-WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
-    const WebPData*, int, WebPDemuxState*, int);
-
-// Parses the WebP file given by 'data'.
-// A complete WebP file must be present in 'data' for the function to succeed.
-// Returns a WebPDemuxer object on successful parse, NULL otherwise.
-static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
-  return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION);
-}
-
-// Parses the WebP file given by 'data'.
-// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
-// Returns a WebPDemuxer object on successful parse, NULL otherwise.
-static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
-    const WebPData* data, WebPDemuxState* state) {
-  return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION);
+// WebPAnimEncoder API
+//
+// This API allows encoding (possibly) animated WebP images.
+//
+// Code Example:
+/*
+  WebPAnimEncoderOptions enc_options;
+  WebPAnimEncoderOptionsInit(&enc_options);
+  // Tune 'enc_options' as needed.
+  WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &enc_options);
+  while(<there are more frames>) {
+    WebPConfig config;
+    WebPConfigInit(&config);
+    // Tune 'config' as needed.
+    WebPAnimEncoderAdd(enc, frame, timestamp_ms, &config);
+  }
+  WebPAnimEncoderAdd(enc, NULL, timestamp_ms, NULL);
+  WebPAnimEncoderAssemble(enc, webp_data);
+  WebPAnimEncoderDelete(enc);
+  // Write the 'webp_data' to a file, or re-mux it further.
+*/
+
+typedef struct WebPAnimEncoder WebPAnimEncoder;  // Main opaque object.
+
+// Forward declarations. Defined in encode.h.
+struct WebPPicture;
+struct WebPConfig;
+
+// Global options.
+struct WebPAnimEncoderOptions {
+  WebPMuxAnimParams anim_params;  // Animation parameters.
+  int minimize_size;    // If true, minimize the output size (slow). Implicitly
+                        // disables key-frame insertion.
+  int kmin;
+  int kmax;             // Minimum and maximum distance between consecutive key
+                        // frames in the output. The library may insert some key
+                        // frames as needed to satisfy these criteria.
+                        // Note that these conditions should hold: kmax > kmin
+                        // and kmin >= kmax / 2 + 1. Also, if kmin == 0, then
+                        // key-frame insertion is disabled; and if kmax == 0,
+                        // then all frames will be key-frames.
+  int allow_mixed;      // If true, use mixed compression mode; may choose
+                        // either lossy or lossless for each frame.
+  int verbose;          // If true, print info and warning messages to stderr.
+
+  uint32_t padding[4];  // Padding for later use.
+};
+
+// Internal, version-checked, entry point.
+WEBP_EXTERN(int) WebPAnimEncoderOptionsInitInternal(
+    WebPAnimEncoderOptions*, int);
+
+// Should always be called, to initialize a fresh WebPAnimEncoderOptions
+// structure before modification. Returns false in case of version mismatch.
+// WebPAnimEncoderOptionsInit() must have succeeded before using the
+// 'enc_options' object.
+static WEBP_INLINE int WebPAnimEncoderOptionsInit(
+    WebPAnimEncoderOptions* enc_options) {
+  return WebPAnimEncoderOptionsInitInternal(enc_options, WEBP_MUX_ABI_VERSION);
 }
 
-// Frees memory associated with 'dmux'.
-WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
-
-//------------------------------------------------------------------------------
-// Data/information extraction.
+// Internal, version-checked, entry point.
+WEBP_EXTERN(WebPAnimEncoder*) WebPAnimEncoderNewInternal(
+    int, int, const WebPAnimEncoderOptions*, int);
 
-typedef enum {
-  WEBP_FF_FORMAT_FLAGS,  // Extended format flags present in the 'VP8X' chunk.
-  WEBP_FF_CANVAS_WIDTH,
-  WEBP_FF_CANVAS_HEIGHT,
-  WEBP_FF_LOOP_COUNT
-} WebPFormatFeature;
+// Creates and initializes a WebPAnimEncoder object.
+// Parameters:
+//   width/height - (in) canvas width and height of the animation.
+//   enc_options - (in) encoding options; can be passed NULL to pick
+//                     reasonable defaults.
+// Returns:
+//   A pointer to the newly created WebPAnimEncoder object.
+//   Or NULL in case of memory error.
+static WEBP_INLINE WebPAnimEncoder* WebPAnimEncoderNew(
+    int width, int height, const WebPAnimEncoderOptions* enc_options) {
+  return WebPAnimEncoderNewInternal(width, height, enc_options,
+                                    WEBP_MUX_ABI_VERSION);
+}
 
-// Get the 'feature' value from the 'dmux'.
-// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
-// returned a state > WEBP_DEMUX_PARSING_HEADER.
-WEBP_EXTERN(uint32_t) WebPDemuxGetI(
-    const WebPDemuxer* dmux, WebPFormatFeature feature);
+// Optimize the given frame for WebP, encode it and add it to the
+// WebPAnimEncoder object.
+// The last call to 'WebPAnimEncoderAdd' should be with frame = NULL, which
+// indicates that no more frames are to be added. This call is also used to
+// determine the duration of the last frame.
+// Parameters:
+//   enc - (in/out) object to which the frame is to be added.
+//   frame - (in/out) frame data in ARGB or YUV(A) format. If it is in YUV(A)
+//           format, it will be converted to ARGB, which incurs a small loss.
+//   timestamp_ms - (in) timestamp of this frame in milliseconds.
+//                       Duration of a frame would be calculated as
+//                       "timestamp of next frame - timestamp of this frame".
+//                       Hence, timestamps should be in non-decreasing order.
+//   config - (in) encoding options; can be passed NULL to pick
+//            reasonable defaults.
+// Returns:
+//   On error, returns false and frame->error_code is set appropriately.
+//   Otherwise, returns true.
+WEBP_EXTERN(int) WebPAnimEncoderAdd(
+    WebPAnimEncoder* enc, struct WebPPicture* frame, int timestamp_ms,
+    const struct WebPConfig* config);
+
+// Assemble all frames added so far into a WebP bitstream.
+// This call should be preceded by a call to 'WebPAnimEncoderAdd' with
+// frame = NULL; if not, the duration of the last frame will be internally
+// estimated.
+// Parameters:
+//   enc - (in/out) object from which the frames are to be assembled.
+//   webp_data - (out) generated WebP bitstream.
+// Returns:
+//   True on success.
+WEBP_EXTERN(int) WebPAnimEncoderAssemble(WebPAnimEncoder* enc,
+                                         WebPData* webp_data);
 
-//------------------------------------------------------------------------------
-// Frame iteration.
-
-typedef struct {
-  int frame_num_;
-  int num_frames_;
-  int tile_num_;
-  int num_tiles_;
-  int x_offset_, y_offset_;  // offset relative to the canvas.
-  int width_, height_;       // dimensions of this frame or tile.
-  int duration_;   // display duration in milliseconds.
-  int complete_;   // true if 'tile_' contains a full frame. partial images may
-                   // still be decoded with the WebP incremental decoder.
-  WebPData tile_;  // The frame or tile given by 'frame_num_' and 'tile_num_'.
-
-  uint32_t pad[4];           // padding for later use
-  void* private_;
-} WebPIterator;
-
-// Retrieves frame 'frame_number' from 'dmux'.
-// 'iter->tile_' points to the first tile on return from this function.
-// Individual tiles may be extracted using WebPDemuxSetTile().
-// Setting 'frame_number' equal to 0 will return the last frame of the image.
-// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
-// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
-// NOTE: 'dmux' must persist for the lifetime of 'iter'.
-WEBP_EXTERN(int) WebPDemuxGetFrame(
-    const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
-
-// Sets 'iter->tile_' to point to the next ('iter->frame_num_' + 1) or previous
-// ('iter->frame_num_' - 1) frame. These functions do not loop.
-// Returns true on success, false otherwise.
-WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
-WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
-
-// Sets 'iter->tile_' to reflect tile number 'tile_number'.
-// Returns true if tile 'tile_number' is present, false otherwise.
-WEBP_EXTERN(int) WebPDemuxSelectTile(WebPIterator* iter, int tile_number);
-
-// Releases any memory associated with 'iter'.
-// Must be called before destroying the associated WebPDemuxer with
-// WebPDemuxDelete().
-WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
+// Get error string corresponding to the most recent call using 'enc'. The
+// returned string is owned by 'enc' and is valid only until the next call to
+// WebPAnimEncoderAdd() or WebPAnimEncoderAssemble() or WebPAnimEncoderDelete().
+// Parameters:
+//   enc - (in/out) object from which the error string is to be fetched.
+// Returns:
+//   NULL if 'enc' is NULL. Otherwise, returns the error string if the last call
+//   to 'enc' had an error, or an empty string if the last call was a success.
+WEBP_EXTERN(const char*) WebPAnimEncoderGetError(WebPAnimEncoder* enc);
 
-//------------------------------------------------------------------------------
-// Chunk iteration.
-
-typedef struct {
-  // The current and total number of chunks with the fourcc given to
-  // WebPDemuxGetChunk().
-  int chunk_num_;
-  int num_chunks_;
-  WebPData chunk_;    // The payload of the chunk.
-
-  uint32_t pad[6];    // padding for later use
-  void* private_;
-} WebPChunkIterator;
-
-// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
-// 'dmux'.
-// 'fourcc' is a character array containing the fourcc of the chunk to return,
-// e.g., "ICCP", "META", "EXIF", etc.
-// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
-// Returns true if the chunk is found, false otherwise. Image related chunk
-// payloads are accessed through WebPDemuxGetFrame() and related functions.
-// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
-// NOTE: 'dmux' must persist for the lifetime of the iterator.
-WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
-                                   const char fourcc[4], int chunk_number,
-                                   WebPChunkIterator* iter);
-
-// Sets 'iter->chunk_' to point to the next ('iter->chunk_num_' + 1) or previous
-// ('iter->chunk_num_' - 1) chunk. These functions do not loop.
-// Returns true on success, false otherwise.
-WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
-WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
-
-// Releases any memory associated with 'iter'.
-// Must be called before destroying the associated WebPDemuxer with
-// WebPDemuxDelete().
-WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
+// Deletes the WebPAnimEncoder object.
+// Parameters:
+//   enc - (in/out) object to be deleted
+WEBP_EXTERN(void) WebPAnimEncoderDelete(WebPAnimEncoder* enc);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/mux/anim_encode.c b/drivers/webp/mux/anim_encode.c
new file mode 100644
index 0000000000..bb7c0f50b9
--- /dev/null
+++ b/drivers/webp/mux/anim_encode.c
@@ -0,0 +1,1404 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+//  AnimEncoder implementation.
+//
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "../utils/utils.h"
+#include "webp/decode.h"
+#include "webp/encode.h"
+#include "webp/format_constants.h"
+#include "webp/mux.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#define snprintf _snprintf
+#endif
+
+#define ERROR_STR_MAX_LENGTH 100
+
+//------------------------------------------------------------------------------
+// Internal structs.
+
+// Stores the offsets and dimensions of a frame rectangle, in pixels.
+typedef struct {
+  int x_offset_, y_offset_, width_, height_;
+} FrameRect;
+
+// Used to store two candidates of encoded data for an animation frame. One of
+// the two will be chosen later.
+typedef struct {
+  WebPMuxFrameInfo sub_frame_;  // Encoded frame rectangle.
+  WebPMuxFrameInfo key_frame_;  // Encoded frame if it is a key-frame.
+  int is_key_frame_;            // True if 'key_frame_' has been chosen.
+} EncodedFrame;
+
+struct WebPAnimEncoder {  // 'const' members are written once, at creation.
+  const int canvas_width_;                  // Canvas width.
+  const int canvas_height_;                 // Canvas height.
+  const WebPAnimEncoderOptions options_;    // Global encoding options.
+
+  FrameRect prev_rect_;               // Previous WebP frame rectangle.
+  WebPConfig last_config_;            // Cached in case a re-encode is needed.
+  WebPConfig last_config2_;           // 2nd cached config; only valid if
+                                      // 'options_.allow_mixed' is true.
+
+  WebPPicture* curr_canvas_;          // Only pointer; we don't own memory.
+
+  // Canvas buffers.
+  WebPPicture curr_canvas_copy_;      // Possibly modified current canvas.
+  int curr_canvas_copy_modified_;     // True if pixels in 'curr_canvas_copy_'
+                                      // differ from those in 'curr_canvas_'.
+
+  WebPPicture prev_canvas_;           // Previous canvas.
+  WebPPicture prev_canvas_disposed_;  // Previous canvas disposed to background.
+
+  // Encoded data.
+  EncodedFrame* encoded_frames_;      // Array of encoded frames.
+  size_t size_;             // Number of allocated frames.
+  size_t start_;            // Frame start index.
+  size_t count_;            // Number of valid frames.
+  size_t flush_count_;      // If >0, 'flush_count_' frames starting from
+                            // 'start_' are ready to be added to mux.
+
+  // key-frame related.
+  int64_t best_delta_;      // min(canvas size - frame size) over the frames.
+                            // Can be negative in certain cases due to
+                            // transparent pixels in a frame.
+  int keyframe_;            // Index of selected key-frame relative to 'start_'.
+  int count_since_key_frame_;     // Frames seen since the last key-frame.
+
+  int first_timestamp_;           // Timestamp of the first frame.
+  int prev_timestamp_;            // Timestamp of the last added frame.
+  int prev_candidate_undecided_;  // True if it's not yet decided if previous
+                                  // frame would be a sub-frame or a key-frame.
+
+  // Misc.
+  int is_first_frame_;  // True if first frame is yet to be added/being added.
+  int got_null_frame_;  // True if WebPAnimEncoderAdd() has already been called
+                        // with a NULL frame.
+
+  size_t in_frame_count_;   // Number of input frames processed so far.
+  size_t out_frame_count_;  // Number of frames added to mux so far. This may be
+                            // different from 'in_frame_count_' due to merging.
+
+  WebPMux* mux_;        // Muxer to assemble the WebP bitstream.
+  char error_str_[ERROR_STR_MAX_LENGTH];  // Error string. Empty if no error.
+};
+
+// -----------------------------------------------------------------------------
+// Life of WebPAnimEncoder object.
+
+#define DELTA_INFINITY      (1ULL << 32)
+#define KEYFRAME_NONE       (-1)
+
+// Puts the frame-cache bookkeeping of 'enc' back into its initial state.
+static void ResetCounters(WebPAnimEncoder* const enc) {
+  enc->keyframe_ = KEYFRAME_NONE;
+  enc->best_delta_ = DELTA_INFINITY;
+  enc->flush_count_ = 0;
+  enc->count_ = 0;
+  enc->start_ = 0;
+}
+
+static void DisableKeyframes(WebPAnimEncoderOptions* const enc_options) {
+  enc_options->kmin = INT_MAX - 1;  // One below 'kmax', so kmin < kmax.
+  enc_options->kmax = INT_MAX;
+}
+
+#define MAX_CACHED_FRAMES 30  // Upper limit enforced on 'kmax - kmin'.
+
+static void SanitizeEncoderOptions(WebPAnimEncoderOptions* const enc_options) {
+  int print_warning = enc_options->verbose;  // Warn only in verbose mode.
+
+  if (enc_options->minimize_size) {
+    DisableKeyframes(enc_options);
+  }
+
+  if (enc_options->kmin <= 0) {
+    DisableKeyframes(enc_options);
+    print_warning = 0;  // No adjustment warnings in this case.
+  }
+  if (enc_options->kmax <= 0) {  // All frames will be key-frames.
+    enc_options->kmin = 0;
+    enc_options->kmax = 0;
+    return;
+  }
+
+  if (enc_options->kmin >= enc_options->kmax) {
+    enc_options->kmin = enc_options->kmax - 1;
+    if (print_warning) {
+      fprintf(stderr, "WARNING: Setting kmin = %d, so that kmin < kmax.\n",
+              enc_options->kmin);
+    }
+  } else {
+    const int kmin_limit = enc_options->kmax / 2 + 1;
+    if (enc_options->kmin < kmin_limit && kmin_limit < enc_options->kmax) {
+      // This ensures that enc.keyframe + kmin >= kmax is always true. So, we
+      // can flush all the frames in the 'count_since_key_frame == kmax' case.
+      enc_options->kmin = kmin_limit;
+      if (print_warning) {
+        fprintf(stderr,
+                "WARNING: Setting kmin = %d, so that kmin >= kmax / 2 + 1.\n",
+                enc_options->kmin);
+      }
+    }
+  }
+  // Limit the max number of frames that are allocated.
+  if (enc_options->kmax - enc_options->kmin > MAX_CACHED_FRAMES) {
+    enc_options->kmin = enc_options->kmax - MAX_CACHED_FRAMES;
+    if (print_warning) {
+      fprintf(stderr,
+              "WARNING: Setting kmin = %d, so that kmax - kmin <= %d.\n",
+              enc_options->kmin, MAX_CACHED_FRAMES);
+    }
+  }
+  assert(enc_options->kmin < enc_options->kmax);  // Holds on every path here.
+}
+
+#undef MAX_CACHED_FRAMES
+
+static void DefaultEncoderOptions(WebPAnimEncoderOptions* const enc_options) {
+  enc_options->anim_params.loop_count = 0;
+  enc_options->anim_params.bgcolor = 0xffffffff;  // White.
+  enc_options->minimize_size = enc_options->allow_mixed = 0;
+  DisableKeyframes(enc_options);  // Sets 'kmin' and 'kmax'.
+  enc_options->verbose = 0;  // Else SanitizeEncoderOptions() reads garbage.
+}
+
+int WebPAnimEncoderOptionsInitInternal(WebPAnimEncoderOptions* enc_options,
+                                       int abi_version) {
+  // Refuse NULL options and callers built against an incompatible ABI.
+  if (enc_options == NULL) return 0;
+  if (WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_MUX_ABI_VERSION)) return 0;
+
+  DefaultEncoderOptions(enc_options);
+  return 1;
+}
+
+#define TRANSPARENT_COLOR   0x00ffffff
+
+static void ClearRectangle(WebPPicture* const picture,
+                           int left, int top, int width, int height) {
+  // Overwrite every pixel of the rectangle, one row at a time.
+  int y;
+  for (y = 0; y < height; ++y) {
+    uint32_t* const row =
+        picture->argb + (top + y) * picture->argb_stride + left;
+    int x;
+    for (x = 0; x < width; ++x) row[x] = TRANSPARENT_COLOR;
+  }
+}
+
+static void WebPUtilClearPic(WebPPicture* const picture,
+                             const FrameRect* const rect) {
+  if (rect == NULL) {  // No rectangle given: clear the whole picture.
+    ClearRectangle(picture, 0, 0, picture->width, picture->height);
+    return;
+  }
+  ClearRectangle(picture, rect->x_offset_, rect->y_offset_,
+                 rect->width_, rect->height_);
+}
+
+static void MarkNoError(WebPAnimEncoder* const enc) {
+  *enc->error_str_ = '\0';  // An empty string means "no error".
+}
+
+static void MarkError(WebPAnimEncoder* const enc, const char* str) {
+  if (snprintf(enc->error_str_, ERROR_STR_MAX_LENGTH, "%s.", str) < 0) {
+    assert(0);  // FIX ME! Output error (or truncation w/ _snprintf).
+  }
+}
+
+static void MarkError2(WebPAnimEncoder* const enc,
+                       const char* str, int error_code) {
+  if (snprintf(enc->error_str_, ERROR_STR_MAX_LENGTH, "%s: %d.", str,
+               error_code) < 0) {
+    assert(0);  // FIX ME! Output error (or truncation w/ _snprintf).
+  }
+}
+
+WebPAnimEncoder* WebPAnimEncoderNewInternal(
+    int width, int height, const WebPAnimEncoderOptions* enc_options,
+    int abi_version) {
+  WebPAnimEncoder* enc;
+
+  if (WEBP_ABI_IS_INCOMPATIBLE(abi_version, WEBP_MUX_ABI_VERSION)) {
+    return NULL;  // Caller was built against an incompatible ABI.
+  }
+  if (width <= 0 || height <= 0 ||
+      (width * (uint64_t)height) >= MAX_IMAGE_AREA) {
+    return NULL;  // Invalid or too large canvas dimensions.
+  }
+
+  enc = (WebPAnimEncoder*)WebPSafeCalloc(1, sizeof(*enc));
+  if (enc == NULL) return NULL;
+  // sanity inits, so we can call WebPAnimEncoderDelete():
+  enc->encoded_frames_ = NULL;
+  enc->mux_ = NULL;
+  MarkNoError(enc);
+
+  // Dimensions and options ('const' members, hence the casts).
+  *(int*)&enc->canvas_width_ = width;
+  *(int*)&enc->canvas_height_ = height;
+  if (enc_options != NULL) {
+    *(WebPAnimEncoderOptions*)&enc->options_ = *enc_options;
+    SanitizeEncoderOptions((WebPAnimEncoderOptions*)&enc->options_);
+  } else {
+    DefaultEncoderOptions((WebPAnimEncoderOptions*)&enc->options_);
+  }
+
+  // Canvas buffers.
+  if (!WebPPictureInit(&enc->curr_canvas_copy_) ||
+      !WebPPictureInit(&enc->prev_canvas_) ||
+      !WebPPictureInit(&enc->prev_canvas_disposed_)) {
+    goto Err;
+  }
+  enc->curr_canvas_copy_.width = width;
+  enc->curr_canvas_copy_.height = height;
+  enc->curr_canvas_copy_.use_argb = 1;
+  if (!WebPPictureAlloc(&enc->curr_canvas_copy_) ||
+      !WebPPictureCopy(&enc->curr_canvas_copy_, &enc->prev_canvas_) ||
+      !WebPPictureCopy(&enc->curr_canvas_copy_, &enc->prev_canvas_disposed_)) {
+    goto Err;
+  }
+  WebPUtilClearPic(&enc->prev_canvas_, NULL);  // Whole canvas transparent.
+  enc->curr_canvas_copy_modified_ = 1;  // Makes CopyCurrentCanvas() copy.
+
+  // Encoded frames.
+  ResetCounters(enc);
+  // Note: one extra storage is for the previous frame.
+  enc->size_ = enc->options_.kmax - enc->options_.kmin + 1;
+  // We need space for at least 2 frames. But when kmin, kmax are both zero,
+  // enc->size_ will be 1. So we handle that special case below.
+  if (enc->size_ < 2) enc->size_ = 2;
+  enc->encoded_frames_ =
+      (EncodedFrame*)WebPSafeCalloc(enc->size_, sizeof(*enc->encoded_frames_));
+  if (enc->encoded_frames_ == NULL) goto Err;
+
+  enc->mux_ = WebPMuxNew();
+  if (enc->mux_ == NULL) goto Err;
+
+  enc->count_since_key_frame_ = 0;
+  enc->first_timestamp_ = 0;
+  enc->prev_timestamp_ = 0;
+  enc->prev_candidate_undecided_ = 0;
+  enc->is_first_frame_ = 1;
+  enc->got_null_frame_ = 0;
+
+  return enc;  // All OK.
+
+ Err:  // Releases whatever was set up so far.
+  WebPAnimEncoderDelete(enc);
+  return NULL;
+}
+
+// Clears both candidate bitstreams of 'encoded_frame' and zeroes the entry.
+static void FrameRelease(EncodedFrame* const encoded_frame) {
+  if (encoded_frame == NULL) return;  // Nothing to release.
+
+  WebPDataClear(&encoded_frame->key_frame_.bitstream);
+  WebPDataClear(&encoded_frame->sub_frame_.bitstream);
+  memset(encoded_frame, 0, sizeof(*encoded_frame));
+}
+
+void WebPAnimEncoderDelete(WebPAnimEncoder* enc) {
+  size_t i;
+  if (enc == NULL) return;  // Deleting a NULL encoder is a no-op.
+
+  // Canvas buffers held by the encoder.
+  WebPPictureFree(&enc->curr_canvas_copy_);
+  WebPPictureFree(&enc->prev_canvas_);
+  WebPPictureFree(&enc->prev_canvas_disposed_);
+  // Cached frames: release each entry, then the array itself.
+  if (enc->encoded_frames_ != NULL) {
+    for (i = 0; i < enc->size_; ++i) FrameRelease(&enc->encoded_frames_[i]);
+    WebPSafeFree(enc->encoded_frames_);
+  }
+  WebPMuxDelete(enc->mux_);
+  WebPSafeFree(enc);
+}
+
+// -----------------------------------------------------------------------------
+// Frame addition.
+
+// Returns the cached frame located 'position' entries after 'start_'.
+static EncodedFrame* GetFrame(const WebPAnimEncoder* const enc,
+                              size_t position) {
+  assert(enc->start_ + position < enc->size_);
+  return enc->encoded_frames_ + (enc->start_ + position);
+}
+
+// Returns true if the 'length' pixels visited in 'src' and 'dst' are
+// identical, moving by the given step sizes from one pixel to the next.
+static WEBP_INLINE int ComparePixels(const uint32_t* src, int src_step,
+                                     const uint32_t* dst, int dst_step,
+                                     int length) {
+  int k;
+
+  assert(length > 0);
+  for (k = 0; k < length; ++k) {
+    if (*src != *dst) return 0;  // The first mismatch settles it.
+    src += src_step;
+    dst += dst_step;
+  }
+  return 1;
+}
+
+static int IsEmptyRect(const FrameRect* const rect) {
+  return !(rect->width_ != 0 && rect->height_ != 0);  // Either side is zero.
+}
+
+// Shrinks 'rect' (a valid initial guess) to the bounding box of the changes.
+static void MinimizeChangeRectangle(const WebPPicture* const src,
+                                    const WebPPicture* const dst,
+                                    FrameRect* const rect) {
+  int i, j;
+  // Sanity checks.
+  assert(src->width == dst->width && src->height == dst->height);
+  assert(rect->x_offset_ + rect->width_ <= dst->width);
+  assert(rect->y_offset_ + rect->height_ <= dst->height);
+
+  // Left boundary.
+  for (i = rect->x_offset_; i < rect->x_offset_ + rect->width_; ++i) {
+    const uint32_t* const src_argb =
+        &src->argb[rect->y_offset_ * src->argb_stride + i];
+    const uint32_t* const dst_argb =
+        &dst->argb[rect->y_offset_ * dst->argb_stride + i];
+    if (ComparePixels(src_argb, src->argb_stride, dst_argb, dst->argb_stride,
+                      rect->height_)) {
+      --rect->width_;  // Redundant column.
+      ++rect->x_offset_;
+    } else {
+      break;
+    }
+  }
+  if (rect->width_ == 0) goto NoChange;  // Every column was identical.
+
+  // Right boundary.
+  for (i = rect->x_offset_ + rect->width_ - 1; i >= rect->x_offset_; --i) {
+    const uint32_t* const src_argb =
+        &src->argb[rect->y_offset_ * src->argb_stride + i];
+    const uint32_t* const dst_argb =
+        &dst->argb[rect->y_offset_ * dst->argb_stride + i];
+    if (ComparePixels(src_argb, src->argb_stride, dst_argb, dst->argb_stride,
+                      rect->height_)) {
+      --rect->width_;  // Redundant column.
+    } else {
+      break;
+    }
+  }
+  if (rect->width_ == 0) goto NoChange;
+
+  // Top boundary (rows are compared over the already narrowed width).
+  for (j = rect->y_offset_; j < rect->y_offset_ + rect->height_; ++j) {
+    const uint32_t* const src_argb =
+        &src->argb[j * src->argb_stride + rect->x_offset_];
+    const uint32_t* const dst_argb =
+        &dst->argb[j * dst->argb_stride + rect->x_offset_];
+    if (ComparePixels(src_argb, 1, dst_argb, 1, rect->width_)) {
+      --rect->height_;  // Redundant row.
+      ++rect->y_offset_;
+    } else {
+      break;
+    }
+  }
+  if (rect->height_ == 0) goto NoChange;
+
+  // Bottom boundary.
+  for (j = rect->y_offset_ + rect->height_ - 1; j >= rect->y_offset_; --j) {
+    const uint32_t* const src_argb =
+        &src->argb[j * src->argb_stride + rect->x_offset_];
+    const uint32_t* const dst_argb =
+        &dst->argb[j * dst->argb_stride + rect->x_offset_];
+    if (ComparePixels(src_argb, 1, dst_argb, 1, rect->width_)) {
+      --rect->height_;  // Redundant row.
+    } else {
+      break;
+    }
+  }
+  if (rect->height_ == 0) goto NoChange;
+
+  if (IsEmptyRect(rect)) {
+ NoChange:  // An all-zero rectangle denotes "no change".
+    rect->x_offset_ = 0;
+    rect->y_offset_ = 0;
+    rect->width_ = 0;
+    rect->height_ = 0;
+  }
+}
+
+// Moves odd offsets down to the even value below, growing 'rect' to match.
+static WEBP_INLINE void SnapToEvenOffsets(FrameRect* const rect) {
+  rect->height_ += (rect->y_offset_ & 1);
+  rect->y_offset_ &= ~1;
+  rect->width_ += (rect->x_offset_ & 1);
+  rect->x_offset_ &= ~1;
+}
+
+// Computes in 'rect' the region of 'curr_canvas' to encode for this frame and
+// makes 'sub_frame' a view on it. Starts from the full canvas as the guess.
+static int GetSubRect(const WebPPicture* const prev_canvas,
+                      const WebPPicture* const curr_canvas, int is_key_frame,
+                      int is_first_frame, int empty_rect_allowed,
+                      FrameRect* const rect, WebPPicture* const sub_frame) {
+  const int shrink_rect = is_first_frame || !is_key_frame;
+
+  // Initial guess: the whole canvas.
+  rect->x_offset_ = 0;
+  rect->y_offset_ = 0;
+  rect->width_ = curr_canvas->width;
+  rect->height_ = curr_canvas->height;
+  // Note: shrinking also works for the first frame, since 'prev_canvas' is
+  // initialized to a fully transparent canvas in the beginning.
+  if (shrink_rect) MinimizeChangeRectangle(prev_canvas, curr_canvas, rect);
+
+  if (IsEmptyRect(rect)) {
+    // An empty rectangle needs no 'sub_frame' when the caller accepts it.
+    if (empty_rect_allowed) return 1;
+    // Otherwise force a 1x1 rectangle at the origin.
+    assert(rect->x_offset_ == 0);
+    assert(rect->y_offset_ == 0);
+    rect->width_ = 1;
+    rect->height_ = 1;
+  }
+
+  SnapToEvenOffsets(rect);
+  return WebPPictureView(curr_canvas, rect->x_offset_, rect->y_offset_,
+                         rect->width_, rect->height_, sub_frame);
+}
+
+static void DisposeFrameRectangle(int dispose_method,
+                                  const FrameRect* const rect,
+                                  WebPPicture* const curr_canvas) {
+  assert(rect != NULL);
+  // Only disposal to background alters the canvas.
+  if (dispose_method != WEBP_MUX_DISPOSE_BACKGROUND) return;
+  WebPUtilClearPic(curr_canvas, rect);
+}
+
+static uint32_t RectArea(const FrameRect* const rect) {
+  return (uint32_t)rect->height_ * rect->width_;  // Computed modulo 2^32.
+}
+
+static int IsBlendingPossible(const WebPPicture* const src,
+                              const WebPPicture* const dst,
+                              const FrameRect* const rect) {
+  const int x_end = rect->x_offset_ + rect->width_;
+  const int y_end = rect->y_offset_ + rect->height_;
+  int x, y;
+  assert(src->width == dst->width && src->height == dst->height);
+  assert(x_end <= dst->width);
+  assert(y_end <= dst->height);
+  for (y = rect->y_offset_; y < y_end; ++y) {
+    const uint32_t* const src_row = src->argb + y * src->argb_stride;
+    const uint32_t* const dst_row = dst->argb + y * dst->argb_stride;
+    for (x = rect->x_offset_; x < x_end; ++x) {
+      // A non-opaque target pixel that differs from the source cannot be
+      // reproduced through blending.
+      const int is_opaque = ((dst_row[x] >> 24) == 0xff);
+      if (!is_opaque && src_row[x] != dst_row[x]) return 0;
+    }
+  }
+  return 1;
+}
+
+#define MIN_COLORS_LOSSY     31  // Don't try lossy below this threshold.
+#define MAX_COLORS_LOSSLESS 194  // Don't try lossless above this threshold.
+#define MAX_COLOR_COUNT     256  // Power of 2 greater than MAX_COLORS_LOSSLESS.
+#define HASH_SIZE (MAX_COLOR_COUNT * 4)
+#define HASH_RIGHT_SHIFT     22  // 32 - log2(HASH_SIZE).
+
+// TODO(urvang): Also used in enc/vp8l.c. Move to utils.
+// If the number of colors in the 'pic' is at least MAX_COLOR_COUNT, return
+// MAX_COLOR_COUNT. Otherwise, return the exact number of colors in the 'pic'.
+static int GetColorCount(const WebPPicture* const pic) {
+  int x, y;
+  int num_colors = 0;
+  uint8_t in_use[HASH_SIZE] = { 0 };  // Marks the occupied hash slots.
+  uint32_t colors[HASH_SIZE];         // Color stored in each occupied slot.
+  static const uint32_t kHashMul = 0x1e35a7bd;
+  const uint32_t* argb = pic->argb;   // Start of the current row.
+  const int width = pic->width;
+  const int height = pic->height;
+  uint32_t last_pix = ~argb[0];   // so we're sure that last_pix != argb[0]
+
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      int key;
+      if (argb[x] == last_pix) {  // Same as the previously handled pixel.
+        continue;
+      }
+      last_pix = argb[x];
+      key = (kHashMul * last_pix) >> HASH_RIGHT_SHIFT;  // Keeps the top bits.
+      while (1) {
+        if (!in_use[key]) {  // Free slot: this is a new color.
+          colors[key] = last_pix;
+          in_use[key] = 1;
+          ++num_colors;
+          if (num_colors >= MAX_COLOR_COUNT) {
+            return MAX_COLOR_COUNT;  // Exact count not needed.
+          }
+          break;
+        } else if (colors[key] == last_pix) {
+          break;  // The color is already there.
+        } else {
+          // Some other color sits here, so do linear conflict resolution.
+          ++key;
+          key &= (HASH_SIZE - 1);  // Key mask.
+        }
+      }
+    }
+    argb += pic->argb_stride;  // Advance to the next row.
+  }
+  return num_colors;
+}
+
+#undef MAX_COLOR_COUNT
+#undef HASH_SIZE
+#undef HASH_RIGHT_SHIFT
+
+// Within 'rect', makes transparent every pixel of 'dst' that is identical to
+// the pixel of 'src' at the same position.
+static void IncreaseTransparency(const WebPPicture* const src,
+                                 const FrameRect* const rect,
+                                 WebPPicture* const dst) {
+  int x, y;
+  assert(src != NULL && dst != NULL && rect != NULL);
+  assert(src->width == dst->width && src->height == dst->height);
+  for (y = 0; y < rect->height_; ++y) {
+    const int row = rect->y_offset_ + y;
+    const uint32_t* const from = src->argb + row * src->argb_stride;
+    uint32_t* const to = dst->argb + row * dst->argb_stride;
+    for (x = rect->x_offset_; x < rect->x_offset_ + rect->width_; ++x) {
+      // Identical in both pictures: replace by the transparent color.
+      if (to[x] == from[x]) to[x] = TRANSPARENT_COLOR;
+    }
+  }
+}
+
+#undef TRANSPARENT_COLOR
+
+// Replace similar blocks of pixels by a 'see-through' transparent block
+// with uniform average color.
+static void FlattenSimilarBlocks(const WebPPicture* const src,
+                                 const FrameRect* const rect,
+                                 WebPPicture* const dst) {
+  int i, j;
+  const int block_size = 8;
+  const int y_start = (rect->y_offset_ + block_size) & ~(block_size - 1);
+  const int y_end = (rect->y_offset_ + rect->height_) & ~(block_size - 1);
+  const int x_start = (rect->x_offset_ + block_size) & ~(block_size - 1);
+  const int x_end = (rect->x_offset_ + rect->width_) & ~(block_size - 1);
+  assert(src != NULL && dst != NULL && rect != NULL);
+  assert(src->width == dst->width && src->height == dst->height);
+  assert((block_size & (block_size - 1)) == 0);  // must be a power of 2
+  // Iterate over each block and count similar pixels.
+  for (j = y_start; j < y_end; j += block_size) {
+    for (i = x_start; i < x_end; i += block_size) {
+      int cnt = 0;
+      int avg_r = 0, avg_g = 0, avg_b = 0;  // Channel sums until divided.
+      int x, y;
+      const uint32_t* const psrc = src->argb + j * src->argb_stride + i;
+      uint32_t* const pdst = dst->argb + j * dst->argb_stride + i;
+      for (y = 0; y < block_size; ++y) {
+        for (x = 0; x < block_size; ++x) {
+          const uint32_t src_pixel = psrc[x + y * src->argb_stride];
+          const int alpha = src_pixel >> 24;
+          if (alpha == 0xff &&
+              src_pixel == pdst[x + y * dst->argb_stride]) {
+              ++cnt;  // Opaque and identical in both pictures.
+              avg_r += (src_pixel >> 16) & 0xff;
+              avg_g += (src_pixel >>  8) & 0xff;
+              avg_b += (src_pixel >>  0) & 0xff;
+          }
+        }
+      }
+      // If we have a fully similar block, we replace it with an
+      // average transparent block. This compresses better in lossy mode.
+      if (cnt == block_size * block_size) {
+        const uint32_t color = (0x00          << 24) |
+                               ((avg_r / cnt) << 16) |
+                               ((avg_g / cnt) <<  8) |
+                               ((avg_b / cnt) <<  0);
+        for (y = 0; y < block_size; ++y) {
+          for (x = 0; x < block_size; ++x) {
+            pdst[x + y * dst->argb_stride] = color;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Encodes 'pic' with 'config', sending the output to 'memory'.
+// Returns 1 on success, 0 on failure.
+static int EncodeFrame(const WebPConfig* const config, WebPPicture* const pic,
+                       WebPMemoryWriter* const memory) {
+  // Encode from ARGB and let WebPMemoryWrite() collect the bytes.
+  pic->use_argb = 1;
+  pic->writer = WebPMemoryWrite;
+  pic->custom_ptr = memory;
+  return WebPEncode(config, pic) ? 1 : 0;
+}
+
+// Struct representing a candidate encoded frame including its metadata.
+typedef struct {
+  WebPMemoryWriter  mem_;       // Receives the encoded bitstream.
+  WebPMuxFrameInfo  info_;      // Frame offsets, blend/dispose method, etc.
+  FrameRect         rect_;      // Rectangle covered by the frame.
+  int               evaluate_;  // True if this candidate should be evaluated.
+} Candidate;
+
+// Fills 'candidate' with the frame metadata and the encoding of 'sub_frame'.
+// Returns VP8_ENC_OK on success, the picture's error code otherwise.
+static WebPEncodingError EncodeCandidate(WebPPicture* const sub_frame,
+                                         const FrameRect* const rect,
+                                         const WebPConfig* const config,
+                                         int use_blending,
+                                         Candidate* const candidate) {
+  WebPMuxFrameInfo* info;
+  assert(candidate != NULL);
+  memset(candidate, 0, sizeof(*candidate));
+  info = &candidate->info_;
+
+  // Frame rectangle and muxing metadata.
+  candidate->rect_ = *rect;
+  info->id = WEBP_CHUNK_ANMF;
+  info->x_offset = rect->x_offset_;
+  info->y_offset = rect->y_offset_;
+  info->dispose_method = WEBP_MUX_DISPOSE_NONE;  // Set later.
+  info->blend_method =
+      use_blending ? WEBP_MUX_BLEND : WEBP_MUX_NO_BLEND;
+  info->duration = 0;  // Set in next call to WebPAnimEncoderAdd().
+
+  // Encode the picture into the candidate's own memory writer.
+  WebPMemoryWriterInit(&candidate->mem_);
+  if (EncodeFrame(config, sub_frame, &candidate->mem_)) {
+    candidate->evaluate_ = 1;
+    return VP8_ENC_OK;
+  }
+
+  {  // Failure: report the picture's error code, discard partial output.
+    const WebPEncodingError error_code = sub_frame->error_code;
+    WebPMemoryWriterClear(&candidate->mem_);
+    return error_code;
+  }
+}
+
+static void CopyCurrentCanvas(WebPAnimEncoder* const enc) {
+  // The copy is refreshed only after it has been modified.
+  if (!enc->curr_canvas_copy_modified_) return;
+  WebPCopyPixels(enc->curr_canvas_, &enc->curr_canvas_copy_);
+  enc->curr_canvas_copy_modified_ = 0;
+}
+
+enum {
+  LL_DISP_NONE = 0,   // Lossless candidate, no disposal.
+  LL_DISP_BG,         // Lossless candidate, dispose to background.
+  LOSSY_DISP_NONE,    // Lossy candidate, no disposal.
+  LOSSY_DISP_BG,      // Lossy candidate, dispose to background.
+  CANDIDATE_COUNT     // Number of candidate slots.
+};
+
+// Encodes the lossless and/or lossy candidate(s) for 'dispose_method', given
+// pre-filled 'rect' and 'sub_frame'. Returns the first encoding error, if any.
+static WebPEncodingError GenerateCandidates(
+    WebPAnimEncoder* const enc, Candidate candidates[CANDIDATE_COUNT],
+    WebPMuxAnimDispose dispose_method, int is_lossless, int is_key_frame,
+    const FrameRect* const rect, WebPPicture* sub_frame,
+    const WebPConfig* const config_ll, const WebPConfig* const config_lossy) {
+  WebPEncodingError error_code = VP8_ENC_OK;
+  const int is_dispose_none = (dispose_method == WEBP_MUX_DISPOSE_NONE);
+  Candidate* const candidate_ll =
+      is_dispose_none ? &candidates[LL_DISP_NONE] : &candidates[LL_DISP_BG];
+  Candidate* const candidate_lossy = is_dispose_none
+                                     ? &candidates[LOSSY_DISP_NONE]
+                                     : &candidates[LOSSY_DISP_BG];
+  WebPPicture* const curr_canvas = &enc->curr_canvas_copy_;
+  const WebPPicture* const prev_canvas =
+      is_dispose_none ? &enc->prev_canvas_ : &enc->prev_canvas_disposed_;
+  const int use_blending =
+      !is_key_frame &&
+      IsBlendingPossible(prev_canvas, curr_canvas, rect);
+
+  // Pick candidates to be tried.
+  if (!enc->options_.allow_mixed) {  // Only the configured compression type.
+    candidate_ll->evaluate_ = is_lossless;
+    candidate_lossy->evaluate_ = !is_lossless;
+  } else {  // Use a heuristic for trying lossless and/or lossy compression.
+    const int num_colors = GetColorCount(sub_frame);
+    candidate_ll->evaluate_ = (num_colors < MAX_COLORS_LOSSLESS);
+    candidate_lossy->evaluate_ = (num_colors >= MIN_COLORS_LOSSY);
+  }
+
+  // Generate candidates.
+  if (candidate_ll->evaluate_) {
+    CopyCurrentCanvas(enc);  // Refresh the copy if it was flagged as modified.
+    if (use_blending) {
+      IncreaseTransparency(prev_canvas, rect, curr_canvas);
+      enc->curr_canvas_copy_modified_ = 1;  // Forces a refresh before reuse.
+    }
+    error_code = EncodeCandidate(sub_frame, rect, config_ll, use_blending,
+                                 candidate_ll);
+    if (error_code != VP8_ENC_OK) return error_code;
+  }
+  if (candidate_lossy->evaluate_) {
+    CopyCurrentCanvas(enc);  // Refresh the copy if it was flagged as modified.
+    if (use_blending) {
+      FlattenSimilarBlocks(prev_canvas, rect, curr_canvas);
+      enc->curr_canvas_copy_modified_ = 1;  // Forces a refresh before reuse.
+    }
+    error_code = EncodeCandidate(sub_frame, rect, config_lossy, use_blending,
+                                 candidate_lossy);
+    if (error_code != VP8_ENC_OK) return error_code;
+  }
+  return error_code;
+}
+
+#undef MIN_COLORS_LOSSY
+#undef MAX_COLORS_LOSSLESS
+
+static void GetEncodedData(const WebPMemoryWriter* const memory,
+                           WebPData* const encoded_data) {
+  encoded_data->size  = memory->size;  // Aliases the writer's buffer:
+  encoded_data->bytes = memory->mem;   // no bytes are copied.
+}
+
+// Sets the dispose method of the frame just before the current one in 'enc'.
+static void SetPreviousDisposeMethod(WebPAnimEncoder* const enc,
+                                     WebPMuxAnimDispose dispose_method) {
+  EncodedFrame* prev_enc_frame;
+  assert(enc->count_ >= 2);  // As current and previous frames are in enc.
+  prev_enc_frame = GetFrame(enc, enc->count_ - 2);  // Safe: checked above.
+
+  if (enc->prev_candidate_undecided_) {  // Both variants are still kept.
+    assert(dispose_method == WEBP_MUX_DISPOSE_NONE);
+    prev_enc_frame->sub_frame_.dispose_method = dispose_method;
+    prev_enc_frame->key_frame_.dispose_method = dispose_method;
+  } else {
+    WebPMuxFrameInfo* const prev_info = prev_enc_frame->is_key_frame_
+                                        ? &prev_enc_frame->key_frame_
+                                        : &prev_enc_frame->sub_frame_;
+    prev_info->dispose_method = dispose_method;
+  }
+}
+
+// Adds 'duration' to the previous frame's duration. If the sum would reach
+// MAX_DURATION, appends a 1x1 transparent frame instead. Returns 0 on error.
+static int IncreasePreviousDuration(WebPAnimEncoder* const enc, int duration) {
+  EncodedFrame* prev_enc_frame;
+  int new_duration;
+
+  assert(enc->count_ >= 1);  // Checked before the previous frame is fetched.
+  prev_enc_frame = GetFrame(enc, enc->count_ - 1);
+  assert(prev_enc_frame->sub_frame_.duration ==
+         prev_enc_frame->key_frame_.duration);
+  assert(prev_enc_frame->sub_frame_.duration ==
+         (prev_enc_frame->sub_frame_.duration & (MAX_DURATION - 1)));
+  assert(duration == (duration & (MAX_DURATION - 1)));
+
+  new_duration = prev_enc_frame->sub_frame_.duration + duration;
+  if (new_duration >= MAX_DURATION) {  // Special case.
+    // Separate out previous frame from earlier merged frames to avoid overflow.
+    // We add a 1x1 transparent frame for the previous frame, with blending on.
+    const FrameRect rect = { 0, 0, 1, 1 };
+    const uint8_t lossless_1x1_bytes[] = {
+      0x52, 0x49, 0x46, 0x46, 0x14, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50,
+      0x56, 0x50, 0x38, 0x4c, 0x08, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00,
+      0x10, 0x88, 0x88, 0x08
+    };
+    const WebPData lossless_1x1 = { lossless_1x1_bytes,
+                                    sizeof(lossless_1x1_bytes) };
+    const uint8_t lossy_1x1_bytes[] = {
+      0x52, 0x49, 0x46, 0x46, 0x40, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50,
+      0x56, 0x50, 0x38, 0x58, 0x0a, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x4c, 0x50, 0x48, 0x02, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x56, 0x50, 0x38, 0x20, 0x18, 0x00, 0x00, 0x00,
+      0x30, 0x01, 0x00, 0x9d, 0x01, 0x2a, 0x01, 0x00, 0x01, 0x00, 0x02, 0x00,
+      0x34, 0x25, 0xa4, 0x00, 0x03, 0x70, 0x00, 0xfe, 0xfb, 0xfd, 0x50, 0x00
+    };
+    const WebPData lossy_1x1 = { lossy_1x1_bytes, sizeof(lossy_1x1_bytes) };
+    const int can_use_lossless =
+        (enc->last_config_.lossless || enc->options_.allow_mixed);
+    EncodedFrame* const curr_enc_frame = GetFrame(enc, enc->count_);
+    curr_enc_frame->is_key_frame_ = 0;
+    curr_enc_frame->sub_frame_.id = WEBP_CHUNK_ANMF;
+    curr_enc_frame->sub_frame_.x_offset = 0;
+    curr_enc_frame->sub_frame_.y_offset = 0;
+    curr_enc_frame->sub_frame_.dispose_method = WEBP_MUX_DISPOSE_NONE;
+    curr_enc_frame->sub_frame_.blend_method = WEBP_MUX_BLEND;
+    curr_enc_frame->sub_frame_.duration = duration;
+    if (!WebPDataCopy(can_use_lossless ? &lossless_1x1 : &lossy_1x1,
+                      &curr_enc_frame->sub_frame_.bitstream)) {
+      return 0;
+    }
+    ++enc->count_;
+    ++enc->count_since_key_frame_;
+    enc->flush_count_ = enc->count_ - 1;
+    enc->prev_candidate_undecided_ = 0;
+    enc->prev_rect_ = rect;
+  } else {  // Regular case: extend the previous frame by 'duration'.
+    prev_enc_frame->sub_frame_.duration = new_duration;
+    prev_enc_frame->key_frame_.duration = new_duration;
+  }
+  return 1;
+}
+
+// Keeps the smallest evaluated candidate, storing it in 'encoded_frame', and
+// releases the encoded data of all the other evaluated candidates.
+// TODO(later): Perhaps a rough SSIM/PSNR produced by the encoder should
+// also be a criterion, in addition to sizes.
+static void PickBestCandidate(WebPAnimEncoder* const enc,
+                              Candidate* const candidates, int is_key_frame,
+                              EncodedFrame* const encoded_frame) {
+  int idx;
+  int winner = -1;
+  size_t smallest = ~0;
+
+  // Pass 1: find the smallest evaluated candidate (earliest index wins ties).
+  for (idx = 0; idx < CANDIDATE_COUNT; ++idx) {
+    const Candidate* const cand = &candidates[idx];
+    if (cand->evaluate_ && cand->mem_.size < smallest) {
+      winner = idx;
+      smallest = cand->mem_.size;
+    }
+  }
+  assert(winner != -1);
+
+  // Pass 2: publish the winner and free the losers, in index order.
+  for (idx = 0; idx < CANDIDATE_COUNT; ++idx) {
+    Candidate* const cand = &candidates[idx];
+    WebPMuxFrameInfo* dst;
+    if (!cand->evaluate_) continue;
+    if (idx != winner) {
+      WebPMemoryWriterClear(&cand->mem_);
+      cand->evaluate_ = 0;
+      continue;
+    }
+    dst = is_key_frame ? &encoded_frame->key_frame_
+                       : &encoded_frame->sub_frame_;
+    *dst = cand->info_;
+    GetEncodedData(&cand->mem_, &dst->bitstream);
+    if (!is_key_frame) {
+      // Note: Previous dispose method only matters for non-keyframes.
+      // Also, we don't want to modify previous dispose method that was
+      // selected when a non key-frame was assumed.
+      const int none = (winner == LL_DISP_NONE || winner == LOSSY_DISP_NONE);
+      SetPreviousDisposeMethod(enc, none ? WEBP_MUX_DISPOSE_NONE
+                                         : WEBP_MUX_DISPOSE_BACKGROUND);
+    }
+    enc->prev_rect_ = cand->rect_;  // save for next frame.
+  }
+}
+
+// Tries the compressions (lossy/lossless), dispose methods and blending methods
+// allowed by the configuration for the current frame, and stores the smallest
+// encoding in 'encoded_frame'. Returns VP8_ENC_OK or the encoding error code.
+// 'frame_skipped' will be set to true if this frame should actually be skipped.
+static WebPEncodingError SetFrame(WebPAnimEncoder* const enc,
+                                  const WebPConfig* const config,
+                                  int is_key_frame,
+                                  EncodedFrame* const encoded_frame,
+                                  int* const frame_skipped) {
+  int i;
+  WebPEncodingError error_code = VP8_ENC_OK;
+  const WebPPicture* const curr_canvas = &enc->curr_canvas_copy_;
+  const WebPPicture* const prev_canvas = &enc->prev_canvas_;
+  Candidate candidates[CANDIDATE_COUNT];
+  const int is_lossless = config->lossless;
+  const int is_first_frame = enc->is_first_frame_;
+
+  int try_dispose_none = 1;  // Default.
+  FrameRect rect_none;
+  WebPPicture sub_frame_none;
+  // First frame cannot be skipped as there is no 'previous frame' to merge it
+  // to. So, empty rectangle is not allowed for the first frame.
+  const int empty_rect_allowed_none = !is_first_frame;
+
+  // If current frame is a key-frame, dispose method of previous frame doesn't
+  // matter, so we don't try dispose to background.
+  // Also, if key-frame insertion is on, and previous frame could be picked as
+  // either a sub-frame or a key-frame, then we can't be sure about what frame
+  // rectangle would be disposed. In that case too, we don't try dispose to
+  // background.
+  const int dispose_bg_possible =
+      !is_key_frame && !enc->prev_candidate_undecided_;
+  int try_dispose_bg = 0;  // Default.
+  FrameRect rect_bg;
+  WebPPicture sub_frame_bg;
+
+  WebPConfig config_ll = *config;      // Same settings, forced lossless.
+  WebPConfig config_lossy = *config;   // Same settings, forced lossy.
+  config_ll.lossless = 1;
+  config_lossy.lossless = 0;
+  enc->last_config_ = *config;  // Read again by FrameToFullCanvas().
+  enc->last_config2_ = config->lossless ? config_lossy : config_ll;
+  *frame_skipped = 0;
+
+  if (!WebPPictureInit(&sub_frame_none) || !WebPPictureInit(&sub_frame_bg)) {
+    return VP8_ENC_ERROR_INVALID_CONFIGURATION;
+  }
+
+  for (i = 0; i < CANDIDATE_COUNT; ++i) {  // Nothing to evaluate (or free) yet.
+    candidates[i].evaluate_ = 0;
+  }
+
+  // Change-rectangle assuming previous frame was DISPOSE_NONE.
+  GetSubRect(prev_canvas, curr_canvas, is_key_frame, is_first_frame,
+             empty_rect_allowed_none, &rect_none, &sub_frame_none);
+
+  if (IsEmptyRect(&rect_none)) {
+    // Don't encode the frame at all. Instead, the duration of the previous
+    // frame will be increased later.
+    assert(empty_rect_allowed_none);
+    *frame_skipped = 1;
+    goto End;
+  }
+
+  if (dispose_bg_possible) {
+    // Change-rectangle assuming previous frame was DISPOSE_BACKGROUND.
+    WebPPicture* const prev_canvas_disposed = &enc->prev_canvas_disposed_;
+    WebPCopyPixels(prev_canvas, prev_canvas_disposed);
+    DisposeFrameRectangle(WEBP_MUX_DISPOSE_BACKGROUND, &enc->prev_rect_,
+                          prev_canvas_disposed);
+    // Even if there is exact pixel match between 'disposed previous canvas' and
+    // 'current canvas', we can't skip current frame, as there may not be exact
+    // pixel match between 'previous canvas' and 'current canvas'. So, we don't
+    // allow empty rectangle in this case.
+    GetSubRect(prev_canvas_disposed, curr_canvas, is_key_frame, is_first_frame,
+               0 /* empty_rect_allowed */, &rect_bg, &sub_frame_bg);
+    assert(!IsEmptyRect(&rect_bg));
+
+    if (enc->options_.minimize_size) {  // Try both dispose methods.
+      try_dispose_bg = 1;
+      try_dispose_none = 1;
+    } else if (RectArea(&rect_bg) < RectArea(&rect_none)) {
+      try_dispose_bg = 1;  // Pick DISPOSE_BACKGROUND.
+      try_dispose_none = 0;
+    }
+  }
+
+  if (try_dispose_none) {
+    error_code = GenerateCandidates(
+        enc, candidates, WEBP_MUX_DISPOSE_NONE, is_lossless, is_key_frame,
+        &rect_none, &sub_frame_none, &config_ll, &config_lossy);
+    if (error_code != VP8_ENC_OK) goto Err;
+  }
+
+  if (try_dispose_bg) {
+    assert(!enc->is_first_frame_);
+    assert(dispose_bg_possible);
+    error_code = GenerateCandidates(
+        enc, candidates, WEBP_MUX_DISPOSE_BACKGROUND, is_lossless, is_key_frame,
+        &rect_bg, &sub_frame_bg, &config_ll, &config_lossy);
+    if (error_code != VP8_ENC_OK) goto Err;
+  }
+
+  PickBestCandidate(enc, candidates, is_key_frame, encoded_frame);
+
+  goto End;  // Success: the winner's data must not be freed below.
+
+ Err:
+  for (i = 0; i < CANDIDATE_COUNT; ++i) {  // Free what was already encoded.
+    if (candidates[i].evaluate_) {
+      WebPMemoryWriterClear(&candidates[i].mem_);
+    }
+  }
+
+ End:
+  WebPPictureFree(&sub_frame_none);
+  WebPPictureFree(&sub_frame_bg);
+  return error_code;
+}
+
+// Size penalty (in bytes, may be negative) of encoding the given frame as a
+// key-frame instead of a sub-frame. Sizes are cast so the math is signed.
+static int64_t KeyFramePenalty(const EncodedFrame* const encoded_frame) {
+  return ((int64_t)encoded_frame->key_frame_.bitstream.size -
+          (int64_t)encoded_frame->sub_frame_.bitstream.size);
+}
+
+static int CacheFrame(WebPAnimEncoder* const enc,
+                      const WebPConfig* const config) {
+  // Encodes the current canvas as the next cached frame. Returns 0 on error.
+  int ok = 0;
+  int frame_skipped = 0;
+  WebPEncodingError error_code = VP8_ENC_OK;
+  const size_t position = enc->count_;
+  EncodedFrame* const encoded_frame = GetFrame(enc, position);
+
+  ++enc->count_;  // Provisional: undone at 'End' on failure or skip.
+  if (enc->is_first_frame_) {  // Add this as a key-frame.
+    error_code = SetFrame(enc, config, 1, encoded_frame, &frame_skipped);
+    if (error_code != VP8_ENC_OK) goto End;
+    assert(frame_skipped == 0);  // First frame can't be skipped, even if empty.
+    assert(position == 0 && enc->count_ == 1);
+    encoded_frame->is_key_frame_ = 1;
+    enc->flush_count_ = 0;
+    enc->count_since_key_frame_ = 0;
+    enc->prev_candidate_undecided_ = 0;
+  } else {
+    ++enc->count_since_key_frame_;
+    if (enc->count_since_key_frame_ <= enc->options_.kmin) {
+      // Add this as a frame rectangle.
+      error_code = SetFrame(enc, config, 0, encoded_frame, &frame_skipped);
+      if (error_code != VP8_ENC_OK) goto End;
+      if (frame_skipped) goto Skip;
+      encoded_frame->is_key_frame_ = 0;
+      enc->flush_count_ = enc->count_ - 1;
+      enc->prev_candidate_undecided_ = 0;
+    } else {
+      int64_t curr_delta;
+
+      // Add this as a frame rectangle to enc.
+      error_code = SetFrame(enc, config, 0, encoded_frame, &frame_skipped);
+      if (error_code != VP8_ENC_OK) goto End;
+      if (frame_skipped) goto Skip;
+
+      // Add this as a key-frame to enc, too.
+      error_code = SetFrame(enc, config, 1, encoded_frame, &frame_skipped);
+      if (error_code != VP8_ENC_OK) goto End;
+      assert(frame_skipped == 0);  // Key-frame cannot be an empty rectangle.
+
+      // Analyze size difference of the two variants.
+      curr_delta = KeyFramePenalty(encoded_frame);
+      if (curr_delta <= enc->best_delta_) {  // Pick this as the key-frame.
+        if (enc->keyframe_ != KEYFRAME_NONE) {
+          // Demote the key-frame chosen earlier: this one replaces it.
+          EncodedFrame* const old_keyframe = GetFrame(enc, enc->keyframe_);
+          assert(old_keyframe->is_key_frame_);
+          old_keyframe->is_key_frame_ = 0;
+        }
+        encoded_frame->is_key_frame_ = 1;
+        enc->keyframe_ = (int)position;
+        enc->best_delta_ = curr_delta;
+        enc->flush_count_ = enc->count_ - 1;  // We can flush previous frames.
+      } else {
+        encoded_frame->is_key_frame_ = 0;
+      }
+      // Note: We need '>=' below because when kmin and kmax are both zero,
+      // count_since_key_frame will always be > kmax.
+      if (enc->count_since_key_frame_ >= enc->options_.kmax) {
+        enc->flush_count_ = enc->count_ - 1;
+        enc->count_since_key_frame_ = 0;
+        enc->keyframe_ = KEYFRAME_NONE;
+        enc->best_delta_ = DELTA_INFINITY;
+      }
+      enc->prev_candidate_undecided_ = 1;  // Both variants are kept for now.
+    }
+  }
+
+  // Update previous to previous and previous canvases for next call.
+  WebPCopyPixels(enc->curr_canvas_, &enc->prev_canvas_);
+  enc->is_first_frame_ = 0;
+
+ Skip:
+  ok = 1;  // A skipped frame still counts as a successfully added input frame.
+  ++enc->in_frame_count_;
+
+ End:
+  if (!ok || frame_skipped) {
+    FrameRelease(encoded_frame);
+    // We reset some counters, as the frame addition failed/was skipped.
+    --enc->count_;
+    if (!enc->is_first_frame_) --enc->count_since_key_frame_;
+    if (!ok) {
+      MarkError2(enc, "ERROR adding frame. WebPEncodingError", error_code);
+    }
+  }
+  enc->curr_canvas_->error_code = error_code;   // report error_code
+  assert(ok || error_code != VP8_ENC_OK);
+  return ok;
+}
+
+// Pushes the first 'flush_count_' cached frames to the mux, releasing each one.
+static int FlushFrames(WebPAnimEncoder* const enc) {
+  for (; enc->flush_count_ > 0; --enc->flush_count_) {
+    EncodedFrame* const head = GetFrame(enc, 0);
+    const WebPMuxFrameInfo* const info =
+        head->is_key_frame_ ? &head->key_frame_ : &head->sub_frame_;
+    WebPMuxError status;
+    assert(enc->mux_ != NULL);
+    status = WebPMuxPushFrame(enc->mux_, info, 1);
+    if (status != WEBP_MUX_OK) {
+      MarkError2(enc, "ERROR adding frame. WebPMuxError", status);
+      return 0;
+    }
+    if (enc->options_.verbose) {
+      fprintf(stderr, "INFO: Added frame. offset:%d,%d dispose:%d blend:%d\n",
+              info->x_offset, info->y_offset, info->dispose_method,
+              info->blend_method);
+    }
+    ++enc->out_frame_count_;
+    FrameRelease(head);
+    ++enc->start_;
+    --enc->count_;
+    if (enc->keyframe_ != KEYFRAME_NONE) --enc->keyframe_;
+  }
+
+  if (enc->count_ == 1 && enc->start_ != 0) {
+    // A single frame is left: swap it into slot 0 and restart from there.
+    const int old_start = (int)enc->start_;
+    const EncodedFrame displaced = enc->encoded_frames_[0];
+    enc->encoded_frames_[0] = enc->encoded_frames_[old_start];
+    enc->encoded_frames_[old_start] = displaced;
+    FrameRelease(&enc->encoded_frames_[old_start]);
+    enc->start_ = 0;
+  }
+  return 1;
+}
+
+#undef DELTA_INFINITY
+#undef KEYFRAME_NONE
+
+int WebPAnimEncoderAdd(WebPAnimEncoder* enc, WebPPicture* frame, int timestamp,
+                       const WebPConfig* encoder_config) {
+  // Adds 'frame' at 'timestamp' (NULL frame: final call). Returns 0 on error.
+  WebPConfig config;
+
+  if (enc == NULL) {
+    return 0;
+  }
+  MarkNoError(enc);
+  if (!enc->is_first_frame_) {
+    // Make sure timestamps are non-decreasing (integer wrap-around is OK).
+    const uint32_t prev_frame_duration =
+        (uint32_t)timestamp - enc->prev_timestamp_;
+    if (prev_frame_duration >= MAX_DURATION) {
+      if (frame != NULL) {
+        frame->error_code = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+      }
+      MarkError(enc, "ERROR adding frame: timestamps must be non-decreasing");
+      return 0;
+    }
+    if (!IncreasePreviousDuration(enc, (int)prev_frame_duration)) {
+      return 0;
+    }
+  } else {
+    enc->first_timestamp_ = timestamp;
+  }
+
+  if (frame == NULL) {  // Special: last call.
+    enc->got_null_frame_ = 1;
+    enc->prev_timestamp_ = timestamp;
+    return 1;
+  }
+
+  if (frame->width != enc->canvas_width_ ||
+      frame->height != enc->canvas_height_) {
+    frame->error_code = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+    MarkError(enc, "ERROR adding frame: Invalid frame dimensions");
+    return 0;
+  }
+
+  if (!frame->use_argb) {  // Convert frame from YUV(A) to ARGB.
+    if (enc->options_.verbose) {
+      fprintf(stderr, "WARNING: Converting frame from YUV(A) to ARGB format; "
+              "this incurs a small loss.\n");
+    }
+    if (!WebPPictureYUVAToARGB(frame)) {
+      MarkError(enc, "ERROR converting frame from YUV(A) to ARGB");
+      return 0;
+    }
+  }
+
+  if (encoder_config != NULL) {
+    config = *encoder_config;
+  } else {  // No config supplied: default settings, lossless.
+    WebPConfigInit(&config);
+    config.lossless = 1;
+  }
+  assert(enc->curr_canvas_ == NULL);
+  enc->curr_canvas_ = frame;  // Store reference.
+  assert(enc->curr_canvas_copy_modified_ == 1);
+  CopyCurrentCanvas(enc);  // Working copy of 'frame' for the encoding steps.
+
+  if (!CacheFrame(enc, &config)) {
+    return 0;  // NOTE(review): curr_canvas_ stays set here - confirm intended.
+  }
+
+  if (!FlushFrames(enc)) {
+    return 0;
+  }
+  enc->curr_canvas_ = NULL;  // Drop the reference to the caller's picture.
+  enc->curr_canvas_copy_modified_ = 1;
+  enc->prev_timestamp_ = timestamp;
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Bitstream assembly.
+
+// Clears 'canvas' and decodes the bitstream of 'frame' onto it at the frame's
+// offsets. Returns 1 on success, 0 on failure.
+static int DecodeFrameOntoCanvas(const WebPMuxFrameInfo* const frame,
+                                 WebPPicture* const canvas) {
+  const WebPData* const data = &frame->bitstream;
+  WebPPicture view;
+  WebPDecoderConfig dec;
+
+  WebPInitDecoderConfig(&dec);
+  WebPUtilClearPic(canvas, NULL);
+  // Read the frame dimensions, then take a view of that area of the canvas.
+  if (WebPGetFeatures(data->bytes, data->size, &dec.input) != VP8_STATUS_OK ||
+      !WebPPictureView(canvas, frame->x_offset, frame->y_offset,
+                       dec.input.width, dec.input.height, &view)) {
+    return 0;
+  }
+
+  // Decode straight into the view's ARGB samples (external memory).
+  dec.output.is_external_memory = 1;
+  dec.output.colorspace = MODE_BGRA;
+  dec.output.u.RGBA.rgba = (uint8_t*)view.argb;
+  dec.output.u.RGBA.stride = view.argb_stride * 4;
+  dec.output.u.RGBA.size = dec.output.u.RGBA.stride * view.height;
+
+  return (WebPDecode(data->bytes, data->size, &dec) == VP8_STATUS_OK);
+}
+
+// Re-encodes 'frame' as a full-canvas image. Returns 1 on success, else 0.
+static int FrameToFullCanvas(WebPAnimEncoder* const enc,
+                             const WebPMuxFrameInfo* const frame,
+                             WebPData* const full_image) {
+  WebPPicture* const canvas_buf = &enc->curr_canvas_copy_;
+  WebPMemoryWriter mem1, mem2;
+  WebPMemoryWriterInit(&mem1);
+  WebPMemoryWriterInit(&mem2);
+  if (!DecodeFrameOntoCanvas(frame, canvas_buf)) goto Err;
+  if (!EncodeFrame(&enc->last_config_, canvas_buf, &mem1)) goto Err;
+  GetEncodedData(&mem1, full_image);
+  if (enc->options_.allow_mixed) {  // Also try the other compression type.
+    if (!EncodeFrame(&enc->last_config2_, canvas_buf, &mem2)) goto Err;
+    if (mem2.size < mem1.size) {
+      GetEncodedData(&mem2, full_image);
+      WebPMemoryWriterClear(&mem1);
+    } else {
+      WebPMemoryWriterClear(&mem2);
+    }
+  }
+  return 1;
+
+ Err:
+  WebPMemoryWriterClear(&mem1);
+  WebPMemoryWriterClear(&mem2);
+  WebPDataInit(full_image);  // Never leave it pointing at the freed buffers.
+  return 0;
+}
+
+// Convert a single-frame animation to a non-animated image if that is smaller.
+// TODO(urvang): Can we pick one of the two heuristically (based on frame
+// rectangle and/or presence of alpha)?
+static WebPMuxError OptimizeSingleFrame(WebPAnimEncoder* const enc,
+                                        WebPData* const webp_data) {
+  WebPMuxError err = WEBP_MUX_OK;
+  int canvas_width, canvas_height;
+  WebPMuxFrameInfo frame;
+  WebPData full_image;
+  WebPData still;  // Re-assembled file holding a single full-canvas image.
+  WebPMux* const mux = WebPMuxCreate(webp_data, 0);
+  if (mux == NULL) return WEBP_MUX_BAD_DATA;
+  assert(enc->out_frame_count_ == 1);
+  WebPDataInit(&frame.bitstream);
+  WebPDataInit(&full_image);
+  WebPDataInit(&still);
+
+  do {  // Single pass; 'break' jumps to the common cleanup below.
+    err = WebPMuxGetFrame(mux, 1, &frame);
+    if (err != WEBP_MUX_OK) break;
+    if (frame.id != WEBP_CHUNK_ANMF) break;  // Non-animation: nothing to do.
+    err = WebPMuxGetCanvasSize(mux, &canvas_width, &canvas_height);
+    if (err != WEBP_MUX_OK) break;
+    if (!FrameToFullCanvas(enc, &frame, &full_image)) {
+      err = WEBP_MUX_BAD_DATA;
+      break;
+    }
+    err = WebPMuxSetImage(mux, &full_image, 1);
+    if (err != WEBP_MUX_OK) break;
+    err = WebPMuxAssemble(mux, &still);
+    if (err != WEBP_MUX_OK) break;
+    if (still.size < webp_data->size) {  // Pick 'still' if smaller.
+      WebPDataClear(webp_data);
+      *webp_data = still;
+      WebPDataInit(&still);
+    }
+  } while (0);
+
+  WebPDataClear(&frame.bitstream);
+  WebPDataClear(&full_image);
+  WebPMuxDelete(mux);
+  WebPDataClear(&still);
+  return err;
+}
+
+// Flushes all cached frames and assembles the animation into 'webp_data'.
+// Returns 1 on success, 0 on failure.
+int WebPAnimEncoderAssemble(WebPAnimEncoder* enc, WebPData* webp_data) {
+  WebPMux* mux;
+  WebPMuxError err;
+
+  if (enc == NULL) return 0;
+  MarkNoError(enc);
+
+  if (webp_data == NULL) {
+    MarkError(enc, "ERROR assembling: NULL input");
+    return 0;
+  }
+
+  if (enc->in_frame_count_ == 0) {
+    MarkError(enc, "ERROR: No frames to assemble");
+    return 0;
+  }
+
+  if (!enc->got_null_frame_ && enc->in_frame_count_ > 1 && enc->count_ > 0) {
+    // set duration of the last frame to be avg of durations of previous frames.
+    // The span is computed in uint32_t, as timestamps are allowed to wrap.
+    const double delta_time =
+        (uint32_t)enc->prev_timestamp_ - (uint32_t)enc->first_timestamp_;
+    const int average_duration = (int)(delta_time / (enc->in_frame_count_ - 1));
+    if (!IncreasePreviousDuration(enc, average_duration)) return 0;
+  }
+
+  // Flush any remaining frames.
+  enc->flush_count_ = enc->count_;
+  if (!FlushFrames(enc)) {
+    return 0;
+  }
+
+  // Set definitive canvas size.
+  mux = enc->mux_;
+  err = WebPMuxSetCanvasSize(mux, enc->canvas_width_, enc->canvas_height_);
+  if (err != WEBP_MUX_OK) goto Err;
+
+  err = WebPMuxSetAnimationParams(mux, &enc->options_.anim_params);
+  if (err != WEBP_MUX_OK) goto Err;
+
+  // Assemble into a WebP bitstream.
+  err = WebPMuxAssemble(mux, webp_data);
+  if (err != WEBP_MUX_OK) goto Err;
+
+  if (enc->out_frame_count_ == 1) {
+    err = OptimizeSingleFrame(enc, webp_data);
+    if (err != WEBP_MUX_OK) goto Err;
+  }
+  return 1;
+
+ Err:
+  MarkError2(enc, "ERROR assembling WebP", err);
+  return 0;
+}
+
+const char* WebPAnimEncoderGetError(WebPAnimEncoder* enc) {
+  // A NULL encoder has no error string to report.
+  return (enc != NULL) ? enc->error_str_ : NULL;
+}
+
+// -----------------------------------------------------------------------------
diff --git a/drivers/webp/mux/muxedit.c b/drivers/webp/mux/muxedit.c
index 08629d4ae2..b27663f87a 100644
--- a/drivers/webp/mux/muxedit.c
+++ b/drivers/webp/mux/muxedit.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Set and delete APIs for mux.
@@ -12,50 +14,51 @@
 
 #include <assert.h>
 #include "./muxi.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Life of a mux object.
 
 static void MuxInit(WebPMux* const mux) {
-  if (mux == NULL) return;
+  assert(mux != NULL);
   memset(mux, 0, sizeof(*mux));
+  mux->canvas_width_ = 0;     // just to be explicit
+  mux->canvas_height_ = 0;
 }
 
 WebPMux* WebPNewInternal(int version) {
   if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) {
     return NULL;
   } else {
-    WebPMux* const mux = (WebPMux*)malloc(sizeof(WebPMux));
-    // If mux is NULL MuxInit is a noop.
-    MuxInit(mux);
+    WebPMux* const mux = (WebPMux*)WebPSafeMalloc(1ULL, sizeof(WebPMux));
+    if (mux != NULL) MuxInit(mux);
     return mux;
   }
 }
 
-static void DeleteAllChunks(WebPChunk** const chunk_list) {
-  while (*chunk_list) {
-    *chunk_list = ChunkDelete(*chunk_list);
+// Delete all images in 'wpi_list'.
+static void DeleteAllImages(WebPMuxImage** const wpi_list) {
+  while (*wpi_list != NULL) {
+    *wpi_list = MuxImageDelete(*wpi_list);
   }
 }
 
 static void MuxRelease(WebPMux* const mux) {
-  if (mux == NULL) return;
-  MuxImageDeleteAll(&mux->images_);
-  DeleteAllChunks(&mux->vp8x_);
-  DeleteAllChunks(&mux->iccp_);
-  DeleteAllChunks(&mux->loop_);
-  DeleteAllChunks(&mux->meta_);
-  DeleteAllChunks(&mux->unknown_);
+  assert(mux != NULL);
+  DeleteAllImages(&mux->images_);
+  ChunkListDelete(&mux->vp8x_);
+  ChunkListDelete(&mux->iccp_);
+  ChunkListDelete(&mux->anim_);
+  ChunkListDelete(&mux->exif_);
+  ChunkListDelete(&mux->xmp_);
+  ChunkListDelete(&mux->unknown_);
 }
 
 void WebPMuxDelete(WebPMux* mux) {
-  // If mux is NULL MuxRelease is a noop.
-  MuxRelease(mux);
-  free(mux);
+  if (mux != NULL) {
+    MuxRelease(mux);
+    WebPSafeFree(mux);
+  }
 }
 
 //------------------------------------------------------------------------------
@@ -64,81 +67,60 @@ void WebPMuxDelete(WebPMux* mux) {
 // Handy MACRO, makes MuxSet() very symmetric to MuxGet().
 #define SWITCH_ID_LIST(INDEX, LIST)                                            \
   if (idx == (INDEX)) {                                                        \
-    err = ChunkAssignData(&chunk, data, copy_data, kChunks[(INDEX)].tag);      \
+    err = ChunkAssignData(&chunk, data, copy_data, tag);                       \
     if (err == WEBP_MUX_OK) {                                                  \
       err = ChunkSetNth(&chunk, (LIST), nth);                                  \
     }                                                                          \
     return err;                                                                \
   }
 
-static WebPMuxError MuxSet(WebPMux* const mux, CHUNK_INDEX idx, uint32_t nth,
+static WebPMuxError MuxSet(WebPMux* const mux, uint32_t tag, uint32_t nth,
                            const WebPData* const data, int copy_data) {
   WebPChunk chunk;
   WebPMuxError err = WEBP_MUX_NOT_FOUND;
+  const CHUNK_INDEX idx = ChunkGetIndexFromTag(tag);
   assert(mux != NULL);
   assert(!IsWPI(kChunks[idx].id));
 
   ChunkInit(&chunk);
-  SWITCH_ID_LIST(IDX_VP8X, &mux->vp8x_);
-  SWITCH_ID_LIST(IDX_ICCP, &mux->iccp_);
-  SWITCH_ID_LIST(IDX_LOOP, &mux->loop_);
-  SWITCH_ID_LIST(IDX_META, &mux->meta_);
-  if (idx == IDX_UNKNOWN && data->size_ > TAG_SIZE) {
-    // For raw-data unknown chunk, the first four bytes should be the tag to be
-    // used for the chunk.
-    const WebPData tmp = { data->bytes_ + TAG_SIZE, data->size_ - TAG_SIZE };
-    err = ChunkAssignData(&chunk, &tmp, copy_data, GetLE32(data->bytes_ + 0));
-    if (err == WEBP_MUX_OK)
-      err = ChunkSetNth(&chunk, &mux->unknown_, nth);
-  }
+  SWITCH_ID_LIST(IDX_VP8X,    &mux->vp8x_);
+  SWITCH_ID_LIST(IDX_ICCP,    &mux->iccp_);
+  SWITCH_ID_LIST(IDX_ANIM,    &mux->anim_);
+  SWITCH_ID_LIST(IDX_EXIF,    &mux->exif_);
+  SWITCH_ID_LIST(IDX_XMP,     &mux->xmp_);
+  SWITCH_ID_LIST(IDX_UNKNOWN, &mux->unknown_);
   return err;
 }
 #undef SWITCH_ID_LIST
 
-static WebPMuxError MuxAddChunk(WebPMux* const mux, uint32_t nth, uint32_t tag,
-                                const uint8_t* data, size_t size,
-                                int copy_data) {
-  const CHUNK_INDEX idx = ChunkGetIndexFromTag(tag);
-  const WebPData chunk_data = { data, size };
-  assert(mux != NULL);
-  assert(size <= MAX_CHUNK_PAYLOAD);
-  assert(idx != IDX_NIL);
-  return MuxSet(mux, idx, nth, &chunk_data, copy_data);
-}
+// Create data for frame/fragment given image data, offsets and duration.
+static WebPMuxError CreateFrameFragmentData(
+    int width, int height, const WebPMuxFrameInfo* const info, int is_frame,
+    WebPData* const frame_frgm) {
+  uint8_t* frame_frgm_bytes;
+  const size_t frame_frgm_size = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].size;
 
-// Create data for frame/tile given image data, offsets and duration.
-static WebPMuxError CreateFrameTileData(const WebPData* const image,
-                                        int x_offset, int y_offset,
-                                        int duration, int is_lossless,
-                                        int is_frame,
-                                        WebPData* const frame_tile) {
-  int width;
-  int height;
-  uint8_t* frame_tile_bytes;
-  const size_t frame_tile_size = kChunks[is_frame ? IDX_FRAME : IDX_TILE].size;
-
-  const int ok = is_lossless ?
-      VP8LGetInfo(image->bytes_, image->size_, &width, &height, NULL) :
-      VP8GetInfo(image->bytes_, image->size_, image->size_, &width, &height);
-  if (!ok) return WEBP_MUX_INVALID_ARGUMENT;
-
-  assert(width > 0 && height > 0 && duration > 0);
+  assert(width > 0 && height > 0 && info->duration >= 0);
+  assert(info->dispose_method == (info->dispose_method & 1));
   // Note: assertion on upper bounds is done in PutLE24().
 
-  frame_tile_bytes = (uint8_t*)malloc(frame_tile_size);
-  if (frame_tile_bytes == NULL) return WEBP_MUX_MEMORY_ERROR;
+  frame_frgm_bytes = (uint8_t*)WebPSafeMalloc(1ULL, frame_frgm_size);
+  if (frame_frgm_bytes == NULL) return WEBP_MUX_MEMORY_ERROR;
 
-  PutLE24(frame_tile_bytes + 0, x_offset / 2);
-  PutLE24(frame_tile_bytes + 3, y_offset / 2);
+  PutLE24(frame_frgm_bytes + 0, info->x_offset / 2);
+  PutLE24(frame_frgm_bytes + 3, info->y_offset / 2);
 
   if (is_frame) {
-    PutLE24(frame_tile_bytes + 6, width - 1);
-    PutLE24(frame_tile_bytes + 9, height - 1);
-    PutLE24(frame_tile_bytes + 12, duration - 1);
+    PutLE24(frame_frgm_bytes + 6, width - 1);
+    PutLE24(frame_frgm_bytes + 9, height - 1);
+    PutLE24(frame_frgm_bytes + 12, info->duration);
+    frame_frgm_bytes[15] =
+        (info->blend_method == WEBP_MUX_NO_BLEND ? 2 : 0) |
+        (info->dispose_method == WEBP_MUX_DISPOSE_BACKGROUND ? 1 : 0);
   }
 
-  frame_tile->bytes_ = frame_tile_bytes;
-  frame_tile->size_ = frame_tile_size;
+  frame_frgm->bytes = frame_frgm_bytes;
+  frame_frgm->size = frame_frgm_size;
   return WEBP_MUX_OK;
 }
 
@@ -149,8 +131,8 @@ static WebPMuxError GetImageData(const WebPData* const bitstream,
                                  WebPData* const image, WebPData* const alpha,
                                  int* const is_lossless) {
   WebPDataInit(alpha);  // Default: no alpha.
-  if (bitstream->size_ < TAG_SIZE ||
-      memcmp(bitstream->bytes_, "RIFF", TAG_SIZE)) {
+  if (bitstream->size < TAG_SIZE ||
+      memcmp(bitstream->bytes, "RIFF", TAG_SIZE)) {
     // It is NOT webp file data. Return input data as is.
     *image = *bitstream;
   } else {
@@ -166,7 +148,7 @@ static WebPMuxError GetImageData(const WebPData* const bitstream,
     }
     WebPMuxDelete(mux);
   }
-  *is_lossless = VP8LCheckSignature(image->bytes_, image->size_);
+  *is_lossless = VP8LCheckSignature(image->bytes, image->size);
   return WEBP_MUX_OK;
 }
 
@@ -185,204 +167,166 @@ static WebPMuxError DeleteChunks(WebPChunk** chunk_list, uint32_t tag) {
   return err;
 }
 
-static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, CHUNK_INDEX idx) {
-  const WebPChunkId id = kChunks[idx].id;
-  WebPChunk** chunk_list;
-
-  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, uint32_t tag) {
+  const WebPChunkId id = ChunkGetIdFromTag(tag);
+  assert(mux != NULL);
   if (IsWPI(id)) return WEBP_MUX_INVALID_ARGUMENT;
-
-  chunk_list = MuxGetChunkListFromId(mux, id);
-  if (chunk_list == NULL) return WEBP_MUX_INVALID_ARGUMENT;
-
-  return DeleteChunks(chunk_list, kChunks[idx].tag);
-}
-
-static WebPMuxError DeleteLoopCount(WebPMux* const mux) {
-  return MuxDeleteAllNamedData(mux, IDX_LOOP);
+  return DeleteChunks(MuxGetChunkListFromId(mux, id), tag);
 }
 
 //------------------------------------------------------------------------------
 // Set API(s).
 
-WebPMuxError WebPMuxSetImage(WebPMux* mux,
-                             const WebPData* bitstream, int copy_data) {
+WebPMuxError WebPMuxSetChunk(WebPMux* mux, const char fourcc[4],
+                             const WebPData* chunk_data, int copy_data) {
+  uint32_t tag;
   WebPMuxError err;
-  WebPChunk chunk;
-  WebPMuxImage wpi;
-  WebPData image;
-  WebPData alpha;
-  int is_lossless;
-  int image_tag;
-
-  if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
-      bitstream->size_ > MAX_CHUNK_PAYLOAD) {
+  if (mux == NULL || fourcc == NULL || chunk_data == NULL ||
+      chunk_data->bytes == NULL || chunk_data->size > MAX_CHUNK_PAYLOAD) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
+  tag = ChunkGetTagFromFourCC(fourcc);
 
-  // If given data is for a whole webp file,
-  // extract only the VP8/VP8L data from it.
-  err = GetImageData(bitstream, &image, &alpha, &is_lossless);
-  if (err != WEBP_MUX_OK) return err;
-  image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
-
-  // Delete the existing images.
-  MuxImageDeleteAll(&mux->images_);
-
-  MuxImageInit(&wpi);
+  // Delete existing chunk(s) with the same 'fourcc'.
+  err = MuxDeleteAllNamedData(mux, tag);
+  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
 
-  if (alpha.bytes_ != NULL) {  // Add alpha chunk.
-    ChunkInit(&chunk);
-    err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
-    if (err != WEBP_MUX_OK) goto Err;
-    err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
-    if (err != WEBP_MUX_OK) goto Err;
-  }
+  // Add the given chunk.
+  return MuxSet(mux, tag, 1, chunk_data, copy_data);
+}
 
-  // Add image chunk.
+// Creates a chunk from given 'data' and sets it as 1st chunk in 'chunk_list'.
+static WebPMuxError AddDataToChunkList(
+    const WebPData* const data, int copy_data, uint32_t tag,
+    WebPChunk** chunk_list) {
+  WebPChunk chunk;
+  WebPMuxError err;
   ChunkInit(&chunk);
-  err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
+  err = ChunkAssignData(&chunk, data, copy_data, tag);
   if (err != WEBP_MUX_OK) goto Err;
-  err = ChunkSetNth(&chunk, &wpi.img_, 1);
-  if (err != WEBP_MUX_OK) goto Err;
-
-  // Add this image to mux.
-  err = MuxImagePush(&wpi, &mux->images_);
+  err = ChunkSetNth(&chunk, chunk_list, 1);
   if (err != WEBP_MUX_OK) goto Err;
-
-  // All OK.
   return WEBP_MUX_OK;
-
  Err:
-  // Something bad happened.
   ChunkRelease(&chunk);
-  MuxImageRelease(&wpi);
   return err;
 }
 
-WebPMuxError WebPMuxSetMetadata(WebPMux* mux, const WebPData* metadata,
-                                int copy_data) {
-  WebPMuxError err;
-
-  if (mux == NULL || metadata == NULL || metadata->bytes_ == NULL ||
-      metadata->size_ > MAX_CHUNK_PAYLOAD) {
-    return WEBP_MUX_INVALID_ARGUMENT;
+// Extracts image & alpha data from the given bitstream and then sets wpi.alpha_
+// and wpi.img_ appropriately.
+static WebPMuxError SetAlphaAndImageChunks(
+    const WebPData* const bitstream, int copy_data, WebPMuxImage* const wpi) {
+  int is_lossless = 0;
+  WebPData image, alpha;
+  WebPMuxError err = GetImageData(bitstream, &image, &alpha, &is_lossless);
+  const int image_tag =
+      is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
+  if (err != WEBP_MUX_OK) return err;
+  if (alpha.bytes != NULL) {
+    err = AddDataToChunkList(&alpha, copy_data, kChunks[IDX_ALPHA].tag,
+                             &wpi->alpha_);
+    if (err != WEBP_MUX_OK) return err;
   }
-
-  // Delete the existing metadata chunk(s).
-  err = WebPMuxDeleteMetadata(mux);
-  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
-
-  // Add the given metadata chunk.
-  return MuxSet(mux, IDX_META, 1, metadata, copy_data);
+  err = AddDataToChunkList(&image, copy_data, image_tag, &wpi->img_);
+  if (err != WEBP_MUX_OK) return err;
+  return MuxImageFinalize(wpi) ? WEBP_MUX_OK : WEBP_MUX_INVALID_ARGUMENT;
 }
 
-WebPMuxError WebPMuxSetColorProfile(WebPMux* mux, const WebPData* color_profile,
-                                    int copy_data) {
+WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream,
+                             int copy_data) {
+  WebPMuxImage wpi;
   WebPMuxError err;
 
-  if (mux == NULL || color_profile == NULL || color_profile->bytes_ == NULL ||
-      color_profile->size_ > MAX_CHUNK_PAYLOAD) {
+  // Sanity checks.
+  if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL ||
+      bitstream->size > MAX_CHUNK_PAYLOAD) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
 
-  // Delete the existing ICCP chunk(s).
-  err = WebPMuxDeleteColorProfile(mux);
-  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
-
-  // Add the given ICCP chunk.
-  return MuxSet(mux, IDX_ICCP, 1, color_profile, copy_data);
-}
-
-WebPMuxError WebPMuxSetLoopCount(WebPMux* mux, int loop_count) {
-  WebPMuxError err;
-  uint8_t* data = NULL;
+  if (mux->images_ != NULL) {
+    // Only one 'simple image' can be added in mux. So, remove present images.
+    DeleteAllImages(&mux->images_);
+  }
 
-  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
-  if (loop_count >= MAX_LOOP_COUNT) return WEBP_MUX_INVALID_ARGUMENT;
+  MuxImageInit(&wpi);
+  err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
+  if (err != WEBP_MUX_OK) goto Err;
 
-  // Delete the existing LOOP chunk(s).
-  err = DeleteLoopCount(mux);
-  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
+  // Add this WebPMuxImage to mux.
+  err = MuxImagePush(&wpi, &mux->images_);
+  if (err != WEBP_MUX_OK) goto Err;
 
-  // Add the given loop count.
-  data = (uint8_t*)malloc(kChunks[IDX_LOOP].size);
-  if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
+  // All is well.
+  return WEBP_MUX_OK;
 
-  PutLE16(data, loop_count);
-  err = MuxAddChunk(mux, 1, kChunks[IDX_LOOP].tag, data,
-                    kChunks[IDX_LOOP].size, 1);
-  free(data);
+ Err:  // Something bad happened.
+  MuxImageRelease(&wpi);
   return err;
 }
 
-static WebPMuxError MuxPushFrameTileInternal(
-    WebPMux* const mux, const WebPData* const bitstream, int x_offset,
-    int y_offset, int duration, int copy_data, uint32_t tag) {
-  WebPChunk chunk;
-  WebPData image;
-  WebPData alpha;
+WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* frame,
+                              int copy_data) {
   WebPMuxImage wpi;
   WebPMuxError err;
-  WebPData frame_tile;
-  const int is_frame = (tag == kChunks[IDX_FRAME].tag) ? 1 : 0;
-  int is_lossless;
-  int image_tag;
+  int is_frame;
+  const WebPData* const bitstream = &frame->bitstream;
 
   // Sanity checks.
-  if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
-      bitstream->size_ > MAX_CHUNK_PAYLOAD) {
+  if (mux == NULL || frame == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+  is_frame = (frame->id == WEBP_CHUNK_ANMF);
+  if (!(is_frame || (frame->id == WEBP_CHUNK_FRGM))) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
-  if (x_offset < 0 || x_offset >= MAX_POSITION_OFFSET ||
-      y_offset < 0 || y_offset >= MAX_POSITION_OFFSET ||
-      duration <= 0 || duration > MAX_DURATION) {
+  if (frame->id == WEBP_CHUNK_FRGM) {     // Dead experiment.
     return WEBP_MUX_INVALID_ARGUMENT;
   }
 
-  // Snap offsets to even positions.
-  x_offset &= ~1;
-  y_offset &= ~1;
+  if (bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
 
-  // If given data is for a whole webp file,
-  // extract only the VP8/VP8L data from it.
-  err = GetImageData(bitstream, &image, &alpha, &is_lossless);
-  if (err != WEBP_MUX_OK) return err;
-  image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
+  if (mux->images_ != NULL) {
+    const WebPMuxImage* const image = mux->images_;
+    const uint32_t image_id = (image->header_ != NULL) ?
+        ChunkGetIdFromTag(image->header_->tag_) : WEBP_CHUNK_IMAGE;
+    if (image_id != frame->id) {
+      return WEBP_MUX_INVALID_ARGUMENT;  // Conflicting frame types.
+    }
+  }
 
-  WebPDataInit(&frame_tile);
-  ChunkInit(&chunk);
   MuxImageInit(&wpi);
-
-  if (alpha.bytes_ != NULL) {
-    // Add alpha chunk.
-    err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
+  err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
+  if (err != WEBP_MUX_OK) goto Err;
+  assert(wpi.img_ != NULL);  // As SetAlphaAndImageChunks() was successful.
+
+  {
+    WebPData frame_frgm;
+    const uint32_t tag = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].tag;
+    WebPMuxFrameInfo tmp = *frame;
+    tmp.x_offset &= ~1;  // Snap offsets to even.
+    tmp.y_offset &= ~1;
+    if (!is_frame) {  // Reset unused values.
+      tmp.duration = 1;
+      tmp.dispose_method = WEBP_MUX_DISPOSE_NONE;
+      tmp.blend_method = WEBP_MUX_BLEND;
+    }
+    if (tmp.x_offset < 0 || tmp.x_offset >= MAX_POSITION_OFFSET ||
+        tmp.y_offset < 0 || tmp.y_offset >= MAX_POSITION_OFFSET ||
+        (tmp.duration < 0 || tmp.duration >= MAX_DURATION) ||
+        tmp.dispose_method != (tmp.dispose_method & 1)) {
+      err = WEBP_MUX_INVALID_ARGUMENT;
+      goto Err;
+    }
+    err = CreateFrameFragmentData(wpi.width_, wpi.height_, &tmp, is_frame,
+                                  &frame_frgm);
     if (err != WEBP_MUX_OK) goto Err;
-    err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
+    // Add frame/fragment chunk (with copy_data = 1).
+    err = AddDataToChunkList(&frame_frgm, 1, tag, &wpi.header_);
+    WebPDataClear(&frame_frgm);  // frame_frgm owned by wpi.header_ now.
     if (err != WEBP_MUX_OK) goto Err;
-    ChunkInit(&chunk);  // chunk owned by wpi.alpha_ now.
   }
 
-  // Add image chunk.
-  err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
-  if (err != WEBP_MUX_OK) goto Err;
-  err = ChunkSetNth(&chunk, &wpi.img_, 1);
-  if (err != WEBP_MUX_OK) goto Err;
-  ChunkInit(&chunk);  // chunk owned by wpi.img_ now.
-
-  // Create frame/tile data.
-  err = CreateFrameTileData(&image, x_offset, y_offset, duration, is_lossless,
-                            is_frame, &frame_tile);
-  if (err != WEBP_MUX_OK) goto Err;
-
-  // Add frame/tile chunk (with copy_data = 1).
-  err = ChunkAssignData(&chunk, &frame_tile, 1, tag);
-  if (err != WEBP_MUX_OK) goto Err;
-  WebPDataClear(&frame_tile);
-  err = ChunkSetNth(&chunk, &wpi.header_, 1);
-  if (err != WEBP_MUX_OK) goto Err;
-  ChunkInit(&chunk);  // chunk owned by wpi.header_ now.
-
   // Add this WebPMuxImage to mux.
   err = MuxImagePush(&wpi, &mux->images_);
   if (err != WEBP_MUX_OK) goto Err;
@@ -391,128 +335,114 @@ static WebPMuxError MuxPushFrameTileInternal(
   return WEBP_MUX_OK;
 
  Err:  // Something bad happened.
-  WebPDataClear(&frame_tile);
-  ChunkRelease(&chunk);
   MuxImageRelease(&wpi);
   return err;
 }
 
-WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPData* bitstream,
-                              int x_offset, int y_offset,
-                              int duration, int copy_data) {
-  return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
-                                  duration, copy_data, kChunks[IDX_FRAME].tag);
-}
-
-WebPMuxError WebPMuxPushTile(WebPMux* mux, const WebPData* bitstream,
-                             int x_offset, int y_offset,
-                             int copy_data) {
-  return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
-                                  1 /* unused duration */, copy_data,
-                                  kChunks[IDX_TILE].tag);
-}
-
-//------------------------------------------------------------------------------
-// Delete API(s).
-
-WebPMuxError WebPMuxDeleteImage(WebPMux* mux) {
+WebPMuxError WebPMuxSetAnimationParams(WebPMux* mux,
+                                       const WebPMuxAnimParams* params) {
   WebPMuxError err;
+  uint8_t data[ANIM_CHUNK_SIZE];
+  const WebPData anim = { data, ANIM_CHUNK_SIZE };
 
-  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  if (params->loop_count < 0 || params->loop_count >= MAX_LOOP_COUNT) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
 
-  err = MuxValidateForImage(mux);
-  if (err != WEBP_MUX_OK) return err;
+  // Delete any existing ANIM chunk(s).
+  err = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
+  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
 
-  // All well, delete image.
-  MuxImageDeleteAll(&mux->images_);
-  return WEBP_MUX_OK;
+  // Set the animation parameters.
+  PutLE32(data, params->bgcolor);
+  PutLE16(data + 4, params->loop_count);
+  return MuxSet(mux, kChunks[IDX_ANIM].tag, 1, &anim, 1);
 }
 
-WebPMuxError WebPMuxDeleteMetadata(WebPMux* mux) {
-  return MuxDeleteAllNamedData(mux, IDX_META);
-}
+WebPMuxError WebPMuxSetCanvasSize(WebPMux* mux,
+                                  int width, int height) {
+  WebPMuxError err;
+  if (mux == NULL) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  if (width < 0 || height < 0 ||
+      width > MAX_CANVAS_SIZE || height > MAX_CANVAS_SIZE) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  if ((width * height) == 0 && (width | height) != 0) {
+    // one of width / height is zero, but not both -> invalid!
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  // If we already assembled a VP8X chunk, invalidate it.
+  err = MuxDeleteAllNamedData(mux, kChunks[IDX_VP8X].tag);
+  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
 
-WebPMuxError WebPMuxDeleteColorProfile(WebPMux* mux) {
-  return MuxDeleteAllNamedData(mux, IDX_ICCP);
+  mux->canvas_width_ = width;
+  mux->canvas_height_ = height;
+  return WEBP_MUX_OK;
 }
 
-static WebPMuxError DeleteFrameTileInternal(WebPMux* const mux, uint32_t nth,
-                                            CHUNK_INDEX idx) {
-  const WebPChunkId id = kChunks[idx].id;
-  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+//------------------------------------------------------------------------------
+// Delete API(s).
 
-  assert(idx == IDX_FRAME || idx == IDX_TILE);
-  return MuxImageDeleteNth(&mux->images_, nth, id);
+WebPMuxError WebPMuxDeleteChunk(WebPMux* mux, const char fourcc[4]) {
+  if (mux == NULL || fourcc == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  return MuxDeleteAllNamedData(mux, ChunkGetTagFromFourCC(fourcc));
 }
 
 WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth) {
-  return DeleteFrameTileInternal(mux, nth, IDX_FRAME);
-}
-
-WebPMuxError WebPMuxDeleteTile(WebPMux* mux, uint32_t nth) {
-  return DeleteFrameTileInternal(mux, nth, IDX_TILE);
+  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  return MuxImageDeleteNth(&mux->images_, nth);
 }
 
 //------------------------------------------------------------------------------
 // Assembly of the WebP RIFF file.
 
-static WebPMuxError GetFrameTileInfo(const WebPChunk* const frame_tile_chunk,
-                                     int* const x_offset, int* const y_offset,
-                                     int* const duration) {
-  const uint32_t tag = frame_tile_chunk->tag_;
-  const int is_frame = (tag == kChunks[IDX_FRAME].tag);
-  const WebPData* const data = &frame_tile_chunk->data_;
+static WebPMuxError GetFrameFragmentInfo(
+    const WebPChunk* const frame_frgm_chunk,
+    int* const x_offset, int* const y_offset, int* const duration) {
+  const uint32_t tag = frame_frgm_chunk->tag_;
+  const int is_frame = (tag == kChunks[IDX_ANMF].tag);
+  const WebPData* const data = &frame_frgm_chunk->data_;
   const size_t expected_data_size =
-      is_frame ? FRAME_CHUNK_SIZE : TILE_CHUNK_SIZE;
-  assert(frame_tile_chunk != NULL);
-  assert(tag == kChunks[IDX_FRAME].tag || tag ==  kChunks[IDX_TILE].tag);
-  if (data->size_ != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
-
-  *x_offset = 2 * GetLE24(data->bytes_ + 0);
-  *y_offset = 2 * GetLE24(data->bytes_ + 3);
-  if (is_frame) *duration = 1 + GetLE24(data->bytes_ + 12);
+      is_frame ? ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
+  assert(frame_frgm_chunk != NULL);
+  assert(tag == kChunks[IDX_ANMF].tag || tag ==  kChunks[IDX_FRGM].tag);
+  if (data->size != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
+
+  *x_offset = 2 * GetLE24(data->bytes + 0);
+  *y_offset = 2 * GetLE24(data->bytes + 3);
+  if (is_frame) *duration = GetLE24(data->bytes + 12);
   return WEBP_MUX_OK;
 }
 
-WebPMuxError MuxGetImageWidthHeight(const WebPChunk* const image_chunk,
-                                    int* const width, int* const height) {
-  const uint32_t tag = image_chunk->tag_;
-  const WebPData* const data = &image_chunk->data_;
-  int w, h;
-  int ok;
-  assert(image_chunk != NULL);
-  assert(tag == kChunks[IDX_VP8].tag || tag ==  kChunks[IDX_VP8L].tag);
-  ok = (tag == kChunks[IDX_VP8].tag) ?
-      VP8GetInfo(data->bytes_, data->size_, data->size_, &w, &h) :
-      VP8LGetInfo(data->bytes_, data->size_, &w, &h, NULL);
-  if (ok) {
-    *width = w;
-    *height = h;
-    return WEBP_MUX_OK;
-  } else {
-    return WEBP_MUX_BAD_DATA;
-  }
-}
-
 static WebPMuxError GetImageInfo(const WebPMuxImage* const wpi,
                                  int* const x_offset, int* const y_offset,
                                  int* const duration,
                                  int* const width, int* const height) {
-  const WebPChunk* const image_chunk = wpi->img_;
-  const WebPChunk* const frame_tile_chunk = wpi->header_;
+  const WebPChunk* const frame_frgm_chunk = wpi->header_;
+  WebPMuxError err;
+  assert(wpi != NULL);
+  assert(frame_frgm_chunk != NULL);
 
-  // Get offsets and duration from FRM/TILE chunk.
-  const WebPMuxError err =
-      GetFrameTileInfo(frame_tile_chunk, x_offset, y_offset, duration);
+  // Get offsets and duration from ANMF/FRGM chunk.
+  err = GetFrameFragmentInfo(frame_frgm_chunk, x_offset, y_offset, duration);
   if (err != WEBP_MUX_OK) return err;
 
   // Get width and height from VP8/VP8L chunk.
-  return MuxGetImageWidthHeight(image_chunk, width, height);
+  if (width != NULL) *width = wpi->width_;
+  if (height != NULL) *height = wpi->height_;
+  return WEBP_MUX_OK;
 }
 
-static WebPMuxError GetImageCanvasWidthHeight(
-    const WebPMux* const mux, uint32_t flags,
-    int* const width, int* const height) {
+// Returns the tightest dimension for the canvas considering the image list.
+static WebPMuxError GetAdjustedCanvasSize(const WebPMux* const mux,
+                                          uint32_t flags,
+                                          int* const width, int* const height) {
   WebPMuxImage* wpi = NULL;
   assert(mux != NULL);
   assert(width != NULL && height != NULL);
@@ -521,13 +451,15 @@ static WebPMuxError GetImageCanvasWidthHeight(
   assert(wpi != NULL);
   assert(wpi->img_ != NULL);
 
-  if (wpi->next_) {
+  if (wpi->next_ != NULL) {
     int max_x = 0;
     int max_y = 0;
     int64_t image_area = 0;
-    // Aggregate the bounding box for animation frames & tiled images.
+    // if we have a chain of wpi's, header_ is necessarily set
+    assert(wpi->header_ != NULL);
+    // Aggregate the bounding box for animation frames & fragmented images.
     for (; wpi != NULL; wpi = wpi->next_) {
-      int x_offset, y_offset, duration, w, h;
+      int x_offset = 0, y_offset = 0, duration = 0, w = 0, h = 0;
       const WebPMuxError err = GetImageInfo(wpi, &x_offset, &y_offset,
                                             &duration, &w, &h);
       const int max_x_pos = x_offset + w;
@@ -542,23 +474,19 @@ static WebPMuxError GetImageCanvasWidthHeight(
     }
     *width = max_x;
     *height = max_y;
-    // Crude check to validate that there are no image overlaps/holes for tile
-    // images. Check that the aggregated image area for individual tiles exactly
-    // matches the image area of the constructed canvas. However, the area-match
-    // is necessary but not sufficient condition.
-    if ((flags & TILE_FLAG) && (image_area != (max_x * max_y))) {
+    // Crude check to validate that there are no image overlaps/holes for
+    // fragmented images. Check that the aggregated image area for individual
+    // fragments exactly matches the image area of the constructed canvas.
+    // However, the area-match is necessary but not sufficient condition.
+    if ((flags & FRAGMENTS_FLAG) && (image_area != (max_x * max_y))) {
       *width = 0;
       *height = 0;
       return WEBP_MUX_INVALID_ARGUMENT;
     }
   } else {
-    // For a single image, extract the width & height from VP8/VP8L image-data.
-    int w, h;
-    const WebPChunk* const image_chunk = wpi->img_;
-    const WebPMuxError err = MuxGetImageWidthHeight(image_chunk, &w, &h);
-    if (err != WEBP_MUX_OK) return err;
-    *width = w;
-    *height = h;
+    // For a single image, canvas dimensions are same as image dimensions.
+    *width = wpi->width_;
+    *height = wpi->height_;
   }
   return WEBP_MUX_OK;
 }
@@ -574,50 +502,45 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
   int width = 0;
   int height = 0;
   uint8_t data[VP8X_CHUNK_SIZE];
-  const size_t data_size = VP8X_CHUNK_SIZE;
+  const WebPData vp8x = { data, VP8X_CHUNK_SIZE };
   const WebPMuxImage* images = NULL;
 
   assert(mux != NULL);
   images = mux->images_;  // First image.
   if (images == NULL || images->img_ == NULL ||
-      images->img_->data_.bytes_ == NULL) {
+      images->img_->data_.bytes == NULL) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
 
   // If VP8X chunk(s) is(are) already present, remove them (and later add new
   // VP8X chunk with updated flags).
-  err = MuxDeleteAllNamedData(mux, IDX_VP8X);
+  err = MuxDeleteAllNamedData(mux, kChunks[IDX_VP8X].tag);
   if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
 
   // Set flags.
-  if (mux->iccp_ != NULL && mux->iccp_->data_.bytes_ != NULL) {
+  if (mux->iccp_ != NULL && mux->iccp_->data_.bytes != NULL) {
     flags |= ICCP_FLAG;
   }
-
-  if (mux->meta_ != NULL && mux->meta_->data_.bytes_ != NULL) {
-    flags |= META_FLAG;
+  if (mux->exif_ != NULL && mux->exif_->data_.bytes != NULL) {
+    flags |= EXIF_FLAG;
+  }
+  if (mux->xmp_ != NULL && mux->xmp_->data_.bytes != NULL) {
+    flags |= XMP_FLAG;
   }
-
   if (images->header_ != NULL) {
-    if (images->header_->tag_ == kChunks[IDX_TILE].tag) {
-      // This is a tiled image.
-      flags |= TILE_FLAG;
-    } else if (images->header_->tag_ == kChunks[IDX_FRAME].tag) {
+    if (images->header_->tag_ == kChunks[IDX_FRGM].tag) {
+      // This is a fragmented image.
+      flags |= FRAGMENTS_FLAG;
+    } else if (images->header_->tag_ == kChunks[IDX_ANMF].tag) {
       // This is an image with animation.
       flags |= ANIMATION_FLAG;
     }
   }
-
   if (MuxImageCount(images, WEBP_CHUNK_ALPHA) > 0) {
     flags |= ALPHA_FLAG;  // Some images have an alpha channel.
   }
 
-  if (flags == 0) {
-    // For Simple Image, VP8X chunk should not be added.
-    return WEBP_MUX_OK;
-  }
-
-  err = GetImageCanvasWidthHeight(mux, flags, &width, &height);
+  err = GetAdjustedCanvasSize(mux, flags, &width, &height);
   if (err != WEBP_MUX_OK) return err;
 
   if (width <= 0 || height <= 0) {
@@ -627,9 +550,21 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
 
-  if (MuxHasLosslessImages(images)) {
-    // We have a file with a VP8X chunk having some lossless images.
-    // As lossless images implicitly contain alpha, force ALPHA_FLAG to be true.
+  if (mux->canvas_width_ != 0 || mux->canvas_height_ != 0) {
+    if (width > mux->canvas_width_ || height > mux->canvas_height_) {
+      return WEBP_MUX_INVALID_ARGUMENT;
+    }
+    width = mux->canvas_width_;
+    height = mux->canvas_height_;
+  }
+
+  if (flags == 0) {
+    // For Simple Image, VP8X chunk should not be added.
+    return WEBP_MUX_OK;
+  }
+
+  if (MuxHasAlpha(images)) {
+    // This means some frames explicitly/implicitly contain alpha.
     // Note: This 'flags' update must NOT be done for a lossless image
     // without a VP8X chunk!
     flags |= ALPHA_FLAG;
@@ -639,74 +574,123 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
   PutLE24(data + 4, width - 1);   // canvas width.
   PutLE24(data + 7, height - 1);  // canvas height.
 
-  err = MuxAddChunk(mux, 1, kChunks[IDX_VP8X].tag, data, data_size, 1);
-  return err;
+  return MuxSet(mux, kChunks[IDX_VP8X].tag, 1, &vp8x, 1);
+}
+
+// Cleans up 'mux' by removing any unnecessary chunks.
+static WebPMuxError MuxCleanup(WebPMux* const mux) {
+  int num_frames;
+  int num_fragments;
+  int num_anim_chunks;
+
+  // If we have an image with a single fragment or frame, and its rectangle
+  // covers the whole canvas, convert it to a non-animated non-fragmented image
+  // (to avoid writing FRGM/ANMF chunk unnecessarily).
+  WebPMuxError err = WebPMuxNumChunks(mux, kChunks[IDX_ANMF].id, &num_frames);
+  if (err != WEBP_MUX_OK) return err;
+  err = WebPMuxNumChunks(mux, kChunks[IDX_FRGM].id, &num_fragments);
+  if (err != WEBP_MUX_OK) return err;
+  if (num_frames == 1 || num_fragments == 1) {
+    WebPMuxImage* frame_frag;
+    err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &frame_frag);
+    assert(err == WEBP_MUX_OK);  // We know that one frame/fragment does exist.
+    assert(frame_frag != NULL);
+    if (frame_frag->header_ != NULL &&
+        ((mux->canvas_width_ == 0 && mux->canvas_height_ == 0) ||
+         (frame_frag->width_ == mux->canvas_width_ &&
+          frame_frag->height_ == mux->canvas_height_))) {
+      assert(frame_frag->header_->tag_ == kChunks[IDX_ANMF].tag ||
+             frame_frag->header_->tag_ == kChunks[IDX_FRGM].tag);
+      ChunkDelete(frame_frag->header_);  // Removes ANMF/FRGM chunk.
+      frame_frag->header_ = NULL;
+      num_frames = 0;
+      num_fragments = 0;
+    }
+  }
+  // Remove ANIM chunk if this is a non-animated image.
+  err = WebPMuxNumChunks(mux, kChunks[IDX_ANIM].id, &num_anim_chunks);
+  if (err != WEBP_MUX_OK) return err;
+  if (num_anim_chunks >= 1 && num_frames == 0) {
+    err = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
+    if (err != WEBP_MUX_OK) return err;
+  }
+  return WEBP_MUX_OK;
+}
+
+// Total size of a list of images.
+static size_t ImageListDiskSize(const WebPMuxImage* wpi_list) {
+  size_t size = 0;
+  while (wpi_list != NULL) {
+    size += MuxImageDiskSize(wpi_list);
+    wpi_list = wpi_list->next_;
+  }
+  return size;
+}
+
+// Write out the given list of images into 'dst'.
+static uint8_t* ImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst) {
+  while (wpi_list != NULL) {
+    dst = MuxImageEmit(wpi_list, dst);
+    wpi_list = wpi_list->next_;
+  }
+  return dst;
 }
 
 WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
   size_t size = 0;
   uint8_t* data = NULL;
   uint8_t* dst = NULL;
-  int num_frames;
-  int num_loop_chunks;
   WebPMuxError err;
 
-  if (mux == NULL || assembled_data == NULL) {
+  if (assembled_data == NULL) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
+  // Clean up returned data, in case something goes wrong.
+  memset(assembled_data, 0, sizeof(*assembled_data));
 
-  // Remove LOOP chunk if unnecessary.
-  err = WebPMuxNumChunks(mux, kChunks[IDX_LOOP].id, &num_loop_chunks);
-  if (err != WEBP_MUX_OK) return err;
-  if (num_loop_chunks >= 1) {
-    err = WebPMuxNumChunks(mux, kChunks[IDX_FRAME].id, &num_frames);
-    if (err != WEBP_MUX_OK) return err;
-    if (num_frames == 0) {
-      err = DeleteLoopCount(mux);
-      if (err != WEBP_MUX_OK) return err;
-    }
+  if (mux == NULL) {
+    return WEBP_MUX_INVALID_ARGUMENT;
   }
 
-  // Create VP8X chunk.
+  // Finalize mux.
+  err = MuxCleanup(mux);
+  if (err != WEBP_MUX_OK) return err;
   err = CreateVP8XChunk(mux);
   if (err != WEBP_MUX_OK) return err;
 
   // Allocate data.
-  size = ChunksListDiskSize(mux->vp8x_) + ChunksListDiskSize(mux->iccp_)
-       + ChunksListDiskSize(mux->loop_) + MuxImageListDiskSize(mux->images_)
-       + ChunksListDiskSize(mux->meta_) + ChunksListDiskSize(mux->unknown_)
-       + RIFF_HEADER_SIZE;
+  size = ChunkListDiskSize(mux->vp8x_) + ChunkListDiskSize(mux->iccp_)
+       + ChunkListDiskSize(mux->anim_) + ImageListDiskSize(mux->images_)
+       + ChunkListDiskSize(mux->exif_) + ChunkListDiskSize(mux->xmp_)
+       + ChunkListDiskSize(mux->unknown_) + RIFF_HEADER_SIZE;
 
-  data = (uint8_t*)malloc(size);
+  data = (uint8_t*)WebPSafeMalloc(1ULL, size);
   if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
 
   // Emit header & chunks.
   dst = MuxEmitRiffHeader(data, size);
   dst = ChunkListEmit(mux->vp8x_, dst);
   dst = ChunkListEmit(mux->iccp_, dst);
-  dst = ChunkListEmit(mux->loop_, dst);
-  dst = MuxImageListEmit(mux->images_, dst);
-  dst = ChunkListEmit(mux->meta_, dst);
+  dst = ChunkListEmit(mux->anim_, dst);
+  dst = ImageListEmit(mux->images_, dst);
+  dst = ChunkListEmit(mux->exif_, dst);
+  dst = ChunkListEmit(mux->xmp_, dst);
   dst = ChunkListEmit(mux->unknown_, dst);
   assert(dst == data + size);
 
   // Validate mux.
   err = MuxValidate(mux);
   if (err != WEBP_MUX_OK) {
-    free(data);
+    WebPSafeFree(data);
     data = NULL;
     size = 0;
   }
 
-  // Finalize.
-  assembled_data->bytes_ = data;
-  assembled_data->size_ = size;
+  // Finalize data.
+  assembled_data->bytes = data;
+  assembled_data->size = size;
 
   return err;
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/mux/muxi.h b/drivers/webp/mux/muxi.h
index 2f06f3ed03..8bd5291661 100644
--- a/drivers/webp/mux/muxi.h
+++ b/drivers/webp/mux/muxi.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Internal header for mux library.
@@ -15,34 +17,41 @@
 #include <stdlib.h>
 #include "../dec/vp8i.h"
 #include "../dec/vp8li.h"
-#include "../format_constants.h"
-#include "../mux.h"
+#include "webp/mux.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
 //------------------------------------------------------------------------------
 // Defines and constants.
 
+#define MUX_MAJ_VERSION 0
+#define MUX_MIN_VERSION 2
+#define MUX_REV_VERSION 2
+
 // Chunk object.
 typedef struct WebPChunk WebPChunk;
 struct WebPChunk {
   uint32_t        tag_;
   int             owner_;  // True if *data_ memory is owned internally.
-                           // VP8X, Loop, and other internally created chunks
-                           // like frame/tile are always owned.
+                           // VP8X, ANIM, and other internally created chunks
+                           // like ANMF/FRGM are always owned.
   WebPData        data_;
   WebPChunk*      next_;
 };
 
-// MuxImage object. Store a full webp image (including frame/tile chunk, alpha
+// MuxImage object. Stores a full WebP image (including ANMF/FRGM chunk, ALPH
 // chunk and VP8/VP8L chunk),
 typedef struct WebPMuxImage WebPMuxImage;
 struct WebPMuxImage {
-  WebPChunk*  header_;      // Corresponds to WEBP_CHUNK_FRAME/WEBP_CHUNK_TILE.
+  WebPChunk*  header_;      // Corresponds to WEBP_CHUNK_ANMF/WEBP_CHUNK_FRGM.
   WebPChunk*  alpha_;       // Corresponds to WEBP_CHUNK_ALPHA.
   WebPChunk*  img_;         // Corresponds to WEBP_CHUNK_IMAGE.
+  WebPChunk*  unknown_;     // Corresponds to WEBP_CHUNK_UNKNOWN.
+  int         width_;
+  int         height_;
+  int         has_alpha_;   // Through ALPH chunk or as part of VP8L.
   int         is_partial_;  // True if only some of the chunks are filled.
   WebPMuxImage* next_;
 };
@@ -51,11 +60,14 @@ struct WebPMuxImage {
 struct WebPMux {
   WebPMuxImage*   images_;
   WebPChunk*      iccp_;
-  WebPChunk*      meta_;
-  WebPChunk*      loop_;
+  WebPChunk*      exif_;
+  WebPChunk*      xmp_;
+  WebPChunk*      anim_;
   WebPChunk*      vp8x_;
 
-  WebPChunk*  unknown_;
+  WebPChunk*      unknown_;
+  int             canvas_width_;
+  int             canvas_height_;
 };
 
 // CHUNK_INDEX enum: used for indexing within 'kChunks' (defined below) only.
@@ -65,13 +77,14 @@ struct WebPMux {
 typedef enum {
   IDX_VP8X = 0,
   IDX_ICCP,
-  IDX_LOOP,
-  IDX_FRAME,
-  IDX_TILE,
+  IDX_ANIM,
+  IDX_ANMF,
+  IDX_FRGM,
   IDX_ALPHA,
   IDX_VP8,
   IDX_VP8L,
-  IDX_META,
+  IDX_EXIF,
+  IDX_XMP,
   IDX_UNKNOWN,
 
   IDX_NIL,
@@ -80,8 +93,6 @@ typedef enum {
 
 #define NIL_TAG 0x00000000u  // To signal void chunk.
 
-#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
-
 typedef struct {
   uint32_t      tag;
   WebPChunkId   id;
@@ -91,55 +102,23 @@ typedef struct {
 extern const ChunkInfo kChunks[IDX_LAST_CHUNK];
 
 //------------------------------------------------------------------------------
-// Helper functions.
-
-// Read 16, 24 or 32 bits stored in little-endian order.
-static WEBP_INLINE int GetLE16(const uint8_t* const data) {
-  return (int)(data[0] << 0) | (data[1] << 8);
-}
-
-static WEBP_INLINE int GetLE24(const uint8_t* const data) {
-  return GetLE16(data) | (data[2] << 16);
-}
-
-static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
-  return (uint32_t)GetLE16(data) | (GetLE16(data + 2) << 16);
-}
-
-// Store 16, 24 or 32 bits in little-endian order.
-static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
-  assert(val < (1 << 16));
-  data[0] = (val >> 0);
-  data[1] = (val >> 8);
-}
-
-static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
-  assert(val < (1 << 24));
-  PutLE16(data, val & 0xffff);
-  data[2] = (val >> 16);
-}
-
-static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
-  PutLE16(data, (int)(val & 0xffff));
-  PutLE16(data + 2, (int)(val >> 16));
-}
-
-static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
-  return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~1U);
-}
-
-//------------------------------------------------------------------------------
 // Chunk object management.
 
 // Initialize.
 void ChunkInit(WebPChunk* const chunk);
 
-// Get chunk index from chunk tag. Returns IDX_NIL if not found.
+// Get chunk index from chunk tag. Returns IDX_UNKNOWN if not found.
 CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag);
 
-// Get chunk id from chunk tag. Returns WEBP_CHUNK_NIL if not found.
+// Get chunk id from chunk tag. Returns WEBP_CHUNK_UNKNOWN if not found.
 WebPChunkId ChunkGetIdFromTag(uint32_t tag);
 
+// Convert a fourcc string to a tag.
+uint32_t ChunkGetTagFromFourCC(const char fourcc[4]);
+
+// Get chunk index from fourcc. Returns IDX_UNKNOWN if given fourcc is unknown.
+CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]);
+
 // Search for nth chunk with given 'tag' in the chunk list.
 // nth = 0 means "last of the list".
 WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag);
@@ -150,7 +129,8 @@ WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
 
 // Sets 'chunk' at nth position in the 'chunk_list'.
 // nth = 0 has the special meaning "last of the list".
-WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
+// On success ownership is transferred from 'chunk' to the 'chunk_list'.
+WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
                          uint32_t nth);
 
 // Releases chunk and returns chunk->next_.
@@ -159,23 +139,27 @@ WebPChunk* ChunkRelease(WebPChunk* const chunk);
 // Deletes given chunk & returns chunk->next_.
 WebPChunk* ChunkDelete(WebPChunk* const chunk);
 
+// Deletes all chunks in the given chunk list.
+void ChunkListDelete(WebPChunk** const chunk_list);
+
+// Returns size of the chunk including chunk header and padding byte (if any).
+static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
+  return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~1U);
+}
+
 // Size of a chunk including header and padding.
 static WEBP_INLINE size_t ChunkDiskSize(const WebPChunk* chunk) {
-  const size_t data_size = chunk->data_.size_;
+  const size_t data_size = chunk->data_.size;
   assert(data_size < MAX_CHUNK_PAYLOAD);
   return SizeWithPadding(data_size);
 }
 
 // Total size of a list of chunks.
-size_t ChunksListDiskSize(const WebPChunk* chunk_list);
+size_t ChunkListDiskSize(const WebPChunk* chunk_list);
 
 // Write out the given list of chunks into 'dst'.
 uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst);
 
-// Get the width & height of image stored in 'image_chunk'.
-WebPMuxError MuxGetImageWidthHeight(const WebPChunk* const image_chunk,
-                                    int* const width, int* const height);
-
 //------------------------------------------------------------------------------
 // MuxImage object management.
 
@@ -189,82 +173,59 @@ WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi);
 // 'wpi' can be NULL.
 WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi);
 
-// Delete all images in 'wpi_list'.
-void MuxImageDeleteAll(WebPMuxImage** const wpi_list);
-
 // Count number of images matching the given tag id in the 'wpi_list'.
+// If id == WEBP_CHUNK_NIL, all images will be matched.
 int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id);
 
+// Update width/height/has_alpha info from chunks within wpi.
+// Also remove ALPH chunk if not needed.
+int MuxImageFinalize(WebPMuxImage* const wpi);
+
 // Check if given ID corresponds to an image related chunk.
 static WEBP_INLINE int IsWPI(WebPChunkId id) {
   switch (id) {
-    case WEBP_CHUNK_FRAME:
-    case WEBP_CHUNK_TILE:
+    case WEBP_CHUNK_ANMF:
+    case WEBP_CHUNK_FRGM:
     case WEBP_CHUNK_ALPHA:
     case WEBP_CHUNK_IMAGE:  return 1;
     default:        return 0;
   }
 }
 
-// Get a reference to appropriate chunk list within an image given chunk tag.
-static WEBP_INLINE WebPChunk** MuxImageGetListFromId(
-    const WebPMuxImage* const wpi, WebPChunkId id) {
-  assert(wpi != NULL);
-  switch (id) {
-    case WEBP_CHUNK_FRAME:
-    case WEBP_CHUNK_TILE:  return (WebPChunk**)&wpi->header_;
-    case WEBP_CHUNK_ALPHA: return (WebPChunk**)&wpi->alpha_;
-    case WEBP_CHUNK_IMAGE: return (WebPChunk**)&wpi->img_;
-    default: return NULL;
-  }
-}
-
 // Pushes 'wpi' at the end of 'wpi_list'.
 WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list);
 
-// Delete nth image in the image list with given tag id.
-WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
-                               WebPChunkId id);
+// Delete nth image in the image list (nth = 0 means the last image).
+WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth);
 
-// Get nth image in the image list with given tag id.
+// Get nth image in the image list (nth = 0 means the last image).
 WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
-                            WebPChunkId id, WebPMuxImage** wpi);
+                            WebPMuxImage** wpi);
 
 // Total size of the given image.
 size_t MuxImageDiskSize(const WebPMuxImage* const wpi);
 
-// Total size of a list of images.
-size_t MuxImageListDiskSize(const WebPMuxImage* wpi_list);
-
 // Write out the given image into 'dst'.
 uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst);
 
-// Write out the given list of images into 'dst'.
-uint8_t* MuxImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst);
-
 //------------------------------------------------------------------------------
 // Helper methods for mux.
 
-// Checks if the given image list contains at least one lossless image.
-int MuxHasLosslessImages(const WebPMuxImage* images);
+// Checks if the given image list contains at least one image with alpha.
+int MuxHasAlpha(const WebPMuxImage* images);
 
 // Write out RIFF header into 'data', given total data size 'size'.
 uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size);
 
 // Returns the list where chunk with given ID is to be inserted in mux.
-// Return value is NULL if this chunk should be inserted in mux->images_ list
-// or if 'id' is not known.
 WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id);
 
-// Validates that the given mux has a single image.
-WebPMuxError MuxValidateForImage(const WebPMux* const mux);
-
 // Validates the given mux object.
 WebPMuxError MuxValidate(const WebPMux* const mux);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/mux/muxinternal.c b/drivers/webp/mux/muxinternal.c
index 6c3c4fe60a..4babbe82fc 100644
--- a/drivers/webp/mux/muxinternal.c
+++ b/drivers/webp/mux/muxinternal.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Internal objects and utils for mux.
@@ -12,29 +14,33 @@
 
 #include <assert.h>
 #include "./muxi.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/utils.h"
 
 #define UNDEFINED_CHUNK_SIZE (-1)
 
 const ChunkInfo kChunks[] = {
   { MKFOURCC('V', 'P', '8', 'X'),  WEBP_CHUNK_VP8X,    VP8X_CHUNK_SIZE },
   { MKFOURCC('I', 'C', 'C', 'P'),  WEBP_CHUNK_ICCP,    UNDEFINED_CHUNK_SIZE },
-  { MKFOURCC('L', 'O', 'O', 'P'),  WEBP_CHUNK_LOOP,    LOOP_CHUNK_SIZE },
-  { MKFOURCC('F', 'R', 'M', ' '),  WEBP_CHUNK_FRAME,   FRAME_CHUNK_SIZE },
-  { MKFOURCC('T', 'I', 'L', 'E'),  WEBP_CHUNK_TILE,    TILE_CHUNK_SIZE },
+  { MKFOURCC('A', 'N', 'I', 'M'),  WEBP_CHUNK_ANIM,    ANIM_CHUNK_SIZE },
+  { MKFOURCC('A', 'N', 'M', 'F'),  WEBP_CHUNK_ANMF,    ANMF_CHUNK_SIZE },
+  { MKFOURCC('F', 'R', 'G', 'M'),  WEBP_CHUNK_FRGM,    FRGM_CHUNK_SIZE },
   { MKFOURCC('A', 'L', 'P', 'H'),  WEBP_CHUNK_ALPHA,   UNDEFINED_CHUNK_SIZE },
   { MKFOURCC('V', 'P', '8', ' '),  WEBP_CHUNK_IMAGE,   UNDEFINED_CHUNK_SIZE },
   { MKFOURCC('V', 'P', '8', 'L'),  WEBP_CHUNK_IMAGE,   UNDEFINED_CHUNK_SIZE },
-  { MKFOURCC('M', 'E', 'T', 'A'),  WEBP_CHUNK_META,    UNDEFINED_CHUNK_SIZE },
-  { MKFOURCC('U', 'N', 'K', 'N'),  WEBP_CHUNK_UNKNOWN, UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('E', 'X', 'I', 'F'),  WEBP_CHUNK_EXIF,    UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('X', 'M', 'P', ' '),  WEBP_CHUNK_XMP,     UNDEFINED_CHUNK_SIZE },
+  { NIL_TAG,                       WEBP_CHUNK_UNKNOWN, UNDEFINED_CHUNK_SIZE },
 
-  { NIL_TAG,                    WEBP_CHUNK_NIL,     UNDEFINED_CHUNK_SIZE }
+  { NIL_TAG,                       WEBP_CHUNK_NIL,     UNDEFINED_CHUNK_SIZE }
 };
 
 //------------------------------------------------------------------------------
+
+int WebPGetMuxVersion(void) {
+  return (MUX_MAJ_VERSION << 16) | (MUX_MIN_VERSION << 8) | MUX_REV_VERSION;
+}
+
+//------------------------------------------------------------------------------
 // Life of a chunk object.
 
 void ChunkInit(WebPChunk* const chunk) {
@@ -60,9 +66,9 @@ WebPChunk* ChunkRelease(WebPChunk* const chunk) {
 CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag) {
   int i;
   for (i = 0; kChunks[i].tag != NIL_TAG; ++i) {
-    if (tag == kChunks[i].tag) return i;
+    if (tag == kChunks[i].tag) return (CHUNK_INDEX)i;
   }
-  return IDX_NIL;
+  return IDX_UNKNOWN;
 }
 
 WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
@@ -70,7 +76,16 @@ WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
   for (i = 0; kChunks[i].tag != NIL_TAG; ++i) {
     if (tag == kChunks[i].tag) return kChunks[i].id;
   }
-  return WEBP_CHUNK_NIL;
+  return WEBP_CHUNK_UNKNOWN;
+}
+
+uint32_t ChunkGetTagFromFourCC(const char fourcc[4]) {
+  return MKFOURCC(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
+}
+
+CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]) {
+  const uint32_t tag = ChunkGetTagFromFourCC(fourcc);
+  return ChunkGetIndexFromTag(tag);
 }
 
 //------------------------------------------------------------------------------
@@ -78,7 +93,7 @@ WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
 
 // Returns next chunk in the chunk list with the given tag.
 static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
-  while (chunk && chunk->tag_ != tag) {
+  while (chunk != NULL && chunk->tag_ != tag) {
     chunk = chunk->next_;
   }
   return chunk;
@@ -87,7 +102,7 @@ static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
 WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
   uint32_t iter = nth;
   first = ChunkSearchNextInList(first, tag);
-  if (!first) return NULL;
+  if (first == NULL) return NULL;
 
   while (--iter != 0) {
     WebPChunk* next_chunk = ChunkSearchNextInList(first->next_, tag);
@@ -99,14 +114,14 @@ WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
 
 // Outputs a pointer to 'prev_chunk->next_',
 //   where 'prev_chunk' is the pointer to the chunk at position (nth - 1).
-// Returns 1 if nth chunk was found, 0 otherwise.
+// Returns true if nth chunk was found.
 static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
                                 WebPChunk*** const location) {
   uint32_t count = 0;
-  assert(chunk_list);
+  assert(chunk_list != NULL);
   *location = chunk_list;
 
-  while (*chunk_list) {
+  while (*chunk_list != NULL) {
     WebPChunk* const cur_chunk = *chunk_list;
     ++count;
     if (count == nth) return 1;  // Found.
@@ -124,34 +139,25 @@ static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
 WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
                              int copy_data, uint32_t tag) {
   // For internally allocated chunks, always copy data & make it owner of data.
-  if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_LOOP].tag) {
+  if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_ANIM].tag) {
     copy_data = 1;
   }
 
   ChunkRelease(chunk);
 
   if (data != NULL) {
-    if (copy_data) {
-      // Copy data.
-      chunk->data_.bytes_ = (uint8_t*)malloc(data->size_);
-      if (chunk->data_.bytes_ == NULL) return WEBP_MUX_MEMORY_ERROR;
-      memcpy((uint8_t*)chunk->data_.bytes_, data->bytes_, data->size_);
-      chunk->data_.size_ = data->size_;
-
-      // Chunk is owner of data.
-      chunk->owner_ = 1;
-    } else {
-      // Don't copy data.
+    if (copy_data) {        // Copy data.
+      if (!WebPDataCopy(data, &chunk->data_)) return WEBP_MUX_MEMORY_ERROR;
+      chunk->owner_ = 1;    // Chunk is owner of data.
+    } else {                // Don't copy data.
       chunk->data_ = *data;
     }
   }
-
   chunk->tag_ = tag;
-
   return WEBP_MUX_OK;
 }
 
-WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
+WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
                          uint32_t nth) {
   WebPChunk* new_chunk;
 
@@ -159,9 +165,10 @@ WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
     return WEBP_MUX_NOT_FOUND;
   }
 
-  new_chunk = (WebPChunk*)malloc(sizeof(*new_chunk));
+  new_chunk = (WebPChunk*)WebPSafeMalloc(1ULL, sizeof(*new_chunk));
   if (new_chunk == NULL) return WEBP_MUX_MEMORY_ERROR;
   *new_chunk = *chunk;
+  chunk->owner_ = 0;
   new_chunk->next_ = *chunk_list;
   *chunk_list = new_chunk;
   return WEBP_MUX_OK;
@@ -172,70 +179,47 @@ WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
 
 WebPChunk* ChunkDelete(WebPChunk* const chunk) {
   WebPChunk* const next = ChunkRelease(chunk);
-  free(chunk);
+  WebPSafeFree(chunk);
   return next;
 }
 
-//------------------------------------------------------------------------------
-// Chunk serialization methods.
-
-size_t ChunksListDiskSize(const WebPChunk* chunk_list) {
-  size_t size = 0;
-  while (chunk_list) {
-    size += ChunkDiskSize(chunk_list);
-    chunk_list = chunk_list->next_;
+void ChunkListDelete(WebPChunk** const chunk_list) {
+  while (*chunk_list != NULL) {
+    *chunk_list = ChunkDelete(*chunk_list);
   }
-  return size;
 }
 
+//------------------------------------------------------------------------------
+// Chunk serialization methods.
+
 static uint8_t* ChunkEmit(const WebPChunk* const chunk, uint8_t* dst) {
-  const size_t chunk_size = chunk->data_.size_;
+  const size_t chunk_size = chunk->data_.size;
   assert(chunk);
   assert(chunk->tag_ != NIL_TAG);
   PutLE32(dst + 0, chunk->tag_);
   PutLE32(dst + TAG_SIZE, (uint32_t)chunk_size);
   assert(chunk_size == (uint32_t)chunk_size);
-  memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes_, chunk_size);
+  memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes, chunk_size);
   if (chunk_size & 1)
     dst[CHUNK_HEADER_SIZE + chunk_size] = 0;  // Add padding.
   return dst + ChunkDiskSize(chunk);
 }
 
 uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst) {
-  while (chunk_list) {
+  while (chunk_list != NULL) {
     dst = ChunkEmit(chunk_list, dst);
     chunk_list = chunk_list->next_;
   }
   return dst;
 }
 
-//------------------------------------------------------------------------------
-// Manipulation of a WebPData object.
-
-void WebPDataInit(WebPData* webp_data) {
-  if (webp_data != NULL) {
-    memset(webp_data, 0, sizeof(*webp_data));
-  }
-}
-
-void WebPDataClear(WebPData* webp_data) {
-  if (webp_data != NULL) {
-    free((void*)webp_data->bytes_);
-    WebPDataInit(webp_data);
-  }
-}
-
-int WebPDataCopy(const WebPData* src, WebPData* dst) {
-  if (src == NULL || dst == NULL) return 0;
-
-  WebPDataInit(dst);
-  if (src->bytes_ != NULL && src->size_ != 0) {
-    dst->bytes_ = (uint8_t*)malloc(src->size_);
-    if (dst->bytes_ == NULL) return 0;
-    memcpy((void*)dst->bytes_, src->bytes_, src->size_);
-    dst->size_ = src->size_;
+size_t ChunkListDiskSize(const WebPChunk* chunk_list) {
+  size_t size = 0;
+  while (chunk_list != NULL) {
+    size += ChunkDiskSize(chunk_list);
+    chunk_list = chunk_list->next_;
   }
-  return 1;
+  return size;
 }
 
 //------------------------------------------------------------------------------
@@ -252,6 +236,7 @@ WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi) {
   ChunkDelete(wpi->header_);
   ChunkDelete(wpi->alpha_);
   ChunkDelete(wpi->img_);
+  ChunkListDelete(&wpi->unknown_);
 
   next = wpi->next_;
   MuxImageInit(wpi);
@@ -261,14 +246,31 @@ WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi) {
 //------------------------------------------------------------------------------
 // MuxImage search methods.
 
+// Get a reference to appropriate chunk list within an image given chunk id.
+static WebPChunk** GetChunkListFromId(const WebPMuxImage* const wpi,
+                                      WebPChunkId id) {
+  assert(wpi != NULL);
+  switch (id) {
+    case WEBP_CHUNK_ANMF:
+    case WEBP_CHUNK_FRGM:  return (WebPChunk**)&wpi->header_;
+    case WEBP_CHUNK_ALPHA: return (WebPChunk**)&wpi->alpha_;
+    case WEBP_CHUNK_IMAGE: return (WebPChunk**)&wpi->img_;
+    default: return NULL;
+  }
+}
+
 int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
   int count = 0;
   const WebPMuxImage* current;
   for (current = wpi_list; current != NULL; current = current->next_) {
-    const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(current, id);
-    if (wpi_chunk != NULL) {
-      const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
-      if (wpi_chunk_id == id) ++count;
+    if (id == WEBP_CHUNK_NIL) {
+      ++count;  // Special case: count all images.
+    } else {
+      const WebPChunk* const wpi_chunk = *GetChunkListFromId(current, id);
+      if (wpi_chunk != NULL) {
+        const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
+        if (wpi_chunk_id == id) ++count;  // Count images with a matching 'id'.
+      }
     }
   }
   return count;
@@ -276,34 +278,22 @@ int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
 
 // Outputs a pointer to 'prev_wpi->next_',
 //   where 'prev_wpi' is the pointer to the image at position (nth - 1).
-// Returns 1 if nth image with given id was found, 0 otherwise.
+// Returns true if nth image was found.
 static int SearchImageToGetOrDelete(WebPMuxImage** wpi_list, uint32_t nth,
-                                    WebPChunkId id,
                                     WebPMuxImage*** const location) {
   uint32_t count = 0;
   assert(wpi_list);
   *location = wpi_list;
 
-  // Search makes sense only for the following.
-  assert(id == WEBP_CHUNK_FRAME || id == WEBP_CHUNK_TILE ||
-         id == WEBP_CHUNK_IMAGE);
-  assert(id != WEBP_CHUNK_IMAGE || nth == 1);
-
   if (nth == 0) {
-    nth = MuxImageCount(*wpi_list, id);
+    nth = MuxImageCount(*wpi_list, WEBP_CHUNK_NIL);
     if (nth == 0) return 0;  // Not found.
   }
 
-  while (*wpi_list) {
+  while (*wpi_list != NULL) {
     WebPMuxImage* const cur_wpi = *wpi_list;
-    const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(cur_wpi, id);
-    if (wpi_chunk != NULL) {
-      const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
-      if (wpi_chunk_id == id) {
-        ++count;
-        if (count == nth) return 1;  // Found.
-      }
-    }
+    ++count;
+    if (count == nth) return 1;  // Found.
     wpi_list = &cur_wpi->next_;
     *location = wpi_list;
   }
@@ -322,7 +312,7 @@ WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list) {
     wpi_list = &cur_wpi->next_;
   }
 
-  new_wpi = (WebPMuxImage*)malloc(sizeof(*new_wpi));
+  new_wpi = (WebPMuxImage*)WebPSafeMalloc(1ULL, sizeof(*new_wpi));
   if (new_wpi == NULL) return WEBP_MUX_MEMORY_ERROR;
   *new_wpi = *wpi;
   new_wpi->next_ = NULL;
@@ -341,20 +331,13 @@ WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list) {
 WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi) {
   // Delete the components of wpi. If wpi is NULL this is a noop.
   WebPMuxImage* const next = MuxImageRelease(wpi);
-  free(wpi);
+  WebPSafeFree(wpi);
   return next;
 }
 
-void MuxImageDeleteAll(WebPMuxImage** const wpi_list) {
-  while (*wpi_list) {
-    *wpi_list = MuxImageDelete(*wpi_list);
-  }
-}
-
-WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
-                               WebPChunkId id) {
+WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth) {
   assert(wpi_list);
-  if (!SearchImageToGetOrDelete(wpi_list, nth, id, &wpi_list)) {
+  if (!SearchImageToGetOrDelete(wpi_list, nth, &wpi_list)) {
     return WEBP_MUX_NOT_FOUND;
   }
   *wpi_list = MuxImageDelete(*wpi_list);
@@ -365,10 +348,10 @@ WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
 // MuxImage reader methods.
 
 WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
-                            WebPChunkId id, WebPMuxImage** wpi) {
+                            WebPMuxImage** wpi) {
   assert(wpi_list);
   assert(wpi);
-  if (!SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth, id,
+  if (!SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth,
                                 (WebPMuxImage***)&wpi_list)) {
     return WEBP_MUX_NOT_FOUND;
   }
@@ -385,47 +368,48 @@ size_t MuxImageDiskSize(const WebPMuxImage* const wpi) {
   if (wpi->header_ != NULL) size += ChunkDiskSize(wpi->header_);
   if (wpi->alpha_ != NULL) size += ChunkDiskSize(wpi->alpha_);
   if (wpi->img_ != NULL) size += ChunkDiskSize(wpi->img_);
+  if (wpi->unknown_ != NULL) size += ChunkListDiskSize(wpi->unknown_);
   return size;
 }
 
-size_t MuxImageListDiskSize(const WebPMuxImage* wpi_list) {
-  size_t size = 0;
-  while (wpi_list) {
-    size += MuxImageDiskSize(wpi_list);
-    wpi_list = wpi_list->next_;
+// Special case as ANMF/FRGM chunk encapsulates other image chunks.
+static uint8_t* ChunkEmitSpecial(const WebPChunk* const header,
+                                 size_t total_size, uint8_t* dst) {
+  const size_t header_size = header->data_.size;
+  const size_t offset_to_next = total_size - CHUNK_HEADER_SIZE;
+  assert(header->tag_ == kChunks[IDX_ANMF].tag ||
+         header->tag_ == kChunks[IDX_FRGM].tag);
+  PutLE32(dst + 0, header->tag_);
+  PutLE32(dst + TAG_SIZE, (uint32_t)offset_to_next);
+  assert(header_size == (uint32_t)header_size);
+  memcpy(dst + CHUNK_HEADER_SIZE, header->data_.bytes, header_size);
+  if (header_size & 1) {
+    dst[CHUNK_HEADER_SIZE + header_size] = 0;  // Add padding.
   }
-  return size;
+  return dst + ChunkDiskSize(header);
 }
 
 uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst) {
   // Ordering of chunks to be emitted is strictly as follows:
-  // 1. Frame/Tile chunk (if present).
-  // 2. Alpha chunk (if present).
+  // 1. ANMF/FRGM chunk (if present).
+  // 2. ALPH chunk (if present).
   // 3. VP8/VP8L chunk.
   assert(wpi);
-  if (wpi->header_ != NULL) dst = ChunkEmit(wpi->header_, dst);
+  if (wpi->header_ != NULL) {
+    dst = ChunkEmitSpecial(wpi->header_, MuxImageDiskSize(wpi), dst);
+  }
   if (wpi->alpha_ != NULL) dst = ChunkEmit(wpi->alpha_, dst);
   if (wpi->img_ != NULL) dst = ChunkEmit(wpi->img_, dst);
-  return dst;
-}
-
-uint8_t* MuxImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst) {
-  while (wpi_list) {
-    dst = MuxImageEmit(wpi_list, dst);
-    wpi_list = wpi_list->next_;
-  }
+  if (wpi->unknown_ != NULL) dst = ChunkListEmit(wpi->unknown_, dst);
   return dst;
 }
 
 //------------------------------------------------------------------------------
 // Helper methods for mux.
 
-int MuxHasLosslessImages(const WebPMuxImage* images) {
+int MuxHasAlpha(const WebPMuxImage* images) {
   while (images != NULL) {
-    assert(images->img_ != NULL);
-    if (images->img_->tag_ == kChunks[IDX_VP8L].tag) {
-      return 1;
-    }
+    if (images->has_alpha_) return 1;
     images = images->next_;
   }
   return 0;
@@ -441,30 +425,13 @@ uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size) {
 
 WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
   assert(mux != NULL);
-  switch(id) {
+  switch (id) {
     case WEBP_CHUNK_VP8X:    return (WebPChunk**)&mux->vp8x_;
     case WEBP_CHUNK_ICCP:    return (WebPChunk**)&mux->iccp_;
-    case WEBP_CHUNK_LOOP:    return (WebPChunk**)&mux->loop_;
-    case WEBP_CHUNK_META:    return (WebPChunk**)&mux->meta_;
-    case WEBP_CHUNK_UNKNOWN: return (WebPChunk**)&mux->unknown_;
-    default: return NULL;
-  }
-}
-
-WebPMuxError MuxValidateForImage(const WebPMux* const mux) {
-  const int num_images = MuxImageCount(mux->images_, WEBP_CHUNK_IMAGE);
-  const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_FRAME);
-  const int num_tiles  = MuxImageCount(mux->images_, WEBP_CHUNK_TILE);
-
-  if (num_images == 0) {
-    // No images in mux.
-    return WEBP_MUX_NOT_FOUND;
-  } else if (num_images == 1 && num_frames == 0 && num_tiles == 0) {
-    // Valid case (single image).
-    return WEBP_MUX_OK;
-  } else {
-    // Frame/Tile case OR an invalid mux.
-    return WEBP_MUX_INVALID_ARGUMENT;
+    case WEBP_CHUNK_ANIM:    return (WebPChunk**)&mux->anim_;
+    case WEBP_CHUNK_EXIF:    return (WebPChunk**)&mux->exif_;
+    case WEBP_CHUNK_XMP:     return (WebPChunk**)&mux->xmp_;
+    default:                 return (WebPChunk**)&mux->unknown_;
   }
 }
 
@@ -480,7 +447,7 @@ static int IsNotCompatible(int feature, int num_items) {
 // On success returns WEBP_MUX_OK and stores the chunk count in *num.
 static WebPMuxError ValidateChunk(const WebPMux* const mux, CHUNK_INDEX idx,
                                   WebPFeatureFlags feature,
-                                  WebPFeatureFlags vp8x_flags,
+                                  uint32_t vp8x_flags,
                                   int max, int* num) {
   const WebPMuxError err =
       WebPMuxNumChunks(mux, kChunks[idx].id, num);
@@ -494,10 +461,11 @@ static WebPMuxError ValidateChunk(const WebPMux* const mux, CHUNK_INDEX idx,
 
 WebPMuxError MuxValidate(const WebPMux* const mux) {
   int num_iccp;
-  int num_meta;
-  int num_loop_chunks;
+  int num_exif;
+  int num_xmp;
+  int num_anim;
   int num_frames;
-  int num_tiles;
+  int num_fragments;
   int num_vp8x;
   int num_images;
   int num_alpha;
@@ -517,29 +485,33 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
   err = ValidateChunk(mux, IDX_ICCP, ICCP_FLAG, flags, 1, &num_iccp);
   if (err != WEBP_MUX_OK) return err;
 
+  // At most one EXIF metadata.
+  err = ValidateChunk(mux, IDX_EXIF, EXIF_FLAG, flags, 1, &num_exif);
+  if (err != WEBP_MUX_OK) return err;
+
   // At most one XMP metadata.
-  err = ValidateChunk(mux, IDX_META, META_FLAG, flags, 1, &num_meta);
+  err = ValidateChunk(mux, IDX_XMP, XMP_FLAG, flags, 1, &num_xmp);
   if (err != WEBP_MUX_OK) return err;
 
-  // Animation: ANIMATION_FLAG, loop chunk and frame chunk(s) are consistent.
-  // At most one loop chunk.
-  err = ValidateChunk(mux, IDX_LOOP, NO_FLAG, flags, 1, &num_loop_chunks);
+  // Animation: ANIMATION_FLAG, ANIM chunk and ANMF chunk(s) are consistent.
+  // At most one ANIM chunk.
+  err = ValidateChunk(mux, IDX_ANIM, NO_FLAG, flags, 1, &num_anim);
   if (err != WEBP_MUX_OK) return err;
-  err = ValidateChunk(mux, IDX_FRAME, NO_FLAG, flags, -1, &num_frames);
+  err = ValidateChunk(mux, IDX_ANMF, NO_FLAG, flags, -1, &num_frames);
   if (err != WEBP_MUX_OK) return err;
 
   {
     const int has_animation = !!(flags & ANIMATION_FLAG);
-    if (has_animation && (num_loop_chunks == 0 || num_frames == 0)) {
+    if (has_animation && (num_anim == 0 || num_frames == 0)) {
       return WEBP_MUX_INVALID_ARGUMENT;
     }
-    if (!has_animation && (num_loop_chunks == 1 || num_frames > 0)) {
+    if (!has_animation && (num_anim == 1 || num_frames > 0)) {
       return WEBP_MUX_INVALID_ARGUMENT;
     }
   }
 
-  // Tiling: TILE_FLAG and tile chunk(s) are consistent.
-  err = ValidateChunk(mux, IDX_TILE, TILE_FLAG, flags, -1, &num_tiles);
+  // Fragmentation: FRAGMENTS_FLAG and FRGM chunk(s) are consistent.
+  err = ValidateChunk(mux, IDX_FRGM, FRAGMENTS_FLAG, flags, -1, &num_fragments);
   if (err != WEBP_MUX_OK) return err;
 
   // Verify either VP8X chunk is present OR there is only one elem in
@@ -551,16 +523,22 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
   if (num_vp8x == 0 && num_images != 1) return WEBP_MUX_INVALID_ARGUMENT;
 
   // ALPHA_FLAG & alpha chunk(s) are consistent.
-  if (num_vp8x > 0 && MuxHasLosslessImages(mux->images_)) {
-    // Special case: we have a VP8X chunk as well as some lossless images.
-    if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
-  } else {
-    err = ValidateChunk(mux, IDX_ALPHA, ALPHA_FLAG, flags, -1, &num_alpha);
-    if (err != WEBP_MUX_OK) return err;
+  if (MuxHasAlpha(mux->images_)) {
+    if (num_vp8x > 0) {
+      // VP8X chunk is present, so it should contain ALPHA_FLAG.
+      if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
+    } else {
+      // VP8X chunk is not present, so ALPH chunks should NOT be present either.
+      err = WebPMuxNumChunks(mux, WEBP_CHUNK_ALPHA, &num_alpha);
+      if (err != WEBP_MUX_OK) return err;
+      if (num_alpha > 0) return WEBP_MUX_INVALID_ARGUMENT;
+    }
+  } else {  // Mux doesn't need alpha. So, ALPHA_FLAG should NOT be present.
+    if (flags & ALPHA_FLAG) return WEBP_MUX_INVALID_ARGUMENT;
   }
 
-  // num_tiles & num_images are consistent.
-  if (num_tiles > 0 && num_images != num_tiles) {
+  // num_fragments & num_images are consistent.
+  if (num_fragments > 0 && num_images != num_fragments) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
 
@@ -571,6 +549,3 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/mux/muxread.c b/drivers/webp/mux/muxread.c
index 21c3cfbaeb..8957a1e46e 100644
--- a/drivers/webp/mux/muxread.c
+++ b/drivers/webp/mux/muxread.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Read APIs for mux.
@@ -12,10 +14,7 @@
 
 #include <assert.h>
 #include "./muxi.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "../utils/utils.h"
 
 //------------------------------------------------------------------------------
 // Helper method(s).
@@ -41,8 +40,9 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
 
   SWITCH_ID_LIST(IDX_VP8X, mux->vp8x_);
   SWITCH_ID_LIST(IDX_ICCP, mux->iccp_);
-  SWITCH_ID_LIST(IDX_LOOP, mux->loop_);
-  SWITCH_ID_LIST(IDX_META, mux->meta_);
+  SWITCH_ID_LIST(IDX_ANIM, mux->anim_);
+  SWITCH_ID_LIST(IDX_EXIF, mux->exif_);
+  SWITCH_ID_LIST(IDX_XMP, mux->xmp_);
   SWITCH_ID_LIST(IDX_UNKNOWN, mux->unknown_);
   return WEBP_MUX_NOT_FOUND;
 }
@@ -50,15 +50,14 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
 
 // Fill the chunk with the given data (includes chunk header bytes), after some
 // verifications.
-static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
-                                             const uint8_t* data,
-                                             size_t data_size, size_t riff_size,
-                                             int copy_data) {
+static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk,
+                                         const uint8_t* data, size_t data_size,
+                                         size_t riff_size, int copy_data) {
   uint32_t chunk_size;
   WebPData chunk_data;
 
   // Sanity checks.
-  if (data_size < TAG_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
+  if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
   chunk_size = GetLE32(data + TAG_SIZE);
 
   {
@@ -68,11 +67,103 @@ static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
   }
 
   // Data assignment.
-  chunk_data.bytes_ = data + CHUNK_HEADER_SIZE;
-  chunk_data.size_ = chunk_size;
+  chunk_data.bytes = data + CHUNK_HEADER_SIZE;
+  chunk_data.size = chunk_size;
   return ChunkAssignData(chunk, &chunk_data, copy_data, GetLE32(data + 0));
 }
 
+int MuxImageFinalize(WebPMuxImage* const wpi) {
+  const WebPChunk* const img = wpi->img_;
+  const WebPData* const image = &img->data_;
+  const int is_lossless = (img->tag_ == kChunks[IDX_VP8L].tag);
+  int w, h;
+  int vp8l_has_alpha = 0;
+  const int ok = is_lossless ?
+      VP8LGetInfo(image->bytes, image->size, &w, &h, &vp8l_has_alpha) :
+      VP8GetInfo(image->bytes, image->size, image->size, &w, &h);
+  assert(img != NULL);
+  if (ok) {
+    // Ignore ALPH chunk accompanying VP8L.
+    if (is_lossless && (wpi->alpha_ != NULL)) {
+      ChunkDelete(wpi->alpha_);
+      wpi->alpha_ = NULL;
+    }
+    wpi->width_ = w;
+    wpi->height_ = h;
+    wpi->has_alpha_ = vp8l_has_alpha || (wpi->alpha_ != NULL);
+  }
+  return ok;
+}
+
+static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
+                         WebPMuxImage* const wpi) {
+  const uint8_t* bytes = chunk->data_.bytes;
+  size_t size = chunk->data_.size;
+  const uint8_t* const last = bytes + size;
+  WebPChunk subchunk;
+  size_t subchunk_size;
+  ChunkInit(&subchunk);
+
+  assert(chunk->tag_ == kChunks[IDX_ANMF].tag ||
+         chunk->tag_ == kChunks[IDX_FRGM].tag);
+  assert(!wpi->is_partial_);
+
+  // ANMF/FRGM.
+  {
+    const size_t hdr_size = (chunk->tag_ == kChunks[IDX_ANMF].tag) ?
+        ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
+    const WebPData temp = { bytes, hdr_size };
+    // Both ANMF and FRGM chunks begin with a header, so the chunk's size
+    // should be at least 'hdr_size'.
+    if (size < hdr_size) goto Fail;
+    ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_);
+  }
+  ChunkSetNth(&subchunk, &wpi->header_, 1);
+  wpi->is_partial_ = 1;  // Waiting for ALPH and/or VP8/VP8L chunks.
+
+  // Rest of the chunks.
+  subchunk_size = ChunkDiskSize(&subchunk) - CHUNK_HEADER_SIZE;
+  bytes += subchunk_size;
+  size -= subchunk_size;
+
+  while (bytes != last) {
+    ChunkInit(&subchunk);
+    if (ChunkVerifyAndAssign(&subchunk, bytes, size, size,
+                             copy_data) != WEBP_MUX_OK) {
+      goto Fail;
+    }
+    switch (ChunkGetIdFromTag(subchunk.tag_)) {
+      case WEBP_CHUNK_ALPHA:
+        if (wpi->alpha_ != NULL) goto Fail;  // Consecutive ALPH chunks.
+        if (ChunkSetNth(&subchunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Fail;
+        wpi->is_partial_ = 1;  // Waiting for a VP8 chunk.
+        break;
+      case WEBP_CHUNK_IMAGE:
+        if (ChunkSetNth(&subchunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Fail;
+        if (!MuxImageFinalize(wpi)) goto Fail;
+        wpi->is_partial_ = 0;  // wpi is completely filled.
+        break;
+      case WEBP_CHUNK_UNKNOWN:
+        if (wpi->is_partial_) goto Fail;  // Encountered an unknown chunk
+                                          // before some image chunks.
+        if (ChunkSetNth(&subchunk, &wpi->unknown_, 0) != WEBP_MUX_OK) goto Fail;
+        break;
+      default:
+        goto Fail;
+        break;
+    }
+    subchunk_size = ChunkDiskSize(&subchunk);
+    bytes += subchunk_size;
+    size -= subchunk_size;
+  }
+  if (wpi->is_partial_) goto Fail;
+  return 1;
+
+ Fail:
+  ChunkRelease(&subchunk);
+  return 0;
+}
+
 //------------------------------------------------------------------------------
 // Create a mux object from WebP-RIFF data.
 
@@ -94,8 +185,8 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
   }
   if (bitstream == NULL) return NULL;
 
-  data = bitstream->bytes_;
-  size = bitstream->size_;
+  data = bitstream->bytes;
+  size = bitstream->size;
 
   if (data == NULL) return NULL;
   if (size < RIFF_HEADER_SIZE) return NULL;
@@ -129,48 +220,55 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
   data += RIFF_HEADER_SIZE;
   size -= RIFF_HEADER_SIZE;
 
-  wpi = (WebPMuxImage*)malloc(sizeof(*wpi));
+  wpi = (WebPMuxImage*)WebPSafeMalloc(1ULL, sizeof(*wpi));
   if (wpi == NULL) goto Err;
   MuxImageInit(wpi);
 
   // Loop over chunks.
   while (data != end) {
+    size_t data_size;
     WebPChunkId id;
-    WebPMuxError err;
-
-    err = ChunkVerifyAndAssignData(&chunk, data, size, riff_size, copy_data);
-    if (err != WEBP_MUX_OK) goto Err;
-
+    WebPChunk** chunk_list;
+    if (ChunkVerifyAndAssign(&chunk, data, size, riff_size,
+                             copy_data) != WEBP_MUX_OK) {
+      goto Err;
+    }
+    data_size = ChunkDiskSize(&chunk);
     id = ChunkGetIdFromTag(chunk.tag_);
-
-    if (IsWPI(id)) {  // An image chunk (frame/tile/alpha/vp8).
-      WebPChunk** wpi_chunk_ptr =
-          MuxImageGetListFromId(wpi, id);  // Image chunk to set.
-      assert(wpi_chunk_ptr != NULL);
-      if (*wpi_chunk_ptr != NULL) goto Err;  // Consecutive alpha chunks or
-                                             // consecutive frame/tile chunks.
-      if (ChunkSetNth(&chunk, wpi_chunk_ptr, 1) != WEBP_MUX_OK) goto Err;
-      if (id == WEBP_CHUNK_IMAGE) {
+    switch (id) {
+      case WEBP_CHUNK_ALPHA:
+        if (wpi->alpha_ != NULL) goto Err;  // Consecutive ALPH chunks.
+        if (ChunkSetNth(&chunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Err;
+        wpi->is_partial_ = 1;  // Waiting for a VP8 chunk.
+        break;
+      case WEBP_CHUNK_IMAGE:
+        if (ChunkSetNth(&chunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Err;
+        if (!MuxImageFinalize(wpi)) goto Err;
         wpi->is_partial_ = 0;  // wpi is completely filled.
+ PushImage:
         // Add this to mux->images_ list.
         if (MuxImagePush(wpi, &mux->images_) != WEBP_MUX_OK) goto Err;
         MuxImageInit(wpi);  // Reset for reading next image.
-      } else {
-        wpi->is_partial_ = 1;  // wpi is only partially filled.
-      }
-    } else {  // A non-image chunk.
-      WebPChunk** chunk_list;
-      if (wpi->is_partial_) goto Err;  // Encountered a non-image chunk before
-                                       // getting all chunks of an image.
-      chunk_list = MuxGetChunkListFromId(mux, id);  // List to add this chunk.
-      if (chunk_list == NULL) chunk_list = &mux->unknown_;
-      if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
-    }
-    {
-      const size_t data_size = ChunkDiskSize(&chunk);
-      data += data_size;
-      size -= data_size;
+        break;
+      case WEBP_CHUNK_ANMF:
+        if (wpi->is_partial_) goto Err;  // Previous wpi is still incomplete.
+        if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err;
+        ChunkRelease(&chunk);
+        goto PushImage;
+        break;
+      default:  // A non-image chunk.
+        if (wpi->is_partial_) goto Err;  // Encountered a non-image chunk before
+                                         // getting all chunks of an image.
+        chunk_list = MuxGetChunkListFromId(mux, id);  // List to add this chunk.
+        if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
+        if (id == WEBP_CHUNK_VP8X) {  // grab global specs
+          mux->canvas_width_ = GetLE24(data + 12) + 1;
+          mux->canvas_height_ = GetLE24(data + 15) + 1;
+        }
+        break;
     }
+    data += data_size;
+    size -= data_size;
     ChunkInit(&chunk);
   }
 
@@ -190,31 +288,74 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
 //------------------------------------------------------------------------------
 // Get API(s).
 
-WebPMuxError WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags) {
-  WebPData data;
-  WebPMuxError err;
+// Validates that the given mux has a single image.
+static WebPMuxError ValidateForSingleImage(const WebPMux* const mux) {
+  const int num_images = MuxImageCount(mux->images_, WEBP_CHUNK_IMAGE);
+  const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_ANMF);
+  const int num_fragments = MuxImageCount(mux->images_, WEBP_CHUNK_FRGM);
+
+  if (num_images == 0) {
+    // No images in mux.
+    return WEBP_MUX_NOT_FOUND;
+  } else if (num_images == 1 && num_frames == 0 && num_fragments == 0) {
+    // Valid case (single image).
+    return WEBP_MUX_OK;
+  } else {
+    // Frame/Fragment case OR an invalid mux.
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+}
 
-  if (mux == NULL || flags == NULL) return WEBP_MUX_INVALID_ARGUMENT;
-  *flags = 0;
+// Get the canvas width, height and flags after validating that VP8X/VP8/VP8L
+// chunk and canvas size are valid.
+static WebPMuxError MuxGetCanvasInfo(const WebPMux* const mux,
+                                     int* width, int* height, uint32_t* flags) {
+  int w, h;
+  uint32_t f = 0;
+  WebPData data;
+  assert(mux != NULL);
 
   // Check if VP8X chunk is present.
-  err = MuxGet(mux, IDX_VP8X, 1, &data);
-  if (err == WEBP_MUX_NOT_FOUND) {
-    // Check if VP8/VP8L chunk is present.
-    err = WebPMuxGetImage(mux, &data);
-    WebPDataClear(&data);
-    return err;
-  } else if (err != WEBP_MUX_OK) {
-    return err;
+  if (MuxGet(mux, IDX_VP8X, 1, &data) == WEBP_MUX_OK) {
+    if (data.size < VP8X_CHUNK_SIZE) return WEBP_MUX_BAD_DATA;
+    f = GetLE32(data.bytes + 0);
+    w = GetLE24(data.bytes + 4) + 1;
+    h = GetLE24(data.bytes + 7) + 1;
+  } else {
+    const WebPMuxImage* const wpi = mux->images_;
+    // Grab user-forced canvas size as default.
+    w = mux->canvas_width_;
+    h = mux->canvas_height_;
+    if (w == 0 && h == 0 && ValidateForSingleImage(mux) == WEBP_MUX_OK) {
+      // Single image, no forced canvas size => use first frame's dimensions.
+      assert(wpi != NULL);
+      w = wpi->width_;
+      h = wpi->height_;
+    }
+    if (wpi != NULL) {
+      if (wpi->has_alpha_) f |= ALPHA_FLAG;
+    }
   }
+  if (w * (uint64_t)h >= MAX_IMAGE_AREA) return WEBP_MUX_BAD_DATA;
 
-  if (data.size_ < CHUNK_SIZE_BYTES) return WEBP_MUX_BAD_DATA;
-
-  // All OK. Fill up flags.
-  *flags = GetLE32(data.bytes_);
+  if (width != NULL) *width = w;
+  if (height != NULL) *height = h;
+  if (flags != NULL) *flags = f;
   return WEBP_MUX_OK;
 }
 
+WebPMuxError WebPMuxGetCanvasSize(const WebPMux* mux, int* width, int* height) {
+  if (mux == NULL || width == NULL || height == NULL) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  return MuxGetCanvasInfo(mux, width, height, NULL);
+}
+
+WebPMuxError WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags) {
+  if (mux == NULL || flags == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  return MuxGetCanvasInfo(mux, NULL, NULL, flags);
+}
+
 static uint8_t* EmitVP8XChunk(uint8_t* const dst, int width,
                               int height, uint32_t flags) {
   const size_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
@@ -230,7 +371,7 @@ static uint8_t* EmitVP8XChunk(uint8_t* const dst, int width,
 }
 
 // Assemble a single image WebP bitstream from 'wpi'.
-static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
+static WebPMuxError SynthesizeBitstream(const WebPMuxImage* const wpi,
                                         WebPData* const bitstream) {
   uint8_t* dst;
 
@@ -238,25 +379,17 @@ static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
   const int need_vp8x = (wpi->alpha_ != NULL);
   const size_t vp8x_size = need_vp8x ? CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE : 0;
   const size_t alpha_size = need_vp8x ? ChunkDiskSize(wpi->alpha_) : 0;
-  // Note: No need to output FRM/TILE chunk for a single image.
+  // Note: No need to output ANMF/FRGM chunk for a single image.
   const size_t size = RIFF_HEADER_SIZE + vp8x_size + alpha_size +
                       ChunkDiskSize(wpi->img_);
-  uint8_t* const data = (uint8_t*)malloc(size);
+  uint8_t* const data = (uint8_t*)WebPSafeMalloc(1ULL, size);
   if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
 
   // Main RIFF header.
   dst = MuxEmitRiffHeader(data, size);
 
   if (need_vp8x) {
-    int w, h;
-    WebPMuxError err;
-    assert(wpi->img_ != NULL);
-    err = MuxGetImageWidthHeight(wpi->img_, &w, &h);
-    if (err != WEBP_MUX_OK) {
-      free(data);
-      return err;
-    }
-    dst = EmitVP8XChunk(dst, w, h, ALPHA_FLAG);  // VP8X.
+    dst = EmitVP8XChunk(dst, wpi->width_, wpi->height_, ALPHA_FLAG);  // VP8X.
     dst = ChunkListEmit(wpi->alpha_, dst);       // ALPH.
   }
 
@@ -265,107 +398,115 @@ static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
   assert(dst == data + size);
 
   // Output.
-  bitstream->bytes_ = data;
-  bitstream->size_ = size;
+  bitstream->bytes = data;
+  bitstream->size = size;
   return WEBP_MUX_OK;
 }
 
-WebPMuxError WebPMuxGetImage(const WebPMux* mux, WebPData* bitstream) {
-  WebPMuxError err;
-  WebPMuxImage* wpi = NULL;
-
-  if (mux == NULL || bitstream == NULL) {
+WebPMuxError WebPMuxGetChunk(const WebPMux* mux, const char fourcc[4],
+                             WebPData* chunk_data) {
+  CHUNK_INDEX idx;
+  if (mux == NULL || fourcc == NULL || chunk_data == NULL) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
-
-  err = MuxValidateForImage(mux);
-  if (err != WEBP_MUX_OK) return err;
-
-  // All well. Get the image.
-  err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, WEBP_CHUNK_IMAGE,
-                       &wpi);
-  assert(err == WEBP_MUX_OK);  // Already tested above.
-
-  return SynthesizeBitstream(wpi, bitstream);
-}
-
-WebPMuxError WebPMuxGetMetadata(const WebPMux* mux, WebPData* metadata) {
-  if (mux == NULL || metadata == NULL) return WEBP_MUX_INVALID_ARGUMENT;
-  return MuxGet(mux, IDX_META, 1, metadata);
+  idx = ChunkGetIndexFromFourCC(fourcc);
+  if (IsWPI(kChunks[idx].id)) {     // An image chunk.
+    return WEBP_MUX_INVALID_ARGUMENT;
+  } else if (idx != IDX_UNKNOWN) {  // A known chunk type.
+    return MuxGet(mux, idx, 1, chunk_data);
+  } else {                          // An unknown chunk type.
+    const WebPChunk* const chunk =
+        ChunkSearchList(mux->unknown_, 1, ChunkGetTagFromFourCC(fourcc));
+    if (chunk == NULL) return WEBP_MUX_NOT_FOUND;
+    *chunk_data = chunk->data_;
+    return WEBP_MUX_OK;
+  }
 }
 
-WebPMuxError WebPMuxGetColorProfile(const WebPMux* mux,
-                                    WebPData* color_profile) {
-  if (mux == NULL || color_profile == NULL) return WEBP_MUX_INVALID_ARGUMENT;
-  return MuxGet(mux, IDX_ICCP, 1, color_profile);
+static WebPMuxError MuxGetImageInternal(const WebPMuxImage* const wpi,
+                                        WebPMuxFrameInfo* const info) {
+  // Set some defaults for unrelated fields.
+  info->x_offset = 0;
+  info->y_offset = 0;
+  info->duration = 1;
+  info->dispose_method = WEBP_MUX_DISPOSE_NONE;
+  info->blend_method = WEBP_MUX_BLEND;
+  // Extract data for related fields.
+  info->id = ChunkGetIdFromTag(wpi->img_->tag_);
+  return SynthesizeBitstream(wpi, &info->bitstream);
 }
 
-WebPMuxError WebPMuxGetLoopCount(const WebPMux* mux, int* loop_count) {
-  WebPData image;
-  WebPMuxError err;
-
-  if (mux == NULL || loop_count == NULL) return WEBP_MUX_INVALID_ARGUMENT;
-
-  err = MuxGet(mux, IDX_LOOP, 1, &image);
-  if (err != WEBP_MUX_OK) return err;
-  if (image.size_ < kChunks[WEBP_CHUNK_LOOP].size) return WEBP_MUX_BAD_DATA;
-  *loop_count = GetLE16(image.bytes_);
-
-  return WEBP_MUX_OK;
+static WebPMuxError MuxGetFrameFragmentInternal(const WebPMuxImage* const wpi,
+                                                WebPMuxFrameInfo* const frame) {
+  const int is_frame = (wpi->header_->tag_ == kChunks[IDX_ANMF].tag);
+  const CHUNK_INDEX idx = is_frame ? IDX_ANMF : IDX_FRGM;
+  const WebPData* frame_frgm_data;
+  if (!is_frame) return WEBP_MUX_INVALID_ARGUMENT;
+  assert(wpi->header_ != NULL);  // Already checked by WebPMuxGetFrame().
+  // Get frame/fragment chunk.
+  frame_frgm_data = &wpi->header_->data_;
+  if (frame_frgm_data->size < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
+  // Extract info.
+  frame->x_offset = 2 * GetLE24(frame_frgm_data->bytes + 0);
+  frame->y_offset = 2 * GetLE24(frame_frgm_data->bytes + 3);
+  if (is_frame) {
+    const uint8_t bits = frame_frgm_data->bytes[15];
+    frame->duration = GetLE24(frame_frgm_data->bytes + 12);
+    frame->dispose_method =
+        (bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
+    frame->blend_method = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
+  } else {  // Defaults for unused values.
+    frame->duration = 1;
+    frame->dispose_method = WEBP_MUX_DISPOSE_NONE;
+    frame->blend_method = WEBP_MUX_BLEND;
+  }
+  frame->id = ChunkGetIdFromTag(wpi->header_->tag_);
+  return SynthesizeBitstream(wpi, &frame->bitstream);
 }
 
-static WebPMuxError MuxGetFrameTileInternal(
-    const WebPMux* const mux, uint32_t nth, WebPData* const bitstream,
-    int* const x_offset, int* const y_offset, int* const duration,
-    uint32_t tag) {
-  const WebPData* frame_tile_data;
+WebPMuxError WebPMuxGetFrame(
+    const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame) {
   WebPMuxError err;
   WebPMuxImage* wpi;
 
-  const int is_frame = (tag == kChunks[WEBP_CHUNK_FRAME].tag) ? 1 : 0;
-  const CHUNK_INDEX idx = is_frame ? IDX_FRAME : IDX_TILE;
-  const WebPChunkId id = kChunks[idx].id;
-
-  if (mux == NULL || bitstream == NULL ||
-      x_offset == NULL || y_offset == NULL || (is_frame && duration == NULL)) {
+  // Sanity checks.
+  if (mux == NULL || frame == NULL) {
     return WEBP_MUX_INVALID_ARGUMENT;
   }
 
   // Get the nth WebPMuxImage.
-  err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, id, &wpi);
+  err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, &wpi);
   if (err != WEBP_MUX_OK) return err;
 
-  // Get frame chunk.
-  assert(wpi->header_ != NULL);  // As MuxImageGetNth() already checked header_.
-  frame_tile_data = &wpi->header_->data_;
+  // Get frame info.
+  if (wpi->header_ == NULL) {
+    return MuxGetImageInternal(wpi, frame);
+  } else {
+    return MuxGetFrameFragmentInternal(wpi, frame);
+  }
+}
 
-  if (frame_tile_data->size_ < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
-  *x_offset = 2 * GetLE24(frame_tile_data->bytes_ + 0);
-  *y_offset = 2 * GetLE24(frame_tile_data->bytes_ + 3);
-  if (is_frame) *duration = 1 + GetLE24(frame_tile_data->bytes_ + 12);
+WebPMuxError WebPMuxGetAnimationParams(const WebPMux* mux,
+                                       WebPMuxAnimParams* params) {
+  WebPData anim;
+  WebPMuxError err;
 
-  return SynthesizeBitstream(wpi, bitstream);
-}
+  if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
 
-WebPMuxError WebPMuxGetFrame(const WebPMux* mux, uint32_t nth,
-                             WebPData* bitstream,
-                             int* x_offset, int* y_offset, int* duration) {
-  return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset,
-                                 duration, kChunks[IDX_FRAME].tag);
-}
+  err = MuxGet(mux, IDX_ANIM, 1, &anim);
+  if (err != WEBP_MUX_OK) return err;
+  if (anim.size < kChunks[WEBP_CHUNK_ANIM].size) return WEBP_MUX_BAD_DATA;
+  params->bgcolor = GetLE32(anim.bytes);
+  params->loop_count = GetLE16(anim.bytes + 4);
 
-WebPMuxError WebPMuxGetTile(const WebPMux* mux, uint32_t nth,
-                            WebPData* bitstream,
-                            int* x_offset, int* y_offset) {
-  return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset, NULL,
-                                 kChunks[IDX_TILE].tag);
+  return WEBP_MUX_OK;
 }
 
 // Get chunk index from chunk id. Returns IDX_NIL if not found.
 static CHUNK_INDEX ChunkGetIndexFromId(WebPChunkId id) {
   int i;
   for (i = 0; kChunks[i].id != WEBP_CHUNK_NIL; ++i) {
-    if (id == kChunks[i].id) return i;
+    if (id == kChunks[i].id) return (CHUNK_INDEX)i;
   }
   return IDX_NIL;
 }
@@ -393,19 +534,11 @@ WebPMuxError WebPMuxNumChunks(const WebPMux* mux,
     *num_elements = MuxImageCount(mux->images_, id);
   } else {
     WebPChunk* const* chunk_list = MuxGetChunkListFromId(mux, id);
-    if (chunk_list == NULL) {
-      *num_elements = 0;
-    } else {
-      const CHUNK_INDEX idx = ChunkGetIndexFromId(id);
-      *num_elements = CountChunks(*chunk_list, kChunks[idx].tag);
-    }
+    const CHUNK_INDEX idx = ChunkGetIndexFromId(id);
+    *num_elements = CountChunks(*chunk_list, kChunks[idx].tag);
   }
 
   return WEBP_MUX_OK;
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/mux_types.h b/drivers/webp/mux_types.h
new file mode 100644
index 0000000000..c94043a3c0
--- /dev/null
+++ b/drivers/webp/mux_types.h
@@ -0,0 +1,97 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Data-types common to the mux and demux libraries.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_MUX_TYPES_H_
+#define WEBP_WEBP_MUX_TYPES_H_
+
+#include <stdlib.h>  // free()
+#include <string.h>  // memset()
+#include "./types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Note: forward declaring enumerations is not allowed in (strict) C and C++,
+// so the typedefs below are commented out and kept only for reference.
+// typedef enum WebPFeatureFlags WebPFeatureFlags;
+// typedef enum WebPMuxAnimDispose WebPMuxAnimDispose;
+// typedef enum WebPMuxAnimBlend WebPMuxAnimBlend;
+typedef struct WebPData WebPData;
+
+// VP8X Feature Flags.
+typedef enum WebPFeatureFlags {
+  FRAGMENTS_FLAG  = 0x00000001,
+  ANIMATION_FLAG  = 0x00000002,
+  XMP_FLAG        = 0x00000004,
+  EXIF_FLAG       = 0x00000008,
+  ALPHA_FLAG      = 0x00000010,
+  ICCP_FLAG       = 0x00000020
+} WebPFeatureFlags;
+
+// Dispose method (animation only). Indicates how the area used by the current
+// frame is to be treated before rendering the next frame on the canvas.
+typedef enum WebPMuxAnimDispose {
+  WEBP_MUX_DISPOSE_NONE,       // Do not dispose.
+  WEBP_MUX_DISPOSE_BACKGROUND  // Dispose to background color.
+} WebPMuxAnimDispose;
+
+// Blend operation (animation only). Indicates how transparent pixels of the
+// current frame are blended with those of the previous canvas.
+typedef enum WebPMuxAnimBlend {
+  WEBP_MUX_BLEND,              // Blend.
+  WEBP_MUX_NO_BLEND            // Do not blend.
+} WebPMuxAnimBlend;
+
+// Data type used to describe 'raw' data, e.g., chunk data
+// (ICC profile, metadata) and WebP compressed image data.
+struct WebPData {
+  const uint8_t* bytes;
+  size_t size;
+};
+
+// Initializes the contents of the 'webp_data' object with default values.
+static WEBP_INLINE void WebPDataInit(WebPData* webp_data) {
+  if (webp_data != NULL) {
+    memset(webp_data, 0, sizeof(*webp_data));
+  }
+}
+
+// Clears the contents of the 'webp_data' object by calling free(). Does not
+// deallocate the object itself.
+static WEBP_INLINE void WebPDataClear(WebPData* webp_data) {
+  if (webp_data != NULL) {
+    free((void*)webp_data->bytes);
+    WebPDataInit(webp_data);
+  }
+}
+
+// Allocates necessary storage for 'dst' and copies the contents of 'src'.
+// Returns true on success.
+static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
+  if (src == NULL || dst == NULL) return 0;
+  WebPDataInit(dst);
+  if (src->bytes != NULL && src->size != 0) {
+    dst->bytes = (uint8_t*)malloc(src->size);
+    if (dst->bytes == NULL) return 0;
+    memcpy((void*)dst->bytes, src->bytes, src->size);
+    dst->size = src->size;
+  }
+  return 1;
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_MUX_TYPES_H_ */
diff --git a/drivers/webp/types.h b/drivers/webp/types.h
index 3e27190bef..98fff35a11 100644
--- a/drivers/webp/types.h
+++ b/drivers/webp/types.h
@@ -1,8 +1,10 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 //  Common types
@@ -16,10 +18,11 @@
 
 #ifndef _MSC_VER
 #include <inttypes.h>
-#ifdef __STRICT_ANSI__
-#define WEBP_INLINE
-#else  /* __STRICT_ANSI__ */
+#if defined(__cplusplus) || !defined(__STRICT_ANSI__) || \
+    (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)
 #define WEBP_INLINE inline
+#else
+#define WEBP_INLINE
 #endif
 #else
 typedef signed   char int8_t;
@@ -36,7 +39,11 @@ typedef long long int int64_t;
 #ifndef WEBP_EXTERN
 // This explicitly marks library functions and allows for changing the
 // signature for e.g., Windows DLL builds.
-#define WEBP_EXTERN(type) extern type
+# if defined(__GNUC__) && __GNUC__ >= 4
+#  define WEBP_EXTERN(type) extern __attribute__ ((visibility ("default"))) type
+# else
+#  define WEBP_EXTERN(type) extern type
+# endif  /* __GNUC__ >= 4 */
 #endif  /* WEBP_EXTERN */
 
 // Macro to check ABI compatibility (same major revision number)
diff --git a/drivers/webp/utils/bit_reader.c b/drivers/webp/utils/bit_reader.c
index 1afb1db890..5081d5cd4d 100644
--- a/drivers/webp/utils/bit_reader.c
+++ b/drivers/webp/utils/bit_reader.c
@@ -1,36 +1,54 @@
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-// Boolean decoder
+// Boolean decoder non-inlined methods
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#include "./bit_reader.h"
-
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
 #endif
 
-#define MK(X) (((bit_t)(X) << (BITS)) | (MASK))
+#include "./bit_reader_inl.h"
 
 //------------------------------------------------------------------------------
 // VP8BitReader
 
+void VP8BitReaderSetBuffer(VP8BitReader* const br,
+                           const uint8_t* const start,
+                           size_t size) {
+  br->buf_     = start;
+  br->buf_end_ = start + size;
+  br->buf_max_ =
+      (size >= sizeof(lbit_t)) ? start + size - sizeof(lbit_t) + 1
+                               : start;
+}
+
 void VP8InitBitReader(VP8BitReader* const br,
-                      const uint8_t* const start, const uint8_t* const end) {
+                      const uint8_t* const start, size_t size) {
   assert(br != NULL);
   assert(start != NULL);
-  assert(start <= end);
-  br->range_   = MK(255 - 1);
-  br->buf_     = start;
-  br->buf_end_ = end;
+  assert(size < (1u << 31));   // limit ensured by format and upstream checks
+  br->range_   = 255 - 1;
   br->value_   = 0;
-  br->missing_ = 8;   // to load the very first 8bits
+  br->bits_    = -8;   // to load the very first 8bits
   br->eof_     = 0;
+  VP8BitReaderSetBuffer(br, start, size);
+  VP8LoadNewBytes(br);
+}
+
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
+  if (br->buf_ != NULL) {
+    br->buf_ += offset;
+    br->buf_end_ += offset;
+    br->buf_max_ += offset;
+  }
 }
 
 const uint8_t kVP8Log2Range[128] = {
@@ -45,36 +63,38 @@ const uint8_t kVP8Log2Range[128] = {
   0
 };
 
-// range = (range << kVP8Log2Range[range]) + trailing 1's
-const bit_t kVP8NewRange[128] = {
-  MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127),
-  MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127),
-  MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191),
-  MK(199), MK(207), MK(215), MK(223), MK(231), MK(239), MK(247), MK(127),
-  MK(131), MK(135), MK(139), MK(143), MK(147), MK(151), MK(155), MK(159),
-  MK(163), MK(167), MK(171), MK(175), MK(179), MK(183), MK(187), MK(191),
-  MK(195), MK(199), MK(203), MK(207), MK(211), MK(215), MK(219), MK(223),
-  MK(227), MK(231), MK(235), MK(239), MK(243), MK(247), MK(251), MK(127),
-  MK(129), MK(131), MK(133), MK(135), MK(137), MK(139), MK(141), MK(143),
-  MK(145), MK(147), MK(149), MK(151), MK(153), MK(155), MK(157), MK(159),
-  MK(161), MK(163), MK(165), MK(167), MK(169), MK(171), MK(173), MK(175),
-  MK(177), MK(179), MK(181), MK(183), MK(185), MK(187), MK(189), MK(191),
-  MK(193), MK(195), MK(197), MK(199), MK(201), MK(203), MK(205), MK(207),
-  MK(209), MK(211), MK(213), MK(215), MK(217), MK(219), MK(221), MK(223),
-  MK(225), MK(227), MK(229), MK(231), MK(233), MK(235), MK(237), MK(239),
-  MK(241), MK(243), MK(245), MK(247), MK(249), MK(251), MK(253), MK(127)
+// range = ((range - 1) << kVP8Log2Range[range]) + 1
+const uint8_t kVP8NewRange[128] = {
+  127, 127, 191, 127, 159, 191, 223, 127,
+  143, 159, 175, 191, 207, 223, 239, 127,
+  135, 143, 151, 159, 167, 175, 183, 191,
+  199, 207, 215, 223, 231, 239, 247, 127,
+  131, 135, 139, 143, 147, 151, 155, 159,
+  163, 167, 171, 175, 179, 183, 187, 191,
+  195, 199, 203, 207, 211, 215, 219, 223,
+  227, 231, 235, 239, 243, 247, 251, 127,
+  129, 131, 133, 135, 137, 139, 141, 143,
+  145, 147, 149, 151, 153, 155, 157, 159,
+  161, 163, 165, 167, 169, 171, 173, 175,
+  177, 179, 181, 183, 185, 187, 189, 191,
+  193, 195, 197, 199, 201, 203, 205, 207,
+  209, 211, 213, 215, 217, 219, 221, 223,
+  225, 227, 229, 231, 233, 235, 237, 239,
+  241, 243, 245, 247, 249, 251, 253, 127
 };
 
-#undef MK
-
 void VP8LoadFinalBytes(VP8BitReader* const br) {
   assert(br != NULL && br->buf_ != NULL);
   // Only read 8bits at a time
   if (br->buf_ < br->buf_end_) {
-    br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 + br->missing_);
-    br->missing_ -= 8;
-  } else {
+    br->bits_ += 8;
+    br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8);
+  } else if (!br->eof_) {
+    br->value_ <<= 8;
+    br->bits_ += 8;
     br->eof_ = 1;
+  } else {
+    br->bits_ = 0;  // This is to avoid undefined behaviour with shifts.
   }
 }
 
@@ -97,32 +117,47 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
 //------------------------------------------------------------------------------
 // VP8LBitReader
 
-#define MAX_NUM_BIT_READ 25
+#define VP8L_LOG8_WBITS 4  // Number of bytes needed to store VP8L_WBITS bits.
 
-static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
-  0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
-  65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
+#if !defined(WEBP_FORCE_ALIGNED) && \
+    (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \
+     defined(__i386__) || defined(_M_IX86) || \
+     defined(__x86_64__) || defined(_M_X64))
+#define VP8L_USE_UNALIGNED_LOAD
+#endif
+
+static const uint32_t kBitMask[VP8L_MAX_NUM_BIT_READ + 1] = {
+  0,
+  0x000001, 0x000003, 0x000007, 0x00000f,
+  0x00001f, 0x00003f, 0x00007f, 0x0000ff,
+  0x0001ff, 0x0003ff, 0x0007ff, 0x000fff,
+  0x001fff, 0x003fff, 0x007fff, 0x00ffff,
+  0x01ffff, 0x03ffff, 0x07ffff, 0x0fffff,
+  0x1fffff, 0x3fffff, 0x7fffff, 0xffffff
 };
 
-void VP8LInitBitReader(VP8LBitReader* const br,
-                       const uint8_t* const start,
+void VP8LInitBitReader(VP8LBitReader* const br, const uint8_t* const start,
                        size_t length) {
   size_t i;
+  vp8l_val_t value = 0;
   assert(br != NULL);
   assert(start != NULL);
   assert(length < 0xfffffff8u);   // can't happen with a RIFF chunk.
 
-  br->buf_ = start;
   br->len_ = length;
   br->val_ = 0;
-  br->pos_ = 0;
   br->bit_pos_ = 0;
   br->eos_ = 0;
-  br->error_ = 0;
-  for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
-    br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
-    ++br->pos_;
+
+  if (length > sizeof(br->val_)) {
+    length = sizeof(br->val_);
+  }
+  for (i = 0; i < length; ++i) {
+    value |= (vp8l_val_t)start[i] << (8 * i);
   }
+  br->val_ = value;
+  br->pos_ = length;
+  br->buf_ = start;
 }
 
 void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
@@ -130,100 +165,62 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
   assert(br != NULL);
   assert(buf != NULL);
   assert(len < 0xfffffff8u);   // can't happen with a RIFF chunk.
-  br->eos_ = (br->pos_ >= len);
   br->buf_ = buf;
   br->len_ = len;
+  // pos_ > len_ should be considered a param error.
+  br->eos_ = (br->pos_ > br->len_) || VP8LIsEndOfStream(br);
+}
+
+static void VP8LSetEndOfStream(VP8LBitReader* const br) {
+  br->eos_ = 1;
+  br->bit_pos_ = 0;  // To avoid undefined behaviour with shifts.
 }
 
+// If not at EOS, reload up to VP8L_LBITS byte-by-byte
 static void ShiftBytes(VP8LBitReader* const br) {
   while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
     br->val_ >>= 8;
-    br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
+    br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (VP8L_LBITS - 8);
     ++br->pos_;
     br->bit_pos_ -= 8;
   }
-}
-
-void VP8LFillBitWindow(VP8LBitReader* const br) {
-  if (br->bit_pos_ >= 32) {
-#if defined(__x86_64__) || defined(_M_X64)
-    if (br->pos_ + 8 < br->len_) {
-      br->val_ >>= 32;
-      // The expression below needs a little-endian arch to work correctly.
-      // This gives a large speedup for decoding speed.
-      br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32;
-      br->pos_ += 4;
-      br->bit_pos_ -= 32;
-    } else {
-      // Slow path.
-      ShiftBytes(br);
-    }
-#else
-    // Always the slow path.
-    ShiftBytes(br);
-#endif
-  }
-  if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
-    br->eos_ = 1;
+  if (VP8LIsEndOfStream(br)) {
+    VP8LSetEndOfStream(br);
   }
 }
 
-uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
-  const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
-  // Flag an error at end_of_stream.
-  if (!br->eos_) {
-    ++br->bit_pos_;
-    if (br->bit_pos_ >= 32) {
-      ShiftBytes(br);
-    }
-    // After this last bit is read, check if eos needs to be flagged.
-    if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
-      br->eos_ = 1;
-    }
-  } else {
-    br->error_ = 1;
+void VP8LDoFillBitWindow(VP8LBitReader* const br) {
+  assert(br->bit_pos_ >= VP8L_WBITS);
+  // TODO(jzern): given the fixed read size it may be possible to force
+  //              alignment in this block.
+#if defined(VP8L_USE_UNALIGNED_LOAD)
+  if (br->pos_ + sizeof(br->val_) < br->len_) {
+    br->val_ >>= VP8L_WBITS;
+    br->bit_pos_ -= VP8L_WBITS;
+    // The expression below needs a little-endian arch to work correctly.
+    // This gives a large speedup for decoding speed.
+    br->val_ |= (vp8l_val_t)*(const uint32_t*)(br->buf_ + br->pos_) <<
+                (VP8L_LBITS - VP8L_WBITS);
+    br->pos_ += VP8L_LOG8_WBITS;
+    return;
   }
-  return val;
+#endif
+  ShiftBytes(br);       // Slow path.
 }
 
 uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
-  uint32_t val = 0;
   assert(n_bits >= 0);
   // Flag an error if end_of_stream or n_bits is more than allowed limit.
-  if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
-    // If this read is going to cross the read buffer, set the eos flag.
-    if (br->pos_ == br->len_) {
-      if ((br->bit_pos_ + n_bits) >= 64) {
-        br->eos_ = 1;
-        if ((br->bit_pos_ + n_bits) > 64) return val;
-      }
-    }
-    val = (br->val_ >> br->bit_pos_) & kBitMask[n_bits];
-    br->bit_pos_ += n_bits;
-    if (br->bit_pos_ >= 40) {
-      if (br->pos_ + 5 < br->len_) {
-        br->val_ >>= 40;
-        br->val_ |=
-            (((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
-            (((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
-            (((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
-            (((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
-            (((uint64_t)br->buf_[br->pos_ + 4]) << 56);
-        br->pos_ += 5;
-        br->bit_pos_ -= 40;
-      }
-      if (br->bit_pos_ >= 8) {
-        ShiftBytes(br);
-      }
-    }
+  if (!br->eos_ && n_bits <= VP8L_MAX_NUM_BIT_READ) {
+    const uint32_t val = VP8LPrefetchBits(br) & kBitMask[n_bits];
+    const int new_bits = br->bit_pos_ + n_bits;
+    br->bit_pos_ = new_bits;
+    ShiftBytes(br);
+    return val;
   } else {
-    br->error_ = 1;
+    VP8LSetEndOfStream(br);
+    return 0;
   }
-  return val;
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/utils/bit_reader.h b/drivers/webp/utils/bit_reader.h
index 43cd948fd4..7e09653ebc 100644
--- a/drivers/webp/utils/bit_reader.h
+++ b/drivers/webp/utils/bit_reader.h
@@ -1,9 +1,10 @@
-//
 // Copyright 2010 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Boolean decoder
@@ -18,44 +19,75 @@
 #ifdef _MSC_VER
 #include <stdlib.h>  // _byteswap_ulong
 #endif
-#include <string.h>  // For memcpy
-#include "../types.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-#define BITS 32     // can be 32, 16 or 8
-#define MASK ((((bit_t)1) << (BITS)) - 1)
-#if (BITS == 32)
-typedef uint64_t bit_t;   // natural register type
-typedef uint32_t lbit_t;  // natural type for memory I/O
-#elif (BITS == 16)
-typedef uint32_t bit_t;
-typedef uint16_t lbit_t;
+// The Boolean decoder needs to maintain infinite precision on the value_ field.
+// However, since range_ is only 8bit, we only need an active window of 8 bits
+// for value_. Left bits (MSB) get zeroed and shifted away when value_ falls
+// below 128, range_ is updated, and fresh bits read from the bitstream are
+// brought in as LSB. To avoid reading the fresh bits one by one (slow), we
+// cache BITS of them ahead. The total of (BITS + 8) bits must fit into a
+// natural register (with type bit_t). To fetch BITS bits from bitstream we
+// use a type lbit_t.
+//
+// BITS can be any multiple of 8 from 8 to 56 (inclusive).
+// Pick values that fit natural register size.
+
+#if defined(__i386__) || defined(_M_IX86)      // x86 32bit
+#define BITS 24
+#elif defined(__x86_64__) || defined(_M_X64)   // x86 64bit
+#define BITS 56
+#elif defined(__arm__) || defined(_M_ARM)      // ARM
+#define BITS 24
+#elif defined(__mips__)                        // MIPS
+#define BITS 24
+#else                                          // reasonable default
+#define BITS 24  // TODO(skal): test aarch64 and find the proper BITS value.
+#endif
+
+//------------------------------------------------------------------------------
+// Derived types and constants:
+//   bit_t = natural register type for storing 'value_' (which is BITS+8 bits)
+//   range_t = register for 'range_' (which is 8bits only)
+
+#if (BITS > 24)
+typedef uint64_t bit_t;
 #else
 typedef uint32_t bit_t;
-typedef uint8_t lbit_t;
 #endif
 
+typedef uint32_t range_t;
+
 //------------------------------------------------------------------------------
-// Bitreader and code-tree reader
+// Bitreader
 
 typedef struct VP8BitReader VP8BitReader;
 struct VP8BitReader {
+  // boolean decoder  (keep the field ordering as is!)
+  bit_t value_;               // current value
+  range_t range_;             // current range minus 1. In [127, 254] interval.
+  int bits_;                  // number of valid bits left
+  // read buffer
   const uint8_t* buf_;        // next byte to be read
   const uint8_t* buf_end_;    // end of read buffer
+  const uint8_t* buf_max_;    // max packed-read position on buffer
   int eof_;                   // true if input is exhausted
-
-  // boolean decoder
-  bit_t range_;            // current range minus 1. In [127, 254] interval.
-  bit_t value_;            // current value
-  int missing_;            // number of missing bits in value_ (8bit)
 };
 
 // Initialize the bit reader and the boolean decoder.
 void VP8InitBitReader(VP8BitReader* const br,
-                      const uint8_t* const start, const uint8_t* const end);
+                      const uint8_t* const start, size_t size);
+// Sets the working read buffer.
+void VP8BitReaderSetBuffer(VP8BitReader* const br,
+                           const uint8_t* const start, size_t size);
+
+// Update internal pointers to displace the byte buffer by the
+// relative offset 'offset'.
+void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset);
 
 // return the next value made of 'num_bits' bits
 uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
@@ -66,100 +98,31 @@ static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
 // return the next value with sign-extension.
 int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
 
-// Read a bit with proba 'prob'. Speed-critical function!
-extern const uint8_t kVP8Log2Range[128];
-extern const bit_t kVP8NewRange[128];
-
-void VP8LoadFinalBytes(VP8BitReader* const br);    // special case for the tail
-
-static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
-  assert(br && br->buf_);
-  // Read 'BITS' bits at a time if possible.
-  if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
-    // convert memory type to register type (with some zero'ing!)
-    bit_t bits;
-    lbit_t in_bits = *(lbit_t*)br->buf_;
-    br->buf_ += (BITS) >> 3;
-#if !defined(__BIG_ENDIAN__)
-#if (BITS == 32)
-#if defined(__i386__) || defined(__x86_64__)
-    __asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits));
-    bits = (bit_t)in_bits;   // 32b -> 64b zero-extension
-#elif defined(_MSC_VER)
-    bits = _byteswap_ulong(in_bits);
-#else
-    bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
-         | ((in_bits << 8) & 0xff0000)  | (in_bits << 24);
-#endif  // x86
-#elif (BITS == 16)
-    // gcc will recognize a 'rorw $8, ...' here:
-    bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
-#endif
-#else    // LITTLE_ENDIAN
-    bits = (bit_t)in_bits;
-#endif
-    br->value_ |= bits << br->missing_;
-    br->missing_ -= (BITS);
-  } else {
-    VP8LoadFinalBytes(br);    // no need to be inlined
-  }
-}
+// bit_reader_inl.h will implement the following methods:
+//   static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob)
+//   static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v)
+// and should be included by the .c files that actually need them.
+// This is to avoid recompiling the whole library whenever this file is touched,
+// and also allowing platform-specific ad-hoc hacks.
 
-static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, bit_t split) {
-  const bit_t value_split = split | (MASK);
-  if (br->missing_ > 0) {  // Make sure we have a least BITS bits in 'value_'
-    VP8LoadNewBytes(br);
-  }
-  if (br->value_ > value_split) {
-    br->range_ -= value_split + 1;
-    br->value_ -= value_split + 1;
-    return 1;
-  } else {
-    br->range_ = value_split;
-    return 0;
-  }
-}
-
-static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
-  // range_ is in [0..127] interval here.
-  const int idx = br->range_ >> (BITS);
-  const int shift = kVP8Log2Range[idx];
-  br->range_ = kVP8NewRange[idx];
-  br->value_ <<= shift;
-  br->missing_ += shift;
-}
-
-static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
-  // It's important to avoid generating a 64bit x 64bit multiply here.
-  // We just need an 8b x 8b after all.
-  const bit_t split =
-      (bit_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
-  const int bit = VP8BitUpdate(br, split);
-  if (br->range_ <= (((bit_t)0x7e << (BITS)) | (MASK))) {
-    VP8Shift(br);
-  }
-  return bit;
-}
+// -----------------------------------------------------------------------------
+// Bitreader for lossless format
 
-static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
-  const bit_t split = (br->range_ >> 1);
-  const int bit = VP8BitUpdate(br, split);
-  VP8Shift(br);
-  return bit ? -v : v;
-}
+// maximum number of bits (inclusive) the bit-reader can handle:
+#define VP8L_MAX_NUM_BIT_READ 24
 
+#define VP8L_LBITS 64  // Number of bits prefetched (= bit-size of vp8l_val_t).
+#define VP8L_WBITS 32  // Minimum number of bits ready after VP8LFillBitWindow.
 
-// -----------------------------------------------------------------------------
-// Bitreader
+typedef uint64_t vp8l_val_t;  // right now, this bit-reader can only use 64bit.
 
 typedef struct {
-  uint64_t       val_;
-  const uint8_t* buf_;
-  size_t         len_;
-  size_t         pos_;
-  int            bit_pos_;
-  int            eos_;
-  int            error_;
+  vp8l_val_t     val_;        // pre-fetched bits
+  const uint8_t* buf_;        // input byte buffer
+  size_t         len_;        // buffer length
+  size_t         pos_;        // byte position in buf_
+  int            bit_pos_;    // current bit-reading position in val_
+  int            eos_;        // true if a bit was read past the end of buffer
 } VP8LBitReader;
 
 void VP8LInitBitReader(VP8LBitReader* const br,
@@ -170,28 +133,39 @@ void VP8LInitBitReader(VP8LBitReader* const br,
 void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
                             const uint8_t* const buffer, size_t length);
 
-// Reads the specified number of bits from Read Buffer.
-// Flags an error in case end_of_stream or n_bits is more than allowed limit.
-// Flags eos if this read attempt is going to cross the read buffer.
+// Reads the specified number of bits from read buffer.
+// Flags an error in case end_of_stream or n_bits is more than the allowed limit
+// of VP8L_MAX_NUM_BIT_READ (inclusive).
+// Flags eos_ if this read attempt is going to cross the read buffer.
 uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
 
-// Reads one bit from Read Buffer. Flags an error in case end_of_stream.
-// Flags eos after reading last bit from the buffer.
-uint32_t VP8LReadOneBit(VP8LBitReader* const br);
-
-// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
-// 32 times after the last VP8LFillBitWindow. Any subsequent calls
-// (without VP8LFillBitWindow) will return invalid data.
-static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
-  const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
-  ++br->bit_pos_;
-  return val;
+// Return the prefetched bits, so they can be looked up.
+static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
+  return (uint32_t)(br->val_ >> (br->bit_pos_ & (VP8L_LBITS - 1)));
+}
+
+// Returns true if there was an attempt at reading bits past the end of
+// the buffer. Doesn't set br->eos_ flag.
+static WEBP_INLINE int VP8LIsEndOfStream(const VP8LBitReader* const br) {
+  assert(br->pos_ <= br->len_);
+  return br->eos_ || ((br->pos_ == br->len_) && (br->bit_pos_ > VP8L_LBITS));
 }
 
-// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
-void VP8LFillBitWindow(VP8LBitReader* const br);
+// For jumping over a number of bits in the bit stream when accessed with
+// VP8LPrefetchBits and VP8LFillBitWindow.
+static WEBP_INLINE void VP8LSetBitPos(VP8LBitReader* const br, int val) {
+  br->bit_pos_ = val;
+  br->eos_ = VP8LIsEndOfStream(br);
+}
+
+// Advances the read buffer by 4 bytes to make room for reading next 32 bits.
+// Speed critical, but infrequent part of the code can be non-inlined.
+extern void VP8LDoFillBitWindow(VP8LBitReader* const br);
+static WEBP_INLINE void VP8LFillBitWindow(VP8LBitReader* const br) {
+  if (br->bit_pos_ >= VP8L_WBITS) VP8LDoFillBitWindow(br);
+}
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/bit_reader_inl.h b/drivers/webp/utils/bit_reader_inl.h
new file mode 100644
index 0000000000..20ce5f3cc7
--- /dev/null
+++ b/drivers/webp/utils/bit_reader_inl.h
@@ -0,0 +1,172 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Specific inlined methods for boolean decoder [VP8GetBit() ...]
+// This file should be included by the .c sources that actually need to call
+// these methods.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_INL_H_
+#define WEBP_UTILS_BIT_READER_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#ifdef WEBP_FORCE_ALIGNED
+#include <string.h>  // memcpy
+#endif
+
+#include "../dsp/dsp.h"
+#include "./bit_reader.h"
+#include "./endian_inl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Derived type lbit_t = natural type for memory I/O
+
+#if   (BITS > 32)
+typedef uint64_t lbit_t;
+#elif (BITS > 16)
+typedef uint32_t lbit_t;
+#elif (BITS >  8)
+typedef uint16_t lbit_t;
+#else
+typedef uint8_t lbit_t;
+#endif
+
+extern const uint8_t kVP8Log2Range[128];
+extern const uint8_t kVP8NewRange[128];
+
+// special case for the tail byte-reading
+void VP8LoadFinalBytes(VP8BitReader* const br);
+
+//------------------------------------------------------------------------------
+// Inlined critical functions
+
+// makes sure br->value_ has at least BITS bits worth of data
+static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
+  assert(br != NULL && br->buf_ != NULL);
+  // Read 'BITS' bits at a time if possible.
+  if (br->buf_ < br->buf_max_) {
+    // convert memory type to register type (with some zero'ing!)
+    bit_t bits;
+#if defined(WEBP_FORCE_ALIGNED)
+    lbit_t in_bits;
+    memcpy(&in_bits, br->buf_, sizeof(in_bits));
+#elif defined(WEBP_USE_MIPS32)
+    // This is needed because of un-aligned read.
+    lbit_t in_bits;
+    lbit_t* p_buf_ = (lbit_t*)br->buf_;
+    __asm__ volatile(
+      ".set   push                             \n\t"
+      ".set   at                               \n\t"
+      ".set   macro                            \n\t"
+      "ulw    %[in_bits], 0(%[p_buf_])         \n\t"
+      ".set   pop                              \n\t"
+      : [in_bits]"=r"(in_bits)
+      : [p_buf_]"r"(p_buf_)
+      : "memory", "at"
+    );
+#else
+    const lbit_t in_bits = *(const lbit_t*)br->buf_;
+#endif
+    br->buf_ += BITS >> 3;
+#if !defined(WORDS_BIGENDIAN)
+#if (BITS > 32)
+    bits = BSwap64(in_bits);
+    bits >>= 64 - BITS;
+#elif (BITS >= 24)
+    bits = BSwap32(in_bits);
+    bits >>= (32 - BITS);
+#elif (BITS == 16)
+    bits = BSwap16(in_bits);
+#else   // BITS == 8
+    bits = (bit_t)in_bits;
+#endif  // BITS > 32
+#else    // WORDS_BIGENDIAN
+    bits = (bit_t)in_bits;
+    if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
+#endif
+    br->value_ = bits | (br->value_ << BITS);
+    br->bits_ += BITS;
+  } else {
+    VP8LoadFinalBytes(br);    // no need to be inlined
+  }
+}
+
+// Read a bit with proba 'prob'. Speed-critical function!
+static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
+  // Don't move this declaration! It makes a big speed difference to store
+  // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
+  // alter br->range_ value.
+  range_t range = br->range_;
+  if (br->bits_ < 0) {
+    VP8LoadNewBytes(br);
+  }
+  {
+    const int pos = br->bits_;
+    const range_t split = (range * prob) >> 8;
+    const range_t value = (range_t)(br->value_ >> pos);
+#if defined(__arm__) || defined(_M_ARM)      // ARM-specific
+    const int bit = ((int)(split - value) >> 31) & 1;
+    if (value > split) {
+      range -= split + 1;
+      br->value_ -= (bit_t)(split + 1) << pos;
+    } else {
+      range = split;
+    }
+#else  // faster version on x86
+    int bit;  // Don't use 'const int bit = (value > split);', it's slower.
+    if (value > split) {
+      range -= split + 1;
+      br->value_ -= (bit_t)(split + 1) << pos;
+      bit = 1;
+    } else {
+      range = split;
+      bit = 0;
+    }
+#endif
+    if (range <= (range_t)0x7e) {
+      const int shift = kVP8Log2Range[range];
+      range = kVP8NewRange[range];
+      br->bits_ -= shift;
+    }
+    br->range_ = range;
+    return bit;
+  }
+}
+
+// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
+static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
+  if (br->bits_ < 0) {
+    VP8LoadNewBytes(br);
+  }
+  {
+    const int pos = br->bits_;
+    const range_t split = br->range_ >> 1;
+    const range_t value = (range_t)(br->value_ >> pos);
+    const int32_t mask = (int32_t)(split - value) >> 31;  // -1 or 0
+    br->bits_ -= 1;
+    br->range_ += mask;
+    br->range_ |= 1;
+    br->value_ -= (bit_t)((split + 1) & mask) << pos;
+    return (v ^ mask) - mask;
+  }
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif   // WEBP_UTILS_BIT_READER_INL_H_
diff --git a/drivers/webp/utils/bit_writer.c b/drivers/webp/utils/bit_writer.c
index 671159cacd..064428691b 100644
--- a/drivers/webp/utils/bit_writer.c
+++ b/drivers/webp/utils/bit_writer.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Bit writing and boolean coder
@@ -13,11 +15,10 @@
 #include <assert.h>
 #include <string.h>   // for memcpy()
 #include <stdlib.h>
-#include "./bit_writer.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+#include "./bit_writer.h"
+#include "./endian_inl.h"
+#include "./utils.h"
 
 //------------------------------------------------------------------------------
 // VP8BitWriter
@@ -36,19 +37,22 @@ static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
   new_size = 2 * bw->max_pos_;
   if (new_size < needed_size) new_size = needed_size;
   if (new_size < 1024) new_size = 1024;
-  new_buf = (uint8_t*)malloc(new_size);
+  new_buf = (uint8_t*)WebPSafeMalloc(1ULL, new_size);
   if (new_buf == NULL) {
     bw->error_ = 1;
     return 0;
   }
-  memcpy(new_buf, bw->buf_, bw->pos_);
-  free(bw->buf_);
+  if (bw->pos_ > 0) {
+    assert(bw->buf_ != NULL);
+    memcpy(new_buf, bw->buf_, bw->pos_);
+  }
+  WebPSafeFree(bw->buf_);
   bw->buf_ = new_buf;
   bw->max_pos_ = new_size;
   return 1;
 }
 
-static void kFlush(VP8BitWriter* const bw) {
+static void Flush(VP8BitWriter* const bw) {
   const int s = 8 + bw->nb_bits_;
   const int32_t bits = bw->value_ >> s;
   assert(bw->nb_bits_ >= 0);
@@ -114,7 +118,7 @@ int VP8PutBit(VP8BitWriter* const bw, int bit, int prob) {
     bw->range_ = kNewRange[bw->range_];
     bw->value_ <<= shift;
     bw->nb_bits_ += shift;
-    if (bw->nb_bits_ > 0) kFlush(bw);
+    if (bw->nb_bits_ > 0) Flush(bw);
   }
   return bit;
 }
@@ -131,24 +135,25 @@ int VP8PutBitUniform(VP8BitWriter* const bw, int bit) {
     bw->range_ = kNewRange[bw->range_];
     bw->value_ <<= 1;
     bw->nb_bits_ += 1;
-    if (bw->nb_bits_ > 0) kFlush(bw);
+    if (bw->nb_bits_ > 0) Flush(bw);
   }
   return bit;
 }
 
-void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits) {
-  int mask;
-  for (mask = 1 << (nb_bits - 1); mask; mask >>= 1)
+void VP8PutBits(VP8BitWriter* const bw, uint32_t value, int nb_bits) {
+  uint32_t mask;
+  assert(nb_bits > 0 && nb_bits < 32);
+  for (mask = 1u << (nb_bits - 1); mask; mask >>= 1)
     VP8PutBitUniform(bw, value & mask);
 }
 
-void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits) {
+void VP8PutSignedBits(VP8BitWriter* const bw, int value, int nb_bits) {
   if (!VP8PutBitUniform(bw, value != 0))
     return;
   if (value < 0) {
-    VP8PutValue(bw, ((-value) << 1) | 1, nb_bits + 1);
+    VP8PutBits(bw, ((-value) << 1) | 1, nb_bits + 1);
   } else {
-    VP8PutValue(bw, value << 1, nb_bits + 1);
+    VP8PutBits(bw, value << 1, nb_bits + 1);
   }
 }
 
@@ -167,16 +172,16 @@ int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) {
 }
 
 uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) {
-  VP8PutValue(bw, 0, 9 - bw->nb_bits_);
+  VP8PutBits(bw, 0, 9 - bw->nb_bits_);
   bw->nb_bits_ = 0;   // pad with zeroes
-  kFlush(bw);
+  Flush(bw);
   return bw->buf_;
 }
 
 int VP8BitWriterAppend(VP8BitWriter* const bw,
                        const uint8_t* data, size_t size) {
-  assert(data);
-  if (bw->nb_bits_ != -8) return 0;   // kFlush() must have been called
+  assert(data != NULL);
+  if (bw->nb_bits_ != -8) return 0;   // Flush() must have been called
   if (!BitWriterResize(bw, size)) return 0;
   memcpy(bw->buf_ + bw->pos_, data, size);
   bw->pos_ += size;
@@ -184,8 +189,8 @@ int VP8BitWriterAppend(VP8BitWriter* const bw,
 }
 
 void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
-  if (bw) {
-    free(bw->buf_);
+  if (bw != NULL) {
+    WebPSafeFree(bw->buf_);
     memset(bw, 0, sizeof(*bw));
   }
 }
@@ -193,32 +198,39 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
 //------------------------------------------------------------------------------
 // VP8LBitWriter
 
+// This is the minimum number of bytes by which the memory buffer is guaranteed
+// to grow when extra space is needed.
+#define MIN_EXTRA_SIZE  (32768ULL)
+
 // Returns 1 on success.
 static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
   uint8_t* allocated_buf;
   size_t allocated_size;
-  const size_t current_size = VP8LBitWriterNumBytes(bw);
+  const size_t max_bytes = bw->end_ - bw->buf_;
+  const size_t current_size = bw->cur_ - bw->buf_;
   const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
   const size_t size_required = (size_t)size_required_64b;
   if (size_required != size_required_64b) {
     bw->error_ = 1;
     return 0;
   }
-  if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1;
-  allocated_size = (3 * bw->max_bytes_) >> 1;
+  if (max_bytes > 0 && size_required <= max_bytes) return 1;
+  allocated_size = (3 * max_bytes) >> 1;
   if (allocated_size < size_required) allocated_size = size_required;
   // make allocated size multiple of 1k
   allocated_size = (((allocated_size >> 10) + 1) << 10);
-  allocated_buf = (uint8_t*)malloc(allocated_size);
+  allocated_buf = (uint8_t*)WebPSafeMalloc(1ULL, allocated_size);
   if (allocated_buf == NULL) {
     bw->error_ = 1;
     return 0;
   }
-  memcpy(allocated_buf, bw->buf_, current_size);
-  free(bw->buf_);
+  if (current_size > 0) {
+    memcpy(allocated_buf, bw->buf_, current_size);
+  }
+  WebPSafeFree(bw->buf_);
   bw->buf_ = allocated_buf;
-  bw->max_bytes_ = allocated_size;
-  memset(allocated_buf + current_size, 0, allocated_size - current_size);
+  bw->cur_ = bw->buf_ + current_size;
+  bw->end_ = bw->buf_ + allocated_size;
   return 1;
 }
 
@@ -227,58 +239,81 @@ int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) {
   return VP8LBitWriterResize(bw, expected_size);
 }
 
-void VP8LBitWriterDestroy(VP8LBitWriter* const bw) {
+void VP8LBitWriterWipeOut(VP8LBitWriter* const bw) {
   if (bw != NULL) {
-    free(bw->buf_);
+    WebPSafeFree(bw->buf_);
     memset(bw, 0, sizeof(*bw));
   }
 }
 
-void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
-  if (n_bits < 1) return;
-#if !defined(__BIG_ENDIAN__)
-  // Technically, this branch of the code can write up to 25 bits at a time,
-  // but in prefix encoding, the maximum number of bits written is 18 at a time.
-  {
-    uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3];
-    uint32_t v = *(const uint32_t*)p;
-    v |= bits << (bw->bit_pos_ & 7);
-    *(uint32_t*)p = v;
-    bw->bit_pos_ += n_bits;
+void VP8LPutBitsFlushBits(VP8LBitWriter* const bw) {
+  // If needed, make some room by flushing some bits out.
+  if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
+    const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
+    if (extra_size != (size_t)extra_size ||
+        !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+      bw->cur_ = bw->buf_;
+      bw->error_ = 1;
+      return;
+    }
   }
-#else  // BIG_ENDIAN
-  {
-    uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
-    const int bits_reserved_in_first_byte = bw->bit_pos_ & 7;
-    const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
-    // implicit & 0xff is assumed for uint8_t arithmetics
-    *p++ |= bits << bits_reserved_in_first_byte;
-    bits >>= 8 - bits_reserved_in_first_byte;
-    if (bits_left_to_write >= 1) {
-      *p++ = bits;
-      bits >>= 8;
-      if (bits_left_to_write >= 9) {
-        *p++ = bits;
-        bits >>= 8;
+  *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)bw->bits_);
+  bw->cur_ += VP8L_WRITER_BYTES;
+  bw->bits_ >>= VP8L_WRITER_BITS;
+  bw->used_ -= VP8L_WRITER_BITS;
+}
+
+void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits) {
+  assert(n_bits <= 32);
+  // That's the max we can handle:
+  assert(sizeof(vp8l_wtype_t) == 2);
+  if (n_bits > 0) {
+    vp8l_atype_t lbits = bw->bits_;
+    int used = bw->used_;
+    // Special case of overflow handling for 32bit accumulator (2-steps flush).
+#if VP8L_WRITER_BITS == 16
+    if (used + n_bits >= VP8L_WRITER_MAX_BITS) {
+      // Fill up all the VP8L_WRITER_MAX_BITS so it can be flushed out below.
+      const int shift = VP8L_WRITER_MAX_BITS - used;
+      lbits |= (vp8l_atype_t)bits << used;
+      used = VP8L_WRITER_MAX_BITS;
+      n_bits -= shift;
+      bits >>= shift;
+      assert(n_bits <= VP8L_WRITER_MAX_BITS);
+    }
+#endif
+    // If needed, make some room by flushing some bits out.
+    while (used >= VP8L_WRITER_BITS) {
+      if (bw->cur_ + VP8L_WRITER_BYTES > bw->end_) {
+        const uint64_t extra_size = (bw->end_ - bw->buf_) + MIN_EXTRA_SIZE;
+        if (extra_size != (size_t)extra_size ||
+            !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+          bw->cur_ = bw->buf_;
+          bw->error_ = 1;
+          return;
+        }
       }
+      *(vp8l_wtype_t*)bw->cur_ = (vp8l_wtype_t)WSWAP((vp8l_wtype_t)lbits);
+      bw->cur_ += VP8L_WRITER_BYTES;
+      lbits >>= VP8L_WRITER_BITS;
+      used -= VP8L_WRITER_BITS;
     }
-    assert(n_bits <= 25);
-    *p = bits;
-    bw->bit_pos_ += n_bits;
+    bw->bits_ = lbits | ((vp8l_atype_t)bits << used);
+    bw->used_ = used + n_bits;
   }
-#endif
-  if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) {
-    const uint64_t extra_size = 32768ULL + bw->max_bytes_;
-    if (extra_size != (size_t)extra_size ||
-        !VP8LBitWriterResize(bw, (size_t)extra_size)) {
-      bw->bit_pos_ = 0;
-      bw->error_ = 1;
+}
+
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
+  // flush leftover bits
+  if (VP8LBitWriterResize(bw, (bw->used_ + 7) >> 3)) {
+    while (bw->used_ > 0) {
+      *bw->cur_++ = (uint8_t)bw->bits_;
+      bw->bits_ >>= 8;
+      bw->used_ -= 8;
     }
+    bw->used_ = 0;
   }
+  return bw->buf_;
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/utils/bit_writer.h b/drivers/webp/utils/bit_writer.h
index 57f39b11b1..867a5ee055 100644
--- a/drivers/webp/utils/bit_writer.h
+++ b/drivers/webp/utils/bit_writer.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Bit writing and boolean coder
@@ -12,9 +14,9 @@
 #ifndef WEBP_UTILS_BIT_WRITER_H_
 #define WEBP_UTILS_BIT_WRITER_H_
 
-#include "../types.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -43,8 +45,8 @@ void VP8BitWriterWipeOut(VP8BitWriter* const bw);
 
 int VP8PutBit(VP8BitWriter* const bw, int bit, int prob);
 int VP8PutBitUniform(VP8BitWriter* const bw, int bit);
-void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits);
-void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits);
+void VP8PutBits(VP8BitWriter* const bw, uint32_t value, int nb_bits);
+void VP8PutSignedBits(VP8BitWriter* const bw, int value, int nb_bits);
 
 // Appends some bytes to the internal buffer. Data is copied.
 int VP8BitWriterAppend(VP8BitWriter* const bw,
@@ -66,57 +68,77 @@ static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
 
 //------------------------------------------------------------------------------
 // VP8LBitWriter
-// TODO(vikasa): VP8LBitWriter is copied as-is from lossless code. There's scope
-// of re-using VP8BitWriter. Will evaluate once basic lossless encoder is
-// implemented.
 
-typedef struct {
-  uint8_t* buf_;
-  size_t bit_pos_;
-  size_t max_bytes_;
+#if defined(__x86_64__) || defined(_M_X64)   // 64bit
+typedef uint64_t vp8l_atype_t;   // accumulator type
+typedef uint32_t vp8l_wtype_t;   // writing type
+#define WSWAP HToLE32
+#define VP8L_WRITER_BYTES    4   // sizeof(vp8l_wtype_t)
+#define VP8L_WRITER_BITS     32  // 8 * sizeof(vp8l_wtype_t)
+#define VP8L_WRITER_MAX_BITS 64  // 8 * sizeof(vp8l_atype_t)
+#else
+typedef uint32_t vp8l_atype_t;
+typedef uint16_t vp8l_wtype_t;
+#define WSWAP HToLE16
+#define VP8L_WRITER_BYTES    2
+#define VP8L_WRITER_BITS     16
+#define VP8L_WRITER_MAX_BITS 32
+#endif
 
-  // After all bits are written, the caller must observe the state of
-  // error_. A value of 1 indicates that a memory allocation failure
-  // has happened during bit writing. A value of 0 indicates successful
+typedef struct {
+  vp8l_atype_t bits_;   // bit accumulator
+  int          used_;   // number of bits used in accumulator
+  uint8_t*     buf_;    // start of buffer
+  uint8_t*     cur_;    // current write position
+  uint8_t*     end_;    // end of buffer
+
+  // After all bits are written (VP8LBitWriterFinish()), the caller must observe
+  // the state of error_. A value of 1 indicates that a memory allocation
+  // failure has happened during bit writing. A value of 0 indicates successful
   // writing of bits.
   int error_;
 } VP8LBitWriter;
 
 static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
-  return (bw->bit_pos_ + 7) >> 3;
-}
-
-static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
-  return bw->buf_;
+  return (bw->cur_ - bw->buf_) + ((bw->used_ + 7) >> 3);
 }
 
-// Returns 0 in case of memory allocation error.
+// Returns false in case of memory allocation error.
 int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
+// Finalize the bitstream coding. Returns a pointer to the internal buffer.
+uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw);
+// Releases any pending memory and zeroes the object.
+void VP8LBitWriterWipeOut(VP8LBitWriter* const bw);
 
-void VP8LBitWriterDestroy(VP8LBitWriter* const bw);
+// Internal function for VP8LPutBits flushing 32 bits from the written state.
+void VP8LPutBitsFlushBits(VP8LBitWriter* const bw);
 
-// This function writes bits into bytes in increasing addresses, and within
-// a byte least-significant-bit first.
-//
-// The function can write up to 16 bits in one go with WriteBits
-// Example: let's assume that 3 bits (Rs below) have been written already:
-//
-// BYTE-0     BYTE+1       BYTE+2
-//
-// 0000 0RRR    0000 0000    0000 0000
-//
-// Now, we could write 5 or less bits in MSB by just sifting by 3
-// and OR'ing to BYTE-0.
-//
-// For n bits, we take the last 5 bytes, OR that with high bits in BYTE-0,
-// and locate the rest in BYTE+1 and BYTE+2.
-//
+// PutBits internal function used in the 16 bit vp8l_wtype_t case.
+void VP8LPutBitsInternal(VP8LBitWriter* const bw, uint32_t bits, int n_bits);
+
+// This function writes bits into bytes in increasing addresses (little endian),
+// and within a byte least-significant-bit first.
+// This function can write up to 32 bits in one go, but VP8LBitReader can only
+// read 24 bits max (VP8L_MAX_NUM_BIT_READ).
 // VP8LBitWriter's error_ flag is set in case of  memory allocation error.
-void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits);
+static WEBP_INLINE void VP8LPutBits(VP8LBitWriter* const bw,
+                                    uint32_t bits, int n_bits) {
+  if (sizeof(vp8l_wtype_t) == 4) {
+    if (n_bits > 0) {
+      if (bw->used_ >= 32) {
+        VP8LPutBitsFlushBits(bw);
+      }
+      bw->bits_ |= (vp8l_atype_t)bits << bw->used_;
+      bw->used_ += n_bits;
+    }
+  } else {
+    VP8LPutBitsInternal(bw, bits, n_bits);
+  }
+}
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/color_cache.c b/drivers/webp/utils/color_cache.c
index 560f81db10..f9ff4b5451 100644
--- a/drivers/webp/utils/color_cache.c
+++ b/drivers/webp/utils/color_cache.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Color Cache for WebP Lossless
@@ -11,13 +13,10 @@
 
 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>
 #include "./color_cache.h"
 #include "../utils/utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 //------------------------------------------------------------------------------
 // VP8LColorCache.
 
@@ -29,16 +28,22 @@ int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
                                           sizeof(*cc->colors_));
   if (cc->colors_ == NULL) return 0;
   cc->hash_shift_ = 32 - hash_bits;
+  cc->hash_bits_ = hash_bits;
   return 1;
 }
 
 void VP8LColorCacheClear(VP8LColorCache* const cc) {
   if (cc != NULL) {
-    free(cc->colors_);
+    WebPSafeFree(cc->colors_);
     cc->colors_ = NULL;
   }
 }
 
-#if defined(__cplusplus) || defined(c_plusplus)
+void VP8LColorCacheCopy(const VP8LColorCache* const src,
+                        VP8LColorCache* const dst) {
+  assert(src != NULL);
+  assert(dst != NULL);
+  assert(src->hash_bits_ == dst->hash_bits_);
+  memcpy(dst->colors_, src->colors_,
+         ((size_t)1u << dst->hash_bits_) * sizeof(*dst->colors_));
 }
-#endif
diff --git a/drivers/webp/utils/color_cache.h b/drivers/webp/utils/color_cache.h
index da5e260195..34299e4c4e 100644
--- a/drivers/webp/utils/color_cache.h
+++ b/drivers/webp/utils/color_cache.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Color Cache for WebP Lossless
@@ -13,26 +15,33 @@
 #ifndef WEBP_UTILS_COLOR_CACHE_H_
 #define WEBP_UTILS_COLOR_CACHE_H_
 
-#include "../types.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
 // Main color cache struct.
 typedef struct {
   uint32_t *colors_;  // color entries
-  int hash_shift_;    // Hash shift: 32 - hash_bits.
+  int hash_shift_;    // Hash shift: 32 - hash_bits_.
+  int hash_bits_;
 } VP8LColorCache;
 
 static const uint32_t kHashMul = 0x1e35a7bd;
 
 static WEBP_INLINE uint32_t VP8LColorCacheLookup(
     const VP8LColorCache* const cc, uint32_t key) {
-  assert(key <= (~0U >> cc->hash_shift_));
+  assert((key >> cc->hash_bits_) == 0u);
   return cc->colors_[key];
 }
 
+static WEBP_INLINE void VP8LColorCacheSet(const VP8LColorCache* const cc,
+                                          uint32_t key, uint32_t argb) {
+  assert((key >> cc->hash_bits_) == 0u);
+  cc->colors_[key] = argb;
+}
+
 static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc,
                                              uint32_t argb) {
   const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
@@ -47,7 +56,7 @@ static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc,
 static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
                                               uint32_t argb) {
   const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
-  return cc->colors_[key] == argb;
+  return (cc->colors_[key] == argb);
 }
 
 //------------------------------------------------------------------------------
@@ -56,12 +65,15 @@ static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
 // Returns false in case of memory error.
 int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits);
 
+void VP8LColorCacheCopy(const VP8LColorCache* const src,
+                        VP8LColorCache* const dst);
+
 // Delete the memory associated to color cache.
 void VP8LColorCacheClear(VP8LColorCache* const color_cache);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }
 #endif
 
diff --git a/drivers/webp/utils/endian_inl.h b/drivers/webp/utils/endian_inl.h
new file mode 100644
index 0000000000..253b7be8ee
--- /dev/null
+++ b/drivers/webp/utils/endian_inl.h
@@ -0,0 +1,100 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Endian related functions.
+
+#ifndef WEBP_UTILS_ENDIAN_INL_H_
+#define WEBP_UTILS_ENDIAN_INL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
+#endif
+
+#include "../dsp/dsp.h"
+#include "webp/types.h"
+
+// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
+#if !defined(WORDS_BIGENDIAN) && \
+    (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
+     (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
+#define WORDS_BIGENDIAN
+#endif
+
+#if defined(WORDS_BIGENDIAN)
+#define HToLE32 BSwap32
+#define HToLE16 BSwap16
+#else
+#define HToLE32(x) (x)
+#define HToLE16(x) (x)
+#endif
+
+#if !defined(HAVE_CONFIG_H)
+#if LOCAL_GCC_PREREQ(4,8) || __has_builtin(__builtin_bswap16)
+#define HAVE_BUILTIN_BSWAP16
+#endif
+#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap32)
+#define HAVE_BUILTIN_BSWAP32
+#endif
+#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap64)
+#define HAVE_BUILTIN_BSWAP64
+#endif
+#endif  // !HAVE_CONFIG_H
+
+static WEBP_INLINE uint16_t BSwap16(uint16_t x) {
+#if defined(HAVE_BUILTIN_BSWAP16)
+  return __builtin_bswap16(x);
+#elif defined(_MSC_VER)
+  return _byteswap_ushort(x);
+#else
+  // gcc will recognize a 'rorw $8, ...' here:
+  return (x >> 8) | ((x & 0xff) << 8);
+#endif  // HAVE_BUILTIN_BSWAP16
+}
+
+static WEBP_INLINE uint32_t BSwap32(uint32_t x) {
+#if defined(WEBP_USE_MIPS32_R2)
+  uint32_t ret;
+  __asm__ volatile (
+    "wsbh   %[ret], %[x]          \n\t"
+    "rotr   %[ret], %[ret],  16   \n\t"
+    : [ret]"=r"(ret)
+    : [x]"r"(x)
+  );
+  return ret;
+#elif defined(HAVE_BUILTIN_BSWAP32)
+  return __builtin_bswap32(x);
+#elif defined(__i386__) || defined(__x86_64__)
+  uint32_t swapped_bytes;
+  __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x));
+  return swapped_bytes;
+#elif defined(_MSC_VER)
+  return (uint32_t)_byteswap_ulong(x);
+#else
+  return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24);
+#endif  // HAVE_BUILTIN_BSWAP32
+}
+
+static WEBP_INLINE uint64_t BSwap64(uint64_t x) {
+#if defined(HAVE_BUILTIN_BSWAP64)
+  return __builtin_bswap64(x);
+#elif defined(__x86_64__)
+  uint64_t swapped_bytes;
+  __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x));
+  return swapped_bytes;
+#elif defined(_MSC_VER)
+  return (uint64_t)_byteswap_uint64(x);
+#else  // generic code for swapping 64-bit values (suggested by bdb@)
+  x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32);
+  x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16);
+  x = ((x & 0xff00ff00ff00ff00ull) >>  8) | ((x & 0x00ff00ff00ff00ffull) <<  8);
+  return x;
+#endif  // HAVE_BUILTIN_BSWAP64
+}
+
+#endif  // WEBP_UTILS_ENDIAN_INL_H_
diff --git a/drivers/webp/utils/filters.c b/drivers/webp/utils/filters.c
index 08f52a3d20..15543b1271 100644
--- a/drivers/webp/utils/filters.c
+++ b/drivers/webp/utils/filters.c
@@ -1,171 +1,37 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
-// Spatial prediction using various filters
+// filter estimation
 //
 // Author: Urvang (urvang@google.com)
 
 #include "./filters.h"
-#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-//------------------------------------------------------------------------------
-// Helpful macro.
-
-# define SANITY_CHECK(in, out)                              \
-  assert(in != NULL);                                       \
-  assert(out != NULL);                                      \
-  assert(width > 0);                                        \
-  assert(height > 0);                                       \
-  assert(bpp > 0);                                          \
-  assert(stride >= width * bpp);
-
-static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
-                                    uint8_t* dst, int length, int inverse) {
-  int i;
-  if (inverse) {
-    for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
-  } else {
-    for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
-  }
-}
-
-//------------------------------------------------------------------------------
-// Horizontal filter.
-
-static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
-    int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
-  int h;
-  const uint8_t* preds = (inverse ? out : in);
-  SANITY_CHECK(in, out);
-
-  // Filter line-by-line.
-  for (h = 0; h < height; ++h) {
-    // Leftmost pixel is predicted from above (except for topmost scanline).
-    if (h == 0) {
-      memcpy((void*)out, (const void*)in, bpp);
-    } else {
-      PredictLine(in, preds - stride, out, bpp, inverse);
-    }
-    PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
-    preds += stride;
-    in += stride;
-    out += stride;
-  }
-}
-
-static void HorizontalFilter(const uint8_t* data, int width, int height,
-                             int bpp, int stride, uint8_t* filtered_data) {
-  DoHorizontalFilter(data, width, height, bpp, stride, 0, filtered_data);
-}
-
-static void HorizontalUnfilter(const uint8_t* data, int width, int height,
-                               int bpp, int stride, uint8_t* recon_data) {
-  DoHorizontalFilter(data, width, height, bpp, stride, 1, recon_data);
-}
-
-//------------------------------------------------------------------------------
-// Vertical filter.
-
-static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
-    int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
-  int h;
-  const uint8_t* preds = (inverse ? out : in);
-  SANITY_CHECK(in, out);
-
-  // Very first top-left pixel is copied.
-  memcpy((void*)out, (const void*)in, bpp);
-  // Rest of top scan-line is left-predicted.
-  PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
-
-  // Filter line-by-line.
-  for (h = 1; h < height; ++h) {
-    in += stride;
-    out += stride;
-    PredictLine(in, preds, out, bpp * width, inverse);
-    preds += stride;
-  }
-}
-
-static void VerticalFilter(const uint8_t* data, int width, int height,
-                           int bpp, int stride, uint8_t* filtered_data) {
-  DoVerticalFilter(data, width, height, bpp, stride, 0, filtered_data);
-}
-
-static void VerticalUnfilter(const uint8_t* data, int width, int height,
-                             int bpp, int stride, uint8_t* recon_data) {
-  DoVerticalFilter(data, width, height, bpp, stride, 1, recon_data);
-}
+// -----------------------------------------------------------------------------
+// Quick estimate of a potentially interesting filter mode to try.
 
-//------------------------------------------------------------------------------
-// Gradient filter.
+#define SMAX 16
+#define SDIFF(a, b) (abs((a) - (b)) >> 4)   // Scoring diff, in [0..SMAX)
 
 static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
   const int g = a + b - c;
-  return (g < 0) ? 0 : (g > 255) ? 255 : g;
-}
-
-static WEBP_INLINE
-void DoGradientFilter(const uint8_t* in, int width, int height,
-                      int bpp, int stride, int inverse, uint8_t* out) {
-  const uint8_t* preds = (inverse ? out : in);
-  int h;
-  SANITY_CHECK(in, out);
-
-  // left prediction for top scan-line
-  memcpy((void*)out, (const void*)in, bpp);
-  PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
-
-  // Filter line-by-line.
-  for (h = 1; h < height; ++h) {
-    int w;
-    preds += stride;
-    in += stride;
-    out += stride;
-    // leftmost pixel: predict from above.
-    PredictLine(in, preds - stride, out, bpp, inverse);
-    for (w = bpp; w < width * bpp; ++w) {
-      const int pred = GradientPredictor(preds[w - bpp],
-                                         preds[w - stride],
-                                         preds[w - stride - bpp]);
-      out[w] = in[w] + (inverse ? pred : -pred);
-    }
-  }
+  return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255;  // clip to 8bit
 }
 
-static void GradientFilter(const uint8_t* data, int width, int height,
-                           int bpp, int stride, uint8_t* filtered_data) {
-  DoGradientFilter(data, width, height, bpp, stride, 0, filtered_data);
-}
-
-static void GradientUnfilter(const uint8_t* data, int width, int height,
-                             int bpp, int stride, uint8_t* recon_data) {
-  DoGradientFilter(data, width, height, bpp, stride, 1, recon_data);
-}
-
-#undef SANITY_CHECK
-
-// -----------------------------------------------------------------------------
-// Quick estimate of a potentially interesting filter mode to try, in addition
-// to the default NONE.
-
-#define SMAX 16
-#define SDIFF(a, b) (abs((a) - (b)) >> 4)   // Scoring diff, in [0..SMAX)
-
-WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
-                                    int width, int height, int stride) {
+WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data,
+                                        int width, int height, int stride) {
   int i, j;
   int bins[WEBP_FILTER_LAST][SMAX];
   memset(bins, 0, sizeof(bins));
+
   // We only sample every other pixels. That's enough.
   for (j = 2; j < height - 1; j += 2) {
     const uint8_t* const p = data + j * stride;
@@ -185,7 +51,8 @@ WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
     }
   }
   {
-    WEBP_FILTER_TYPE filter, best_filter = WEBP_FILTER_NONE;
+    int filter;
+    WEBP_FILTER_TYPE best_filter = WEBP_FILTER_NONE;
     int best_score = 0x7fffffff;
     for (filter = WEBP_FILTER_NONE; filter < WEBP_FILTER_LAST; ++filter) {
       int score = 0;
@@ -196,7 +63,7 @@ WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
       }
       if (score < best_score) {
         best_score = score;
-        best_filter = filter;
+        best_filter = (WEBP_FILTER_TYPE)filter;
       }
     }
     return best_filter;
@@ -207,23 +74,3 @@ WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
 #undef SDIFF
 
 //------------------------------------------------------------------------------
-
-const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST] = {
-  NULL,              // WEBP_FILTER_NONE
-  HorizontalFilter,  // WEBP_FILTER_HORIZONTAL
-  VerticalFilter,    // WEBP_FILTER_VERTICAL
-  GradientFilter     // WEBP_FILTER_GRADIENT
-};
-
-const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST] = {
-  NULL,                // WEBP_FILTER_NONE
-  HorizontalUnfilter,  // WEBP_FILTER_HORIZONTAL
-  VerticalUnfilter,    // WEBP_FILTER_VERTICAL
-  GradientUnfilter     // WEBP_FILTER_GRADIENT
-};
-
-//------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/utils/filters.h b/drivers/webp/utils/filters.h
index db886be29a..4aba3fd3b7 100644
--- a/drivers/webp/utils/filters.h
+++ b/drivers/webp/utils/filters.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Spatial prediction using various filters
@@ -12,42 +14,18 @@
 #ifndef WEBP_UTILS_FILTERS_H_
 #define WEBP_UTILS_FILTERS_H_
 
-#include "../types.h"
+#include "webp/types.h"
+#include "../dsp/dsp.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-// Filters.
-typedef enum {
-  WEBP_FILTER_NONE = 0,
-  WEBP_FILTER_HORIZONTAL,
-  WEBP_FILTER_VERTICAL,
-  WEBP_FILTER_GRADIENT,
-  WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1,  // end marker
-  WEBP_FILTER_BEST,
-  WEBP_FILTER_FAST
-} WEBP_FILTER_TYPE;
-
-typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
-                               int bpp, int stride, uint8_t* out);
-
-// Filter the given data using the given predictor.
-// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
-// in raster order.
-// 'bpp' is number of bytes per pixel, and
-// 'stride' is number of bytes per scan line (with possible padding).
-// 'out' should be pre-allocated.
-extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
-
-// Reconstruct the original data from the given filtered data.
-extern const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST];
-
 // Fast estimate of a potentially good filter.
-extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
-                                           int width, int height, int stride);
+WEBP_FILTER_TYPE WebPEstimateBestFilter(const uint8_t* data,
+                                        int width, int height, int stride);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/huffman.c b/drivers/webp/utils/huffman.c
index 1cc1cfd355..e6f482a6a8 100644
--- a/drivers/webp/utils/huffman.c
+++ b/drivers/webp/utils/huffman.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Utilities for building and looking up Huffman trees.
@@ -11,228 +13,193 @@
 
 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>
 #include "./huffman.h"
 #include "../utils/utils.h"
-#include "../format_constants.h"
+#include "webp/format_constants.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+// Huffman data read via DecodeImageStream is represented in two (red and green)
+// bytes.
+#define MAX_HTREE_GROUPS    0x10000
 
-#define NON_EXISTENT_SYMBOL (-1)
-
-static void TreeNodeInit(HuffmanTreeNode* const node) {
-  node->children_ = -1;   // means: 'unassigned so far'
-}
-
-static int NodeIsEmpty(const HuffmanTreeNode* const node) {
-  return (node->children_ < 0);
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups) {
+  HTreeGroup* const htree_groups =
+      (HTreeGroup*)WebPSafeMalloc(num_htree_groups, sizeof(*htree_groups));
+  if (htree_groups == NULL) {
+    return NULL;
+  }
+  assert(num_htree_groups <= MAX_HTREE_GROUPS);
+  return htree_groups;
 }
 
-static int IsFull(const HuffmanTree* const tree) {
-  return (tree->num_nodes_ == tree->max_nodes_);
+void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups) {
+  if (htree_groups != NULL) {
+    WebPSafeFree(htree_groups);
+  }
 }
 
-static void AssignChildren(HuffmanTree* const tree,
-                           HuffmanTreeNode* const node) {
-  HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_;
-  node->children_ = (int)(children - node);
-  assert(children - node == (int)(children - node));
-  tree->num_nodes_ += 2;
-  TreeNodeInit(children + 0);
-  TreeNodeInit(children + 1);
+// Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is the
+// bit-wise reversal of the len least significant bits of key.
+static WEBP_INLINE uint32_t GetNextKey(uint32_t key, int len) {
+  uint32_t step = 1 << (len - 1);
+  while (key & step) {
+    step >>= 1;
+  }
+  return (key & (step - 1)) + step;
 }
 
-static int TreeInit(HuffmanTree* const tree, int num_leaves) {
-  assert(tree != NULL);
-  if (num_leaves == 0) return 0;
-  // We allocate maximum possible nodes in the tree at once.
-  // Note that a Huffman tree is a full binary tree; and in a full binary tree
-  // with L leaves, the total number of nodes N = 2 * L - 1.
-  tree->max_nodes_ = 2 * num_leaves - 1;
-  tree->root_ = (HuffmanTreeNode*)WebPSafeMalloc((uint64_t)tree->max_nodes_,
-                                                 sizeof(*tree->root_));
-  if (tree->root_ == NULL) return 0;
-  TreeNodeInit(tree->root_);  // Initialize root.
-  tree->num_nodes_ = 1;
-  return 1;
+// Stores code in table[0], table[step], table[2*step], ..., table[end].
+// Assumes that end is an integer multiple of step.
+static WEBP_INLINE void ReplicateValue(HuffmanCode* table,
+                                       int step, int end,
+                                       HuffmanCode code) {
+  assert(end % step == 0);
+  do {
+    end -= step;
+    table[end] = code;
+  } while (end > 0);
 }
 
-void HuffmanTreeRelease(HuffmanTree* const tree) {
-  if (tree != NULL) {
-    free(tree->root_);
-    tree->root_ = NULL;
-    tree->max_nodes_ = 0;
-    tree->num_nodes_ = 0;
+// Returns the table width of the next 2nd level table. count is the histogram
+// of bit lengths for the remaining symbols, len is the code length of the next
+// processed symbol
+static WEBP_INLINE int NextTableBitSize(const int* const count,
+                                        int len, int root_bits) {
+  int left = 1 << (len - root_bits);
+  while (len < MAX_ALLOWED_CODE_LENGTH) {
+    left -= count[len];
+    if (left <= 0) break;
+    ++len;
+    left <<= 1;
   }
+  return len - root_bits;
 }
 
-int HuffmanCodeLengthsToCodes(const int* const code_lengths,
-                              int code_lengths_size, int* const huff_codes) {
-  int symbol;
-  int code_len;
-  int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
-  int curr_code;
-  int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
-  int max_code_length = 0;
-
+int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
+                          const int code_lengths[], int code_lengths_size) {
+  HuffmanCode* table = root_table;  // next available space in table
+  int total_size = 1 << root_bits;  // total size root table + 2nd level table
+  int* sorted = NULL;               // symbols sorted by code length
+  int len;                          // current code length
+  int symbol;                       // symbol index in original or sorted table
+  // number of codes of each length:
+  int count[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  // offsets in sorted table for each length:
+  int offset[MAX_ALLOWED_CODE_LENGTH + 1];
+
+  assert(code_lengths_size != 0);
   assert(code_lengths != NULL);
-  assert(code_lengths_size > 0);
-  assert(huff_codes != NULL);
+  assert(root_table != NULL);
+  assert(root_bits > 0);
 
-  // Calculate max code length.
+  // Build histogram of code lengths.
   for (symbol = 0; symbol < code_lengths_size; ++symbol) {
-    if (code_lengths[symbol] > max_code_length) {
-      max_code_length = code_lengths[symbol];
+    if (code_lengths[symbol] > MAX_ALLOWED_CODE_LENGTH) {
+      return 0;
     }
+    ++count[code_lengths[symbol]];
   }
-  if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0;
 
-  // Calculate code length histogram.
-  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
-    ++code_length_hist[code_lengths[symbol]];
-  }
-  code_length_hist[0] = 0;
-
-  // Calculate the initial values of 'next_codes' for each code length.
-  // next_codes[code_len] denotes the code to be assigned to the next symbol
-  // of code length 'code_len'.
-  curr_code = 0;
-  next_codes[0] = -1;  // Unused, as code length = 0 implies code doesn't exist.
-  for (code_len = 1; code_len <= max_code_length; ++code_len) {
-    curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
-    next_codes[code_len] = curr_code;
+  // Error, all code lengths are zeros.
+  if (count[0] == code_lengths_size) {
+    return 0;
   }
 
-  // Get symbols.
-  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
-    if (code_lengths[symbol] > 0) {
-      huff_codes[symbol] = next_codes[code_lengths[symbol]]++;
-    } else {
-      huff_codes[symbol] = NON_EXISTENT_SYMBOL;
-    }
-  }
-  return 1;
-}
-
-static int TreeAddSymbol(HuffmanTree* const tree,
-                         int symbol, int code, int code_length) {
-  HuffmanTreeNode* node = tree->root_;
-  const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
-  while (code_length-- > 0) {
-    if (node >= max_node) {
+  // Generate offsets into sorted symbol table by code length.
+  offset[1] = 0;
+  for (len = 1; len < MAX_ALLOWED_CODE_LENGTH; ++len) {
+    if (count[len] > (1 << len)) {
       return 0;
     }
-    if (NodeIsEmpty(node)) {
-      if (IsFull(tree)) return 0;    // error: too many symbols.
-      AssignChildren(tree, node);
-    } else if (HuffmanTreeNodeIsLeaf(node)) {
-      return 0;  // leaf is already occupied.
-    }
-    node += node->children_ + ((code >> code_length) & 1);
-  }
-  if (NodeIsEmpty(node)) {
-    node->children_ = 0;      // turn newly created node into a leaf.
-  } else if (!HuffmanTreeNodeIsLeaf(node)) {
-    return 0;   // trying to assign a symbol to already used code.
+    offset[len + 1] = offset[len] + count[len];
   }
-  node->symbol_ = symbol;  // Add symbol in this node.
-  return 1;
-}
 
-int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
-                             const int* const code_lengths,
-                             int code_lengths_size) {
-  int symbol;
-  int num_symbols = 0;
-  int root_symbol = 0;
-
-  assert(tree != NULL);
-  assert(code_lengths != NULL);
+  sorted = (int*)WebPSafeMalloc(code_lengths_size, sizeof(*sorted));
+  if (sorted == NULL) {
+    return 0;
+  }
 
-  // Find out number of symbols and the root symbol.
+  // Sort symbols by length, by symbol order within each length.
   for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    const int symbol_code_length = code_lengths[symbol];
     if (code_lengths[symbol] > 0) {
-      // Note: code length = 0 indicates non-existent symbol.
-      ++num_symbols;
-      root_symbol = symbol;
+      sorted[offset[symbol_code_length]++] = symbol;
     }
   }
 
-  // Initialize the tree. Will fail for num_symbols = 0
-  if (!TreeInit(tree, num_symbols)) return 0;
-
-  // Build tree.
-  if (num_symbols == 1) {  // Trivial case.
-    const int max_symbol = code_lengths_size;
-    if (root_symbol < 0 || root_symbol >= max_symbol) {
-      HuffmanTreeRelease(tree);
-      return 0;
-    }
-    return TreeAddSymbol(tree, root_symbol, 0, 0);
-  } else {  // Normal case.
-    int ok = 0;
-
-    // Get Huffman codes from the code lengths.
-    int* const codes =
-        (int*)WebPSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
-    if (codes == NULL) goto End;
+  // Special case code with only one value.
+  if (offset[MAX_ALLOWED_CODE_LENGTH] == 1) {
+    HuffmanCode code;
+    code.bits = 0;
+    code.value = (uint16_t)sorted[0];
+    ReplicateValue(table, 1, total_size, code);
+    WebPSafeFree(sorted);
+    return total_size;
+  }
 
-    if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
-      goto End;
+  {
+    int step;              // step size to replicate values in current table
+    uint32_t low = -1;     // low bits for current root entry
+    uint32_t mask = total_size - 1;    // mask for low bits
+    uint32_t key = 0;      // reversed prefix code
+    int num_nodes = 1;     // number of Huffman tree nodes
+    int num_open = 1;      // number of open branches in current tree level
+    int table_bits = root_bits;        // key length of current table
+    int table_size = 1 << table_bits;  // size of current table
+    symbol = 0;
+    // Fill in root table.
+    for (len = 1, step = 2; len <= root_bits; ++len, step <<= 1) {
+      num_open <<= 1;
+      num_nodes += num_open;
+      num_open -= count[len];
+      if (num_open < 0) {
+        WebPSafeFree(sorted);
+        return 0;
+      }
+      for (; count[len] > 0; --count[len]) {
+        HuffmanCode code;
+        code.bits = (uint8_t)len;
+        code.value = (uint16_t)sorted[symbol++];
+        ReplicateValue(&table[key], step, table_size, code);
+        key = GetNextKey(key, len);
+      }
     }
 
-    // Add symbols one-by-one.
-    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
-      if (code_lengths[symbol] > 0) {
-        if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) {
-          goto End;
+    // Fill in 2nd level tables and add pointers to root table.
+    for (len = root_bits + 1, step = 2; len <= MAX_ALLOWED_CODE_LENGTH;
+         ++len, step <<= 1) {
+      num_open <<= 1;
+      num_nodes += num_open;
+      num_open -= count[len];
+      if (num_open < 0) {
+        WebPSafeFree(sorted);
+        return 0;
+      }
+      for (; count[len] > 0; --count[len]) {
+        HuffmanCode code;
+        if ((key & mask) != low) {
+          table += table_size;
+          table_bits = NextTableBitSize(count, len, root_bits);
+          table_size = 1 << table_bits;
+          total_size += table_size;
+          low = key & mask;
+          root_table[low].bits = (uint8_t)(table_bits + root_bits);
+          root_table[low].value = (uint16_t)((table - root_table) - low);
         }
+        code.bits = (uint8_t)(len - root_bits);
+        code.value = (uint16_t)sorted[symbol++];
+        ReplicateValue(&table[key >> root_bits], step, table_size, code);
+        key = GetNextKey(key, len);
       }
     }
-    ok = 1;
- End:
-    free(codes);
-    ok = ok && IsFull(tree);
-    if (!ok) HuffmanTreeRelease(tree);
-    return ok;
-  }
-}
-
-int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
-                             const int* const code_lengths,
-                             const int* const codes,
-                             const int* const symbols, int max_symbol,
-                             int num_symbols) {
-  int ok = 0;
-  int i;
-
-  assert(tree != NULL);
-  assert(code_lengths != NULL);
-  assert(codes != NULL);
-  assert(symbols != NULL);
-
-  // Initialize the tree. Will fail if num_symbols = 0.
-  if (!TreeInit(tree, num_symbols)) return 0;
 
-  // Add symbols one-by-one.
-  for (i = 0; i < num_symbols; ++i) {
-    if (codes[i] != NON_EXISTENT_SYMBOL) {
-      if (symbols[i] < 0 || symbols[i] >= max_symbol) {
-        goto End;
-      }
-      if (!TreeAddSymbol(tree, symbols[i], codes[i], code_lengths[i])) {
-        goto End;
-      }
+    // Check if tree is full.
+    if (num_nodes != 2 * offset[MAX_ALLOWED_CODE_LENGTH] - 1) {
+      WebPSafeFree(sorted);
+      return 0;
     }
   }
-  ok = 1;
- End:
-  ok = ok && IsFull(tree);
-  if (!ok) HuffmanTreeRelease(tree);
-  return ok;
-}
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+  WebPSafeFree(sorted);
+  return total_size;
+}
diff --git a/drivers/webp/utils/huffman.h b/drivers/webp/utils/huffman.h
index f16447e649..a8cc0da1c3 100644
--- a/drivers/webp/utils/huffman.h
+++ b/drivers/webp/utils/huffman.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Utilities for building and looking up Huffman trees.
@@ -13,65 +15,73 @@
 #define WEBP_UTILS_HUFFMAN_H_
 
 #include <assert.h>
-#include "../types.h"
+#include "webp/format_constants.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-// A node of a Huffman tree.
-typedef struct {
-  int symbol_;
-  int children_;  // delta offset to both children (contiguous) or 0 if leaf.
-} HuffmanTreeNode;
+#define HUFFMAN_TABLE_BITS      8
+#define HUFFMAN_TABLE_MASK      ((1 << HUFFMAN_TABLE_BITS) - 1)
+
+#define LENGTHS_TABLE_BITS      7
+#define LENGTHS_TABLE_MASK      ((1 << LENGTHS_TABLE_BITS) - 1)
 
-// Huffman Tree.
-typedef struct HuffmanTree HuffmanTree;
-struct HuffmanTree {
-  HuffmanTreeNode* root_;   // all the nodes, starting at root.
-  int max_nodes_;           // max number of nodes
-  int num_nodes_;           // number of currently occupied nodes
-};
 
-// Returns true if the given node is a leaf of the Huffman tree.
-static WEBP_INLINE int HuffmanTreeNodeIsLeaf(
-    const HuffmanTreeNode* const node) {
-  return (node->children_ == 0);
-}
+// Huffman lookup table entry
+typedef struct {
+  uint8_t bits;     // number of bits used for this symbol
+  uint16_t value;   // symbol value or table offset
+} HuffmanCode;
 
-// Go down one level. Most critical function. 'right_child' must be 0 or 1.
-static WEBP_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
-    const HuffmanTreeNode* node, int right_child) {
-  return node + node->children_ + right_child;
-}
+// long version for holding 32b values
+typedef struct {
+  int bits;         // number of bits used for this symbol,
+                    // or an impossible value if not a literal code.
+  uint32_t value;   // 32b packed ARGB value if literal,
+                    // or non-literal symbol otherwise
+} HuffmanCode32;
 
-// Releases the nodes of the Huffman tree.
-// Note: It does NOT free 'tree' itself.
-void HuffmanTreeRelease(HuffmanTree* const tree);
+#define HUFFMAN_PACKED_BITS 6
+#define HUFFMAN_PACKED_TABLE_SIZE (1u << HUFFMAN_PACKED_BITS)
 
-// Builds Huffman tree assuming code lengths are implicitly in symbol order.
-// Returns false in case of error (invalid tree or memory error).
-int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
-                             const int* const code_lengths,
-                             int code_lengths_size);
+// Huffman table group.
+// Includes special handling for the following cases:
+//  - is_trivial_literal: one common literal base for RED/BLUE/ALPHA (not GREEN)
+//  - is_trivial_code: only 1 code (no bit is read from bitstream)
+//  - use_packed_table: few enough literal symbols, so all the bit codes
+//    can fit into a small look-up table packed_table[]
+// The common literal base, if applicable, is stored in 'literal_arb'.
+typedef struct HTreeGroup HTreeGroup;
+struct HTreeGroup {
+  HuffmanCode* htrees[HUFFMAN_CODES_PER_META_CODE];
+  int      is_trivial_literal;  // True, if huffman trees for Red, Blue & Alpha
+                                // Symbols are trivial (have a single code).
+  uint32_t literal_arb;         // If is_trivial_literal is true, this is the
+                                // ARGB value of the pixel, with Green channel
+                                // being set to zero.
+  int is_trivial_code;          // true if is_trivial_literal with only one code
+  int use_packed_table;         // use packed table below for short literal code
+  // table mapping input bits to packed values, or escape case to literal code
+  HuffmanCode32 packed_table[HUFFMAN_PACKED_TABLE_SIZE];
+};
 
-// Build a Huffman tree with explicitly given lists of code lengths, codes
-// and symbols. Verifies that all symbols added are smaller than max_symbol.
-// Returns false in case of an invalid symbol, invalid tree or memory error.
-int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
-                             const int* const code_lengths,
-                             const int* const codes,
-                             const int* const symbols, int max_symbol,
-                             int num_symbols);
+// Creates the instance of HTreeGroup with specified number of tree-groups.
+HTreeGroup* VP8LHtreeGroupsNew(int num_htree_groups);
 
-// Utility: converts Huffman code lengths to corresponding Huffman codes.
-// 'huff_codes' should be pre-allocated.
-// Returns false in case of error (memory allocation, invalid codes).
-int HuffmanCodeLengthsToCodes(const int* const code_lengths,
-                              int code_lengths_size, int* const huff_codes);
+// Releases the memory allocated for HTreeGroup.
+void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups);
 
+// Builds Huffman lookup table assuming code lengths are in symbol order.
+// 'code_lengths' is a pre-allocated temporary memory buffer used for creating
+// the huffman table.
+// Returns built table size or 0 in case of error (invalid tree or
+// memory error).
+int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
+                          const int code_lengths[], int code_lengths_size);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/huffman_encode.c b/drivers/webp/utils/huffman_encode.c
index e172b10a85..d7aad6f56d 100644
--- a/drivers/webp/utils/huffman_encode.c
+++ b/drivers/webp/utils/huffman_encode.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
@@ -14,7 +16,7 @@
 #include <string.h>
 #include "./huffman_encode.h"
 #include "../utils/utils.h"
-#include "../format_constants.h"
+#include "webp/format_constants.h"
 
 // -----------------------------------------------------------------------------
 // Util function to optimize the symbol map for RLE coding
@@ -25,14 +27,14 @@ static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
 }
 
 // Change the population counts in a way that the consequent
-// Hufmann tree compression, especially its RLE-part, give smaller output.
-static int OptimizeHuffmanForRle(int length, int* const counts) {
-  uint8_t* good_for_rle;
+// Huffman tree compression, especially its RLE-part, gives smaller output.
+static void OptimizeHuffmanForRle(int length, uint8_t* const good_for_rle,
+                                  uint32_t* const counts) {
   // 1) Let's make the Huffman code more compatible with rle encoding.
   int i;
   for (; length >= 0; --length) {
     if (length == 0) {
-      return 1;  // All zeros.
+      return;  // All zeros.
     }
     if (counts[length - 1] != 0) {
       // Now counts[0..length - 1] does not have trailing zeros.
@@ -41,15 +43,11 @@ static int OptimizeHuffmanForRle(int length, int* const counts) {
   }
   // 2) Let's mark all population counts that already can be encoded
   // with an rle code.
-  good_for_rle = (uint8_t*)calloc(length, 1);
-  if (good_for_rle == NULL) {
-    return 0;
-  }
   {
     // Let's not spoil any of the existing good rle codes.
     // Mark any seq of 0's that is longer as 5 as a good_for_rle.
     // Mark any seq of non-0's that is longer as 7 as a good_for_rle.
-    int symbol = counts[0];
+    uint32_t symbol = counts[0];
     int stride = 0;
     for (i = 0; i < length + 1; ++i) {
       if (i == length || counts[i] != symbol) {
@@ -71,17 +69,17 @@ static int OptimizeHuffmanForRle(int length, int* const counts) {
   }
   // 3) Let's replace those population counts that lead to more rle codes.
   {
-    int stride = 0;
-    int limit = counts[0];
-    int sum = 0;
+    uint32_t stride = 0;
+    uint32_t limit = counts[0];
+    uint32_t sum = 0;
     for (i = 0; i < length + 1; ++i) {
       if (i == length || good_for_rle[i] ||
           (i != 0 && good_for_rle[i - 1]) ||
           !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
         if (stride >= 4 || (stride >= 3 && sum == 0)) {
-          int k;
+          uint32_t k;
           // The stride must end, collapse what we have, if we have enough (4).
-          int count = (sum + stride / 2) / stride;
+          uint32_t count = (sum + stride / 2) / stride;
           if (count < 1) {
             count = 1;
           }
@@ -117,17 +115,8 @@ static int OptimizeHuffmanForRle(int length, int* const counts) {
       }
     }
   }
-  free(good_for_rle);
-  return 1;
 }
 
-typedef struct {
-  int total_count_;
-  int value_;
-  int pool_index_left_;
-  int pool_index_right_;
-} HuffmanTree;
-
 // A comparer function for two Huffman trees: sorts first by 'total count'
 // (more comes first), and then by 'value' (more comes first).
 static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
@@ -138,13 +127,8 @@ static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
   } else if (t1->total_count_ < t2->total_count_) {
     return 1;
   } else {
-    if (t1->value_ < t2->value_) {
-      return -1;
-    }
-    if (t1->value_ > t2->value_) {
-      return 1;
-    }
-    return 0;
+    assert(t1->value_ != t2->value_);
+    return (t1->value_ < t2->value_) ? -1 : 1;
   }
 }
 
@@ -178,12 +162,12 @@ static void SetBitDepths(const HuffmanTree* const tree,
 // we are not planning to use this with extremely long blocks.
 //
 // See http://en.wikipedia.org/wiki/Huffman_coding
-static int GenerateOptimalTree(const int* const histogram, int histogram_size,
-                               int tree_depth_limit,
-                               uint8_t* const bit_depths) {
-  int count_min;
+static void GenerateOptimalTree(const uint32_t* const histogram,
+                                int histogram_size,
+                                HuffmanTree* tree, int tree_depth_limit,
+                                uint8_t* const bit_depths) {
+  uint32_t count_min;
   HuffmanTree* tree_pool;
-  HuffmanTree* tree;
   int tree_size_orig = 0;
   int i;
 
@@ -193,12 +177,10 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
     }
   }
 
-  // 3 * tree_size is enough to cover all the nodes representing a
-  // population and all the inserted nodes combining two existing nodes.
-  // The tree pool needs 2 * (tree_size_orig - 1) entities, and the
-  // tree needs exactly tree_size_orig entities.
-  tree = (HuffmanTree*)WebPSafeMalloc(3ULL * tree_size_orig, sizeof(*tree));
-  if (tree == NULL) return 0;
+  if (tree_size_orig == 0) {   // pretty optimal already!
+    return;
+  }
+
   tree_pool = tree + tree_size_orig;
 
   // For block sizes with less than 64k symbols we never need to do a
@@ -214,7 +196,7 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
     int j;
     for (j = 0; j < histogram_size; ++j) {
       if (histogram[j] != 0) {
-        const int count =
+        const uint32_t count =
             (histogram[j] < count_min) ? count_min : histogram[j];
         tree[idx].total_count_ = count;
         tree[idx].value_ = j;
@@ -230,11 +212,11 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
     if (tree_size > 1) {  // Normal case.
       int tree_pool_size = 0;
       while (tree_size > 1) {  // Finish when we have only one root.
-        int count;
+        uint32_t count;
         tree_pool[tree_pool_size++] = tree[tree_size - 1];
         tree_pool[tree_pool_size++] = tree[tree_size - 2];
         count = tree_pool[tree_pool_size - 1].total_count_ +
-            tree_pool[tree_pool_size - 2].total_count_;
+                tree_pool[tree_pool_size - 2].total_count_;
         tree_size -= 2;
         {
           // Search for the insertion point.
@@ -271,8 +253,6 @@ static int GenerateOptimalTree(const int* const histogram, int histogram_size,
       }
     }
   }
-  free(tree);
-  return 1;
 }
 
 // -----------------------------------------------------------------------------
@@ -423,17 +403,15 @@ static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
 // -----------------------------------------------------------------------------
 // Main entry point
 
-int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
-                          HuffmanTreeCode* const tree) {
-  const int num_symbols = tree->num_symbols;
-  if (!OptimizeHuffmanForRle(num_symbols, histogram)) {
-    return 0;
-  }
-  if (!GenerateOptimalTree(histogram, num_symbols,
-                           tree_depth_limit, tree->code_lengths)) {
-    return 0;
-  }
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
+                           uint8_t* const buf_rle,
+                           HuffmanTree* const huff_tree,
+                           HuffmanTreeCode* const huff_code) {
+  const int num_symbols = huff_code->num_symbols;
+  memset(buf_rle, 0, num_symbols * sizeof(*buf_rle));
+  OptimizeHuffmanForRle(num_symbols, buf_rle, histogram);
+  GenerateOptimalTree(histogram, num_symbols, huff_tree, tree_depth_limit,
+                      huff_code->code_lengths);
   // Create the actual bit codes for the bit lengths.
-  ConvertBitDepthsToSymbols(tree);
-  return 1;
+  ConvertBitDepthsToSymbols(huff_code);
 }
diff --git a/drivers/webp/utils/huffman_encode.h b/drivers/webp/utils/huffman_encode.h
index 7f4aedc102..93610066f3 100644
--- a/drivers/webp/utils/huffman_encode.h
+++ b/drivers/webp/utils/huffman_encode.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Author: Jyrki Alakuijala (jyrki@google.com)
@@ -12,9 +14,9 @@
 #ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
 #define WEBP_UTILS_HUFFMAN_ENCODE_H_
 
-#include "../types.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -31,16 +33,27 @@ typedef struct {
   uint16_t* codes;         // Symbol Codes.
 } HuffmanTreeCode;
 
+// Struct to represent the Huffman tree.
+typedef struct {
+  uint32_t total_count_;   // Symbol frequency.
+  int value_;              // Symbol value.
+  int pool_index_left_;    // Index for the left sub-tree.
+  int pool_index_right_;   // Index for the right sub-tree.
+} HuffmanTree;
+
 // Turn the Huffman tree into a token sequence.
 // Returns the number of tokens used.
 int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
                                     HuffmanTreeToken* tokens, int max_tokens);
 
 // Create an optimized tree, and tokenize it.
-int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
-                          HuffmanTreeCode* const tree);
+// 'buf_rle' and 'huff_tree' are pre-allocated and the 'tree' is the constructed
+// huffman code tree.
+void VP8LCreateHuffmanTree(uint32_t* const histogram, int tree_depth_limit,
+                           uint8_t* const buf_rle, HuffmanTree* const huff_tree,
+                           HuffmanTreeCode* const tree);
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }
 #endif
 
diff --git a/drivers/webp/utils/quant_levels.c b/drivers/webp/utils/quant_levels.c
index f6884392aa..d7c8aab922 100644
--- a/drivers/webp/utils/quant_levels.c
+++ b/drivers/webp/utils/quant_levels.c
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Quantize levels for specified number of quantization-levels ([2, 256]).
@@ -14,10 +16,6 @@
 
 #include "./quant_levels.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
 #define NUM_SYMBOLS     256
 
 #define MAX_ITER  6             // Maximum number of convergence steps.
@@ -140,15 +138,3 @@ int QuantizeLevels(uint8_t* const data, int width, int height,
   return 1;
 }
 
-int DequantizeLevels(uint8_t* const data, int width, int height) {
-  if (data == NULL || width <= 0 || height <= 0) return 0;
-  // TODO(skal): implement gradient smoothing.
-  (void)data;
-  (void)width;
-  (void)height;
-  return 1;
-}
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/utils/quant_levels.h b/drivers/webp/utils/quant_levels.h
index 4f165fd230..3916b977ab 100644
--- a/drivers/webp/utils/quant_levels.h
+++ b/drivers/webp/utils/quant_levels.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Alpha plane quantization utility
@@ -14,9 +16,9 @@
 
 #include <stdlib.h>
 
-#include "../types.h"
+#include "webp/types.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -27,12 +29,7 @@ extern "C" {
 int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
                    uint64_t* const sse);
 
-// Apply post-processing to input 'data' of size 'width'x'height' assuming
-// that the source was quantized to a reduced number of levels.
-// Returns false in case of error (data is NULL, invalid parameters, ...).
-int DequantizeLevels(uint8_t* const data, int width, int height);
-
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/quant_levels_dec.c b/drivers/webp/utils/quant_levels_dec.c
new file mode 100644
index 0000000000..5b8b8b49e6
--- /dev/null
+++ b/drivers/webp/utils/quant_levels_dec.c
@@ -0,0 +1,279 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Implement gradient smoothing: we replace the current alpha value by its
+// surrounding average if it's close enough (that is: the change will be less
+// than the minimum distance between two quantized levels).
+// We use a sliding window for computing the 2D moving average.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./quant_levels_dec.h"
+
+#include <string.h>   // for memset
+
+#include "./utils.h"
+
+// #define USE_DITHERING   // uncomment to enable ordered dithering (not vital)
+
+#define FIX 16     // fix-point precision for averaging
+#define LFIX 2     // extra precision for look-up table
+#define LUT_SIZE ((1 << (8 + LFIX)) - 1)  // look-up table size
+
+#if defined(USE_DITHERING)
+
+#define DFIX 4           // extra precision for ordered dithering
+#define DSIZE 4          // dithering size (must be a power of two)
+// cf. http://en.wikipedia.org/wiki/Ordered_dithering
+static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
+  {  0,  8,  2, 10 },     // coefficients are in DFIX fixed-point precision
+  { 12,  4, 14,  6 },
+  {  3, 11,  1,  9 },
+  { 15,  7, 13,  5 }
+};
+
+#else
+#define DFIX 0
+#endif
+
+typedef struct {
+  int width_, height_;  // dimension
+  int row_;             // current input row being processed (starts at -radius_)
+  uint8_t* src_;        // input pointer (next row read by VFilter)
+  uint8_t* dst_;        // output pointer (next row written by ApplyFilter)
+
+  int radius_;          // filter radius (=delay)
+  int scale_;           // 1 / kernel area, in (FIX + LFIX) bits precision
+
+  void* mem_;           // all memory (single block, freed by CleanupParams)
+
+  // various scratch buffers (all carved out of mem_ by InitParams)
+  uint16_t* start_;     // first row of the rolling buffer of cumulated sums
+  uint16_t* cur_;       // rolling-buffer row updated by the next VFilter call
+  uint16_t* end_;       // row past the rolling buffer: VFilter out, HFilter in
+  uint16_t* top_;       // row written by the previous VFilter call
+  uint16_t* average_;   // one row of window averages, filled by HFilter
+
+  // input levels distribution
+  int num_levels_;       // number of quantized levels
+  int min_, max_;        // min and max level values
+  int min_level_dist_;   // smallest distance between two consecutive levels
+
+  int16_t* correction_;  // center of the LUT; size = 1 + 2*LUT_SIZE -> ~4k
+} SmoothParams;
+
+//------------------------------------------------------------------------------
+
+#define CLIP_MASK (int)(~0U << (8 + DFIX))  // bits above the (8 + DFIX)b range
+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & CLIP_MASK)) ? (uint8_t)(v >> DFIX) : (v < 0) ? 0u : 255u;
+}
+
+// vertical accumulation: folds one input row into the rolling cumulated sums
+static void VFilter(SmoothParams* const p) {
+  const uint8_t* src = p->src_;
+  const int w = p->width_;
+  uint16_t* const cur = p->cur_;
+  const uint16_t* const top = p->top_;
+  uint16_t* const out = p->end_;
+  uint16_t sum = 0;               // all arithmetic is modulo 16bit
+  int x;
+
+  for (x = 0; x < w; ++x) {
+    uint16_t new_value;
+    sum += src[x];                // running sum of the row, left to right
+    new_value = top[x] + sum;     // cumulated with the previously stored row
+    out[x] = new_value - cur[x];  // vertical sum of 'r' pixels.
+    cur[x] = new_value;           // recycle this buffer row for the new sums
+  }
+  // move input pointers one row down
+  p->top_ = p->cur_;
+  p->cur_ += w;
+  if (p->cur_ == p->end_) p->cur_ = p->start_;  // roll-over
+  // We replicate edges, as it's somewhat easier as a boundary condition.
+  // That's why we don't update the 'src' pointer on top/bottom area:
+  if (p->row_ >= 0 && p->row_ < p->height_ - 1) {
+    p->src_ += p->width_;
+  }
+}
+
+// horizontal accumulation. We use mirror replication of missing pixels, as it's
+// a little easier to implement (surprisingly).
+static void HFilter(SmoothParams* const p) {
+  const uint16_t* const in = p->end_;      // row of sums produced by VFilter
+  uint16_t* const out = p->average_;
+  const uint32_t scale = p->scale_;
+  const int w = p->width_;
+  const int r = p->radius_;
+
+  int x;
+  for (x = 0; x <= r; ++x) {   // left mirroring
+    const uint16_t delta = in[x + r - 1] + in[r - x];
+    out[x] = (delta * scale) >> FIX;
+  }
+  for (; x < w - r; ++x) {     // bulk middle run
+    const uint16_t delta = in[x + r] - in[x - r - 1];  // window [x-r, x+r]
+    out[x] = (delta * scale) >> FIX;                   // keeps LFIX extra bits
+  }
+  for (; x < w; ++x) {         // right mirroring
+    const uint16_t delta =
+        2 * in[w - 1] - in[2 * w - 2 - r - x] - in[x - r - 1];
+    out[x] = (delta * scale) >> FIX;
+  }
+}
+
+// emit one filtered output row (written in place, at p->dst_)
+static void ApplyFilter(SmoothParams* const p) {
+  const uint16_t* const average = p->average_;
+  const int w = p->width_;
+  const int16_t* const correction = p->correction_;
+#if defined(USE_DITHERING)
+  const uint8_t* const dither = kOrderedDither[p->row_ % DSIZE];
+#endif
+  uint8_t* const dst = p->dst_;
+  int x;
+  for (x = 0; x < w; ++x) {
+    const int v = dst[x];              // current (not yet filtered) value
+    if (v < p->max_ && v > p->min_) {  // extreme levels are left untouched
+      const int c = (v << DFIX) + correction[average[x] - (v << LFIX)];
+#if defined(USE_DITHERING)
+      dst[x] = clip_8b(c + dither[x % DSIZE]);
+#else
+      dst[x] = clip_8b(c);
+#endif
+    }
+  }
+  p->dst_ += w;  // advance output pointer
+}
+
+//------------------------------------------------------------------------------
+// Initialize correction table
+
+static void InitCorrectionLUT(int16_t* const lut, int min_dist) {
+  // The correction curve is:
+  //   f(x) = x for x <= threshold2
+  //   f(x) = 0 for x >= threshold1
+  // and a linear interpolation for range x=[threshold2, threshold1]
+  // (along with f(-x) = -f(x) symmetry).
+  // Note that: threshold2 = 3/4 * threshold1
+  const int threshold1 = min_dist << LFIX;       // min_dist, in LFIX precision
+  const int threshold2 = (3 * threshold1) >> 2;
+  const int max_threshold = threshold2 << DFIX;
+  const int delta = threshold1 - threshold2;     // > 0 when used as divisor
+  int i;
+  for (i = 1; i <= LUT_SIZE; ++i) {
+    int c = (i <= threshold2) ? (i << DFIX)
+          : (i < threshold1) ? max_threshold * (threshold1 - i) / delta
+          : 0;
+    c >>= LFIX;      // drop the LFIX bits: the stored value is in DFIX precision
+    lut[+i] = +c;
+    lut[-i] = -c;    // 'lut' must point at the center entry of the table
+  }
+  lut[0] = 0;
+}
+
+static void CountLevels(const uint8_t* const data, int size,
+                        SmoothParams* const p) {
+  int i, last_level;
+  uint8_t used_levels[256] = { 0 };   // used_levels[v] != 0 iff v occurs in data
+  p->min_ = 255;
+  p->max_ = 0;
+  for (i = 0; i < size; ++i) {
+    const int v = data[i];
+    if (v < p->min_) p->min_ = v;
+    if (v > p->max_) p->max_ = v;
+    used_levels[v] = 1;
+  }
+  // Compute the minimum distance between two consecutive used levels.
+  p->min_level_dist_ = p->max_ - p->min_;
+  last_level = -1;
+  for (i = 0; i < 256; ++i) {
+    if (used_levels[i]) {
+      ++p->num_levels_;               // assumes the caller zeroed this counter
+      if (last_level >= 0) {
+        const int level_dist = i - last_level;
+        if (level_dist < p->min_level_dist_) {
+          p->min_level_dist_ = level_dist;
+        }
+      }
+      last_level = i;
+    }
+  }
+}
+
+// Initialize all params. Returns 0 if the scratch memory can't be allocated.
+static int InitParams(uint8_t* const data, int width, int height,
+                      int radius, SmoothParams* const p) {
+  const int R = 2 * radius + 1;  // total size of the kernel
+
+  const size_t size_scratch_m = (R + 1) * width * sizeof(*p->start_);
+  const size_t size_m =  width * sizeof(*p->average_);
+  const size_t size_lut = (1 + 2 * LUT_SIZE) * sizeof(*p->correction_);
+  const size_t total_size = size_scratch_m + size_m + size_lut;
+  uint8_t* mem = (uint8_t*)WebPSafeMalloc(1U, total_size);
+
+  if (mem == NULL) return 0;
+  p->mem_ = (void*)mem;
+
+  p->start_ = (uint16_t*)mem;
+  p->cur_ = p->start_;
+  p->end_ = p->start_ + R * width;   // R rolling rows, then one output row
+  p->top_ = p->end_ - width;         // last rolling row, cleared just below
+  memset(p->top_, 0, width * sizeof(*p->top_));
+  mem += size_scratch_m;
+
+  p->average_ = (uint16_t*)mem;
+  mem += size_m;
+
+  p->width_ = width;
+  p->height_ = height;
+  p->src_ = data;                    // input and output share the same buffer
+  p->dst_ = data;
+  p->radius_ = radius;
+  p->scale_ = (1 << (FIX + LFIX)) / (R * R);  // normalization constant
+  p->row_ = -radius;                 // negative rows re-read the first row
+
+  // analyze the input distribution so we can best-fit the threshold
+  CountLevels(data, width * height, p);
+
+  // correction table (correction_ points at its center entry)
+  p->correction_ = ((int16_t*)mem) + LUT_SIZE;
+  InitCorrectionLUT(p->correction_, p->min_level_dist_);
+
+  return 1;
+}
+
+static void CleanupParams(SmoothParams* const p) {
+  WebPSafeFree(p->mem_);  // releases the single block set up by InitParams
+}
+
+// Smooths the quantized alpha plane 'data' in place. Returns 0 on invalid
+// arguments or allocation failure, 1 otherwise (even if nothing was filtered).
+int WebPDequantizeLevels(uint8_t* const data, int width, int height,
+                         int strength) {
+  int radius = 4 * strength / 100;   // 0..4 once 'strength' is validated
+  if (strength < 0 || strength > 100) return 0;
+  if (data == NULL || width <= 0 || height <= 0) return 0;  // bad params
+  // Limit the (2 * radius + 1) kernel to the image size; HFilter() relies on it.
+  if (2 * radius + 1 > width) radius = (width - 1) >> 1;
+  if (2 * radius + 1 > height) radius = (height - 1) >> 1;
+  if (radius > 0) {
+    SmoothParams p;
+    memset(&p, 0, sizeof(p));
+    if (!InitParams(data, width, height, radius, &p)) return 0;
+    for (; p.num_levels_ > 2 && p.row_ < p.height_; ++p.row_) {
+      VFilter(&p);                       // accumulate average of input
+      if (p.row_ < p.radius_) continue;  // still priming: no output row yet
+      HFilter(&p);
+      ApplyFilter(&p);
+    }
+    CleanupParams(&p);
+  }
+  return 1;
+}
diff --git a/drivers/webp/utils/quant_levels_dec.h b/drivers/webp/utils/quant_levels_dec.h
new file mode 100644
index 0000000000..29c7e6e205
--- /dev/null
+++ b/drivers/webp/utils/quant_levels_dec.h
@@ -0,0 +1,35 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Alpha plane de-quantization utility
+//
+// Author:  Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_DEC_H_
+#define WEBP_UTILS_QUANT_LEVELS_DEC_H_
+
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Apply post-processing (in place) to 'data' of size 'width'x'height' assuming
+// that the source was quantized to a reduced number of levels.
+// Strength is in [0..100] and controls the amount of smoothing applied.
+// Returns false in case of error (data is NULL, invalid parameters,
+// malloc failure, ...).
+int WebPDequantizeLevels(uint8_t* const data, int width, int height,
+                         int strength);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_QUANT_LEVELS_DEC_H_ */
diff --git a/drivers/webp/utils/random.c b/drivers/webp/utils/random.c
new file mode 100644
index 0000000000..24e96ad648
--- /dev/null
+++ b/drivers/webp/utils/random.c
@@ -0,0 +1,43 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Pseudo-random utilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+#include "./random.h"
+
+//------------------------------------------------------------------------------
+
+// 31b-range values: initial generator state copied by VP8InitRandom()
+static const uint32_t kRandomTable[VP8_RANDOM_TABLE_SIZE] = {
+  0x0de15230, 0x03b31886, 0x775faccb, 0x1c88626a, 0x68385c55, 0x14b3b828,
+  0x4a85fef8, 0x49ddb84b, 0x64fcf397, 0x5c550289, 0x4a290000, 0x0d7ec1da,
+  0x5940b7ab, 0x5492577d, 0x4e19ca72, 0x38d38c69, 0x0c01ee65, 0x32a1755f,
+  0x5437f652, 0x5abb2c32, 0x0faa57b1, 0x73f533e7, 0x685feeda, 0x7563cce2,
+  0x6e990e83, 0x4730a7ed, 0x4fc0d9c6, 0x496b153c, 0x4f1403fa, 0x541afb0c,
+  0x73990b32, 0x26d7cb1c, 0x6fcc3706, 0x2cbb77d8, 0x75762f2a, 0x6425ccdd,
+  0x24b35461, 0x0a7d8715, 0x220414a8, 0x141ebf67, 0x56b41583, 0x73e502e3,
+  0x44cab16f, 0x28264d42, 0x73baaefb, 0x0a50ebed, 0x1d6ab6fb, 0x0d3ad40b,
+  0x35db3b68, 0x2b081e83, 0x77ce6b95, 0x5181e5f0, 0x78853bbc, 0x009f9494,
+  0x27e5ed3c
+};
+
+void VP8InitRandom(VP8Random* const rg, float dithering) {
+  const int max_amp = 1 << VP8_RANDOM_DITHER_FIX;  // amplitude for 1.0
+  rg->index1_ = 0;
+  rg->index2_ = 31;
+  memcpy(rg->tab_, kRandomTable, sizeof(rg->tab_));  // fresh generator state
+  if (dithering < 0.0) dithering = 0.0f;             // negative means: none
+  rg->amp_ = (dithering > 1.0) ? max_amp : (uint32_t)(max_amp * dithering);
+}
+
+//------------------------------------------------------------------------------
+
diff --git a/drivers/webp/utils/random.h b/drivers/webp/utils/random.h
new file mode 100644
index 0000000000..745f3e2e87
--- /dev/null
+++ b/drivers/webp/utils/random.h
@@ -0,0 +1,63 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+//
+// Pseudo-random utilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RANDOM_H_
+#define WEBP_UTILS_RANDOM_H_
+
+#include <assert.h>
+#include "webp/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define VP8_RANDOM_DITHER_FIX 8   // fixed-point precision for dithering
+#define VP8_RANDOM_TABLE_SIZE 55
+
+typedef struct {
+  int index1_, index2_;                  // the two read positions in tab_
+  uint32_t tab_[VP8_RANDOM_TABLE_SIZE];  // generator state
+  int amp_;                              // amplitude set by VP8InitRandom()
+} VP8Random;
+
+// Initializes random generator with an amplitude 'dithering' in range [0..1].
+void VP8InitRandom(VP8Random* const rg, float dithering);
+
+// Returns a centered pseudo-random number with 'num_bits' amplitude.
+// (uses D.Knuth's Difference-based random generator).
+// 'amp' is in VP8_RANDOM_DITHER_FIX fixed-point precision.
+static WEBP_INLINE int VP8RandomBits2(VP8Random* const rg, int num_bits,
+                                      int amp) {
+  int diff;
+  assert(num_bits + VP8_RANDOM_DITHER_FIX <= 31);
+  diff = rg->tab_[rg->index1_] - rg->tab_[rg->index2_];
+  if (diff < 0) diff += (1u << 31);   // wrap back into the 31b range
+  rg->tab_[rg->index1_] = diff;       // the new value replaces the entry read
+  if (++rg->index1_ == VP8_RANDOM_TABLE_SIZE) rg->index1_ = 0;  // circular
+  if (++rg->index2_ == VP8_RANDOM_TABLE_SIZE) rg->index2_ = 0;  // circular
+  // sign-extend, 0-center
+  diff = (int)((uint32_t)diff << 1) >> (32 - num_bits);
+  diff = (diff * amp) >> VP8_RANDOM_DITHER_FIX;  // restrict range
+  diff += 1 << (num_bits - 1);                   // shift back to 0.5-center
+  return diff;
+}
+
+static WEBP_INLINE int VP8RandomBits(VP8Random* const rg, int num_bits) {
+  return VP8RandomBits2(rg, num_bits, rg->amp_);  // amplitude stored in 'rg'
+}
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_RANDOM_H_ */
diff --git a/drivers/webp/utils/rescaler.c b/drivers/webp/utils/rescaler.c
index 9825dcbc5f..00c9300bfb 100644
--- a/drivers/webp/utils/rescaler.c
+++ b/drivers/webp/utils/rescaler.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Rescaling functions
@@ -11,124 +13,116 @@
 
 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>
+#include "../dsp/dsp.h"
 #include "./rescaler.h"
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
-
-#define RFIX 30
-#define MULT_FIX(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
-
 void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
-                      uint8_t* const dst, int dst_width, int dst_height,
-                      int dst_stride, int num_channels, int x_add, int x_sub,
-                      int y_add, int y_sub, int32_t* const work) {
+                      uint8_t* const dst,
+                      int dst_width, int dst_height, int dst_stride,
+                      int num_channels, rescaler_t* const work) {
+  const int x_add = src_width, x_sub = dst_width;
+  const int y_add = src_height, y_sub = dst_height;
   wrk->x_expand = (src_width < dst_width);
+  wrk->y_expand = (src_height < dst_height);
   wrk->src_width = src_width;
   wrk->src_height = src_height;
   wrk->dst_width = dst_width;
   wrk->dst_height = dst_height;
+  wrk->src_y = 0;
+  wrk->dst_y = 0;
   wrk->dst = dst;
   wrk->dst_stride = dst_stride;
   wrk->num_channels = num_channels;
+
   // for 'x_expand', we use bilinear interpolation
-  wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
+  wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add;
   wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
-  wrk->y_accum = y_add;
-  wrk->y_add = y_add;
-  wrk->y_sub = y_sub;
-  wrk->fx_scale = (1 << RFIX) / x_sub;
-  wrk->fy_scale = (1 << RFIX) / y_sub;
-  wrk->fxy_scale = wrk->x_expand ?
-      ((int64_t)dst_height << RFIX) / (x_sub * src_height) :
-      ((int64_t)dst_height << RFIX) / (x_add * src_height);
+  if (!wrk->x_expand) {  // fx_scale is not used otherwise
+    wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub);
+  }
+  // vertical scaling parameters
+  wrk->y_add = wrk->y_expand ? y_add - 1 : y_add;
+  wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub;
+  wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add;
+  if (!wrk->y_expand) {
+    // this is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast.
+    const uint64_t ratio =
+        (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add);
+    if (ratio != (uint32_t)ratio) {
+      // We can't represent the ratio with the current fixed-point precision.
+      // => We special-case fxy_scale = 0, in WebPRescalerExportRow().
+      wrk->fxy_scale = 0;
+    } else {
+      wrk->fxy_scale = (uint32_t)ratio;
+    }
+    wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub);
+  } else {
+    wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add);
+    // wrk->fxy_scale is unused here.
+  }
   wrk->irow = work;
   wrk->frow = work + num_channels * dst_width;
-}
+  memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
 
-void WebPRescalerImportRow(WebPRescaler* const wrk,
-                           const uint8_t* const src, int channel) {
-  const int x_stride = wrk->num_channels;
-  const int x_out_max = wrk->dst_width * wrk->num_channels;
-  int x_in = channel;
-  int x_out;
-  int accum = 0;
-  if (!wrk->x_expand) {
-    int sum = 0;
-    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
-      accum += wrk->x_add;
-      for (; accum > 0; accum -= wrk->x_sub) {
-        sum += src[x_in];
-        x_in += x_stride;
-      }
-      {        // Emit next horizontal pixel.
-        const int32_t base = src[x_in];
-        const int32_t frac = base * (-accum);
-        x_in += x_stride;
-        wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
-        // fresh fractional start for next pixel
-        sum = (int)MULT_FIX(frac, wrk->fx_scale);
-      }
-    }
-  } else {        // simple bilinear interpolation
-    int left = src[channel], right = src[channel];
-    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
-      if (accum < 0) {
-        left = right;
-        x_in += x_stride;
-        right = src[x_in];
-        accum += wrk->x_add;
-      }
-      wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
-      accum -= wrk->x_sub;
-    }
-  }
-  // Accumulate the new row's contribution
-  for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
-    wrk->irow[x_out] += wrk->frow[x_out];
-  }
+  WebPRescalerDspInit();
 }
 
-uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) {
-  if (wrk->y_accum <= 0) {
-    int x_out;
-    uint8_t* const dst = wrk->dst;
-    int32_t* const irow = wrk->irow;
-    const int32_t* const frow = wrk->frow;
-    const int yscale = wrk->fy_scale * (-wrk->y_accum);
-    const int x_out_max = wrk->dst_width * wrk->num_channels;
+int WebPRescalerGetScaledDimensions(int src_width, int src_height,
+                                    int* const scaled_width,
+                                    int* const scaled_height) {
+  assert(scaled_width != NULL);
+  assert(scaled_height != NULL);
+  {
+    int width = *scaled_width;
+    int height = *scaled_height;
 
-    for (x_out = 0; x_out < x_out_max; ++x_out) {
-      const int frac = (int)MULT_FIX(frow[x_out], yscale);
-      const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
-      dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
-      irow[x_out] = frac;   // new fractional start
+    // if width is unspecified, scale original proportionally to height ratio.
+    if (width == 0) {
+      width = (src_width * height + src_height / 2) / src_height;
     }
-    wrk->y_accum += wrk->y_add;
-    wrk->dst += wrk->dst_stride;
-    return dst;
-  } else {
-    return NULL;
+    // if height is unspecified, scale original proportionally to width ratio.
+    if (height == 0) {
+      height = (src_height * width + src_width / 2) / src_width;
+    }
+    // Check if the overall dimensions still make sense.
+    if (width <= 0 || height <= 0) {
+      return 0;
+    }
+
+    *scaled_width = width;
+    *scaled_height = height;
+    return 1;
   }
 }
 
-#undef MULT_FIX
-#undef RFIX
-
 //------------------------------------------------------------------------------
 // all-in-one calls
 
+int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) {
+  const int num_lines = (wrk->y_accum + wrk->y_sub - 1) / wrk->y_sub;  // round up
+  return (num_lines > max_num_lines) ? max_num_lines : num_lines;      // cap
+}
+
 int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
                        const uint8_t* src, int src_stride) {
   int total_imported = 0;
-  while (total_imported < num_lines && wrk->y_accum > 0) {
-    int channel;
-    for (channel = 0; channel < wrk->num_channels; ++channel) {
-      WebPRescalerImportRow(wrk, src, channel);
+  while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) {
+    if (wrk->y_expand) {
+      rescaler_t* const tmp = wrk->irow;
+      wrk->irow = wrk->frow;
+      wrk->frow = tmp;
     }
+    WebPRescalerImportRow(wrk, src);
+    if (!wrk->y_expand) {     // Accumulate the contribution of the new row.
+      int x;
+      for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) {
+        wrk->irow[x] += wrk->frow[x];
+      }
+    }
+    ++wrk->src_y;
     src += src_stride;
     ++total_imported;
     wrk->y_accum -= wrk->y_sub;
@@ -146,7 +140,3 @@ int WebPRescalerExport(WebPRescaler* const rescaler) {
 }
 
 //------------------------------------------------------------------------------
-
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
diff --git a/drivers/webp/utils/rescaler.h b/drivers/webp/utils/rescaler.h
index 9c9133d19b..868467b4d7 100644
--- a/drivers/webp/utils/rescaler.h
+++ b/drivers/webp/utils/rescaler.h
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Rescaling functions
@@ -12,64 +14,87 @@
 #ifndef WEBP_UTILS_RESCALER_H_
 #define WEBP_UTILS_RESCALER_H_
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 extern "C" {
 #endif
 
-#include "../types.h"
+#include "webp/types.h"
+
+#define WEBP_RESCALER_RFIX 32   // fixed-point precision for multiplies
+#define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX)
+#define WEBP_RESCALER_FRAC(x, y) \
+    ((uint32_t)(((uint64_t)(x) << WEBP_RESCALER_RFIX) / (y)))
 
 // Structure used for on-the-fly rescaling
-typedef struct {
+typedef uint32_t rescaler_t;   // type for side-buffer
+typedef struct WebPRescaler WebPRescaler;
+struct WebPRescaler {
   int x_expand;               // true if we're expanding in the x direction
+  int y_expand;               // true if we're expanding in the y direction
   int num_channels;           // bytes to jump between pixels
-  int fy_scale, fx_scale;     // fixed-point scaling factor
-  int64_t fxy_scale;          // ''
-  // we need hpel-precise add/sub increments, for the downsampled U/V planes.
+  uint32_t fx_scale;          // fixed-point scaling factors
+  uint32_t fy_scale;          // ''
+  uint32_t fxy_scale;         // ''
   int y_accum;                // vertical accumulator
-  int y_add, y_sub;           // vertical increments (add ~= src, sub ~= dst)
-  int x_add, x_sub;           // horizontal increments (add ~= src, sub ~= dst)
+  int y_add, y_sub;           // vertical increments
+  int x_add, x_sub;           // horizontal increments
   int src_width, src_height;  // source dimensions
   int dst_width, dst_height;  // destination dimensions
+  int src_y, dst_y;           // row counters for input and output
   uint8_t* dst;
   int dst_stride;
-  int32_t* irow, *frow;       // work buffer
-} WebPRescaler;
+  rescaler_t* irow, *frow;    // work buffer
+};
 
 // Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
-void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+void WebPRescalerInit(WebPRescaler* const rescaler,
+                      int src_width, int src_height,
                       uint8_t* const dst,
                       int dst_width, int dst_height, int dst_stride,
                       int num_channels,
-                      int x_add, int x_sub,
-                      int y_add, int y_sub,
-                      int32_t* const work);
+                      rescaler_t* const work);
+
+// If either 'scaled_width' or 'scaled_height' (but not both) is 0 the value
+// will be calculated preserving the aspect ratio, otherwise the values are
+// left unmodified. Returns true on success, false if either value is 0 after
+// performing the scaling calculation.
+int WebPRescalerGetScaledDimensions(int src_width, int src_height,
+                                    int* const scaled_width,
+                                    int* const scaled_height);
 
-// Import a row of data and save its contribution in the rescaler.
-// 'channel' denotes the channel number to be imported.
-void WebPRescalerImportRow(WebPRescaler* const rescaler,
-                           const uint8_t* const src, int channel);
+// Returns the number of input lines needed next to produce one output line,
+// considering that the maximum available input lines are 'max_num_lines'.
+int WebPRescaleNeededLines(const WebPRescaler* const rescaler,
+                           int max_num_lines);
 
 // Import multiple rows over all channels, until at least one row is ready to
 // be exported. Returns the actual number of lines that were imported.
 int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
                        const uint8_t* src, int src_stride);
 
-// Return true if there is pending output rows ready.
+// Export as many rows as possible. Return the number of rows written.
+int WebPRescalerExport(WebPRescaler* const rescaler);
+
+// Return true if input is finished
 static WEBP_INLINE
-int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
-  return (rescaler->y_accum <= 0);
+int WebPRescalerInputDone(const WebPRescaler* const rescaler) {
+  return (rescaler->src_y >= rescaler->src_height);
+}
+// Return true if output is finished
+static WEBP_INLINE
+int WebPRescalerOutputDone(const WebPRescaler* const rescaler) {
+  return (rescaler->dst_y >= rescaler->dst_height);
 }
 
-// Export one row from rescaler. Returns the pointer where output was written,
-// or NULL if no row was pending.
-uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk);
-
-// Export as many rows as possible. Return the numbers of rows written.
-int WebPRescalerExport(WebPRescaler* const wrk);
+// Return true if there are pending output rows ready.
+static WEBP_INLINE
+int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
+  return !WebPRescalerOutputDone(rescaler) && (rescaler->y_accum <= 0);
+}
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/thread.c b/drivers/webp/utils/thread.c
index ce89cf9dc7..93f7622797 100644
--- a/drivers/webp/utils/thread.c
+++ b/drivers/webp/utils/thread.c
@@ -1,27 +1,60 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Multi-threaded worker
 //
 // Author: Skal (pascal.massimino@gmail.com)
 
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
 #include <assert.h>
 #include <string.h>   // for memset()
 #include "./thread.h"
+#include "./utils.h"
+
+#ifdef WEBP_USE_THREAD
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
+#if _WIN32_WINNT >= 0x0600  // Windows Vista / Server 2008 or greater
+#define USE_WINDOWS_CONDITION_VARIABLE
+typedef CONDITION_VARIABLE pthread_cond_t;
+#else
+typedef struct {
+  HANDLE waiting_sem_;
+  HANDLE received_sem_;
+  HANDLE signal_event_;
+} pthread_cond_t;
+#endif  // _WIN32_WINNT >= 0x600
+
+#ifndef WINAPI_FAMILY_PARTITION
+#define WINAPI_PARTITION_DESKTOP 1
+#define WINAPI_FAMILY_PARTITION(x) x
 #endif
 
-#ifdef WEBP_USE_THREAD
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define USE_CREATE_THREAD
+#endif
+
+#else  // !_WIN32
+
+#include <pthread.h>
+
+#endif  // _WIN32
+
+struct WebPWorkerImpl {
+  pthread_mutex_t mutex_;
+  pthread_cond_t  condition_;
+  pthread_t       thread_;
+};
 
 #if defined(_WIN32)
 
@@ -34,15 +67,29 @@ extern "C" {
 #define THREADFN unsigned int __stdcall
 #define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
 
+#if _WIN32_WINNT >= 0x0501  // Windows XP or greater
+#define WaitForSingleObject(obj, timeout) \
+  WaitForSingleObjectEx(obj, timeout, FALSE /*bAlertable*/)
+#endif
+
 static int pthread_create(pthread_t* const thread, const void* attr,
                           unsigned int (__stdcall *start)(void*), void* arg) {
   (void)attr;
+#ifdef USE_CREATE_THREAD
+  *thread = CreateThread(NULL,   /* lpThreadAttributes */
+                         0,      /* dwStackSize */
+                         start,
+                         arg,
+                         0,      /* dwCreationFlags */
+                         NULL);  /* lpThreadId */
+#else
   *thread = (pthread_t)_beginthreadex(NULL,   /* void *security */
                                       0,      /* unsigned stack_size */
                                       start,
                                       arg,
                                       0,      /* unsigned initflag */
                                       NULL);  /* unsigned *thrdaddr */
+#endif
   if (*thread == NULL) return 1;
   SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);
   return 0;
@@ -57,7 +104,11 @@ static int pthread_join(pthread_t thread, void** value_ptr) {
 // Mutex
 static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
   (void)mutexattr;
+#if _WIN32_WINNT >= 0x0600  // Windows Vista / Server 2008 or greater
+  InitializeCriticalSectionEx(mutex, 0 /*dwSpinCount*/, 0 /*Flags*/);
+#else
   InitializeCriticalSection(mutex);
+#endif
   return 0;
 }
 
@@ -79,14 +130,21 @@ static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
 // Condition
 static int pthread_cond_destroy(pthread_cond_t* const condition) {
   int ok = 1;
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+  (void)condition;
+#else
   ok &= (CloseHandle(condition->waiting_sem_) != 0);
   ok &= (CloseHandle(condition->received_sem_) != 0);
   ok &= (CloseHandle(condition->signal_event_) != 0);
+#endif
   return !ok;
 }
 
 static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
   (void)cond_attr;
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+  InitializeConditionVariable(condition);
+#else
   condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
   condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);
   condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);
@@ -96,11 +154,15 @@ static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
     pthread_cond_destroy(condition);
     return 1;
   }
+#endif
   return 0;
 }
 
 static int pthread_cond_signal(pthread_cond_t* const condition) {
   int ok = 1;
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+  WakeConditionVariable(condition);
+#else
   if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
     // a thread is waiting in pthread_cond_wait: allow it to be notified
     ok = SetEvent(condition->signal_event_);
@@ -109,12 +171,16 @@ static int pthread_cond_signal(pthread_cond_t* const condition) {
     ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) !=
            WAIT_OBJECT_0);
   }
+#endif
   return !ok;
 }
 
 static int pthread_cond_wait(pthread_cond_t* const condition,
                              pthread_mutex_t* const mutex) {
   int ok;
+#ifdef USE_WINDOWS_CONDITION_VARIABLE
+  ok = SleepConditionVariableCS(condition, mutex, INFINITE);
+#else
   // note that there is a consumer available so the signal isn't dropped in
   // pthread_cond_signal
   if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
@@ -125,123 +191,168 @@ static int pthread_cond_wait(pthread_cond_t* const condition,
         WAIT_OBJECT_0);
   ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL);
   pthread_mutex_lock(mutex);
+#endif
   return !ok;
 }
 
-#else  // _WIN32
+#else  // !_WIN32
 # define THREADFN void*
 # define THREAD_RETURN(val) val
-#endif
+#endif  // _WIN32
 
 //------------------------------------------------------------------------------
 
-static THREADFN WebPWorkerThreadLoop(void *ptr) {    // thread loop
+static void Execute(WebPWorker* const worker);  // Forward declaration.
+
+static THREADFN ThreadLoop(void* ptr) {
   WebPWorker* const worker = (WebPWorker*)ptr;
   int done = 0;
   while (!done) {
-    pthread_mutex_lock(&worker->mutex_);
+    pthread_mutex_lock(&worker->impl_->mutex_);
     while (worker->status_ == OK) {   // wait in idling mode
-      pthread_cond_wait(&worker->condition_, &worker->mutex_);
+      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
     }
     if (worker->status_ == WORK) {
-      if (worker->hook) {
-        worker->had_error |= !worker->hook(worker->data1, worker->data2);
-      }
+      Execute(worker);
       worker->status_ = OK;
     } else if (worker->status_ == NOT_OK) {   // finish the worker
       done = 1;
     }
     // signal to the main thread that we're done (for Sync())
-    pthread_cond_signal(&worker->condition_);
-    pthread_mutex_unlock(&worker->mutex_);
+    pthread_cond_signal(&worker->impl_->condition_);
+    pthread_mutex_unlock(&worker->impl_->mutex_);
   }
   return THREAD_RETURN(NULL);    // Thread is finished
 }
 
 // main thread state control
-static void WebPWorkerChangeState(WebPWorker* const worker,
-                                  WebPWorkerStatus new_status) {
-  // no-op when attempting to change state on a thread that didn't come up
-  if (worker->status_ < OK) return;
-
-  pthread_mutex_lock(&worker->mutex_);
-  // wait for the worker to finish
-  while (worker->status_ != OK) {
-    pthread_cond_wait(&worker->condition_, &worker->mutex_);
-  }
-  // assign new status and release the working thread if needed
-  if (new_status != OK) {
-    worker->status_ = new_status;
-    pthread_cond_signal(&worker->condition_);
+static void ChangeState(WebPWorker* const worker,
+                        WebPWorkerStatus new_status) {
+  // No-op when attempting to change state on a thread that didn't come up.
+  // Checking status_ without acquiring the lock first would result in a data
+  // race.
+  if (worker->impl_ == NULL) return;
+
+  pthread_mutex_lock(&worker->impl_->mutex_);
+  if (worker->status_ >= OK) {
+    // wait for the worker to finish
+    while (worker->status_ != OK) {
+      pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_);
+    }
+    // assign new status and release the working thread if needed
+    if (new_status != OK) {
+      worker->status_ = new_status;
+      pthread_cond_signal(&worker->impl_->condition_);
+    }
   }
-  pthread_mutex_unlock(&worker->mutex_);
+  pthread_mutex_unlock(&worker->impl_->mutex_);
 }
 
-#endif
+#endif  // WEBP_USE_THREAD
 
 //------------------------------------------------------------------------------
 
-void WebPWorkerInit(WebPWorker* const worker) {
+static void Init(WebPWorker* const worker) {
   memset(worker, 0, sizeof(*worker));
   worker->status_ = NOT_OK;
 }
 
-int WebPWorkerSync(WebPWorker* const worker) {
+static int Sync(WebPWorker* const worker) {
 #ifdef WEBP_USE_THREAD
-  WebPWorkerChangeState(worker, OK);
+  ChangeState(worker, OK);
 #endif
   assert(worker->status_ <= OK);
   return !worker->had_error;
 }
 
-int WebPWorkerReset(WebPWorker* const worker) {
+static int Reset(WebPWorker* const worker) {
   int ok = 1;
   worker->had_error = 0;
   if (worker->status_ < OK) {
 #ifdef WEBP_USE_THREAD
-    if (pthread_mutex_init(&worker->mutex_, NULL) ||
-        pthread_cond_init(&worker->condition_, NULL)) {
+    worker->impl_ = (WebPWorkerImpl*)WebPSafeCalloc(1, sizeof(*worker->impl_));
+    if (worker->impl_ == NULL) {
       return 0;
     }
-    pthread_mutex_lock(&worker->mutex_);
-    ok = !pthread_create(&worker->thread_, NULL, WebPWorkerThreadLoop, worker);
+    if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) {
+      goto Error;
+    }
+    if (pthread_cond_init(&worker->impl_->condition_, NULL)) {
+      pthread_mutex_destroy(&worker->impl_->mutex_);
+      goto Error;
+    }
+    pthread_mutex_lock(&worker->impl_->mutex_);
+    ok = !pthread_create(&worker->impl_->thread_, NULL, ThreadLoop, worker);
     if (ok) worker->status_ = OK;
-    pthread_mutex_unlock(&worker->mutex_);
+    pthread_mutex_unlock(&worker->impl_->mutex_);
+    if (!ok) {
+      pthread_mutex_destroy(&worker->impl_->mutex_);
+      pthread_cond_destroy(&worker->impl_->condition_);
+ Error:
+      WebPSafeFree(worker->impl_);
+      worker->impl_ = NULL;
+      return 0;
+    }
 #else
     worker->status_ = OK;
 #endif
   } else if (worker->status_ > OK) {
-    ok = WebPWorkerSync(worker);
+    ok = Sync(worker);
   }
   assert(!ok || (worker->status_ == OK));
   return ok;
 }
 
-void WebPWorkerLaunch(WebPWorker* const worker) {
+static void Execute(WebPWorker* const worker) {
+  if (worker->hook != NULL) {
+    worker->had_error |= !worker->hook(worker->data1, worker->data2);
+  }
+}
+
+static void Launch(WebPWorker* const worker) {
 #ifdef WEBP_USE_THREAD
-  WebPWorkerChangeState(worker, WORK);
+  ChangeState(worker, WORK);
 #else
-  if (worker->hook)
-    worker->had_error |= !worker->hook(worker->data1, worker->data2);
+  Execute(worker);
 #endif
 }
 
-void WebPWorkerEnd(WebPWorker* const worker) {
-  if (worker->status_ >= OK) {
+static void End(WebPWorker* const worker) {
 #ifdef WEBP_USE_THREAD
-    WebPWorkerChangeState(worker, NOT_OK);
-    pthread_join(worker->thread_, NULL);
-    pthread_mutex_destroy(&worker->mutex_);
-    pthread_cond_destroy(&worker->condition_);
+  if (worker->impl_ != NULL) {
+    ChangeState(worker, NOT_OK);
+    pthread_join(worker->impl_->thread_, NULL);
+    pthread_mutex_destroy(&worker->impl_->mutex_);
+    pthread_cond_destroy(&worker->impl_->condition_);
+    WebPSafeFree(worker->impl_);
+    worker->impl_ = NULL;
+  }
 #else
-    worker->status_ = NOT_OK;
+  worker->status_ = NOT_OK;
+  assert(worker->impl_ == NULL);
 #endif
-  }
   assert(worker->status_ == NOT_OK);
 }
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+static WebPWorkerInterface g_worker_interface = {
+  Init, Reset, Sync, Launch, Execute, End
+};
+
+int WebPSetWorkerInterface(const WebPWorkerInterface* const winterface) {
+  if (winterface == NULL ||
+      winterface->Init == NULL || winterface->Reset == NULL ||
+      winterface->Sync == NULL || winterface->Launch == NULL ||
+      winterface->Execute == NULL || winterface->End == NULL) {
+    return 0;
+  }
+  g_worker_interface = *winterface;
+  return 1;
+}
+
+const WebPWorkerInterface* WebPGetWorkerInterface(void) {
+  return &g_worker_interface;
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/utils/thread.h b/drivers/webp/utils/thread.h
index 3191890b76..6008bb7c01 100644
--- a/drivers/webp/utils/thread.h
+++ b/drivers/webp/utils/thread.h
@@ -1,8 +1,10 @@
 // Copyright 2011 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Multi-threaded worker
@@ -12,29 +14,15 @@
 #ifndef WEBP_UTILS_THREAD_H_
 #define WEBP_UTILS_THREAD_H_
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
+#ifdef HAVE_CONFIG_H
+#include "webp/config.h"
 #endif
 
-#if WEBP_USE_THREAD
-
-#if defined(_WIN32)
-
-#include <windows.h>
-typedef HANDLE pthread_t;
-typedef CRITICAL_SECTION pthread_mutex_t;
-typedef struct {
-  HANDLE waiting_sem_;
-  HANDLE received_sem_;
-  HANDLE signal_event_;
-} pthread_cond_t;
+#include "webp/types.h"
 
-#else
-
-#include <pthread.h>
-
-#endif    /* _WIN32 */
-#endif    /* WEBP_USE_THREAD */
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 // State of the worker thread object
 typedef enum {
@@ -47,13 +35,12 @@ typedef enum {
 // arguments (data1 and data2), and should return false in case of error.
 typedef int (*WebPWorkerHook)(void*, void*);
 
-// Synchronize object used to launch job in the worker thread
+// Platform-dependent implementation details for the worker.
+typedef struct WebPWorkerImpl WebPWorkerImpl;
+
+// Synchronization object used to launch job in the worker thread
 typedef struct {
-#if WEBP_USE_THREAD
-  pthread_mutex_t mutex_;
-  pthread_cond_t  condition_;
-  pthread_t       thread_;
-#endif
+  WebPWorkerImpl* impl_;
   WebPWorkerStatus status_;
   WebPWorkerHook hook;    // hook to call
   void* data1;            // first argument passed to 'hook'
@@ -61,25 +48,45 @@ typedef struct {
   int had_error;          // return value of the last call to 'hook'
 } WebPWorker;
 
-// Must be called first, before any other method.
-void WebPWorkerInit(WebPWorker* const worker);
-// Must be called initialize the object and spawn the thread. Re-entrant.
-// Will potentially launch the thread. Returns false in case of error.
-int WebPWorkerReset(WebPWorker* const worker);
-// Make sure the previous work is finished. Returns true if worker->had_error
-// was not set and not error condition was triggered by the working thread.
-int WebPWorkerSync(WebPWorker* const worker);
-// Trigger the thread to call hook() with data1 and data2 argument. These
-// hook/data1/data2 can be changed at any time before calling this function,
-// but not be changed afterward until the next call to WebPWorkerSync().
-void WebPWorkerLaunch(WebPWorker* const worker);
-// Kill the thread and terminate the object. To use the object again, one
-// must call WebPWorkerReset() again.
-void WebPWorkerEnd(WebPWorker* const worker);
+// The interface for all thread-worker related functions. All these functions
+// must be implemented.
+typedef struct {
+  // Must be called first, before any other method.
+  void (*Init)(WebPWorker* const worker);
+  // Must be called to initialize the object and spawn the thread. Re-entrant.
+  // Will potentially launch the thread. Returns false in case of error.
+  int (*Reset)(WebPWorker* const worker);
+  // Makes sure the previous work is finished. Returns true if worker->had_error
+  // was not set and no error condition was triggered by the working thread.
+  int (*Sync)(WebPWorker* const worker);
+  // Triggers the thread to call hook() with data1 and data2 arguments. These
+  // hook/data1/data2 values can be changed at any time before calling this
+  // function, but must not be changed afterward until the next call to Sync().
+  void (*Launch)(WebPWorker* const worker);
+  // This function is similar to Launch() except that it calls the
+  // hook directly instead of using a thread. Convenient to bypass the thread
+  // mechanism while still using the WebPWorker structs. Sync() must
+  // still be called afterward (for error reporting).
+  void (*Execute)(WebPWorker* const worker);
+  // Kill the thread and terminate the object. To use the object again, one
+  // must call Reset() again.
+  void (*End)(WebPWorker* const worker);
+} WebPWorkerInterface;
+
+// Install a new set of threading functions, overriding the defaults. This
+// should be done before any workers are started, i.e., before any encoding or
+// decoding takes place. The contents of the interface struct are copied; it
+// is safe to free the corresponding memory after this call. This function is
+// not thread-safe. Return false in case of invalid pointer or methods.
+WEBP_EXTERN(int) WebPSetWorkerInterface(
+    const WebPWorkerInterface* const winterface);
+
+// Retrieve the currently set thread worker interface.
+WEBP_EXTERN(const WebPWorkerInterface*) WebPGetWorkerInterface(void);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webp/utils/utils.c b/drivers/webp/utils/utils.c
index 673b7e284c..35aeae6ab8 100644
--- a/drivers/webp/utils/utils.c
+++ b/drivers/webp/utils/utils.c
@@ -1,8 +1,10 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Misc. common utility functions
@@ -10,35 +12,228 @@
 // Author: Skal (pascal.massimino@gmail.com)
 
 #include <stdlib.h>
+#include <string.h>  // for memcpy()
+#include "webp/decode.h"
+#include "webp/encode.h"
 #include "./utils.h"
 
-#if defined(__cplusplus) || defined(c_plusplus)
-extern "C" {
-#endif
+// If PRINT_MEM_INFO is defined, extra info (like total memory used, number of
+// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
+// and not multi-thread safe!).
+// An interesting alternative is valgrind's 'massif' tool:
+//    http://valgrind.org/docs/manual/ms-manual.html
+// Here is an example command line:
+/*    valgrind --tool=massif --massif-out-file=massif.out \
+               --stacks=yes --alloc-fn=WebPSafeAlloc --alloc-fn=WebPSafeCalloc
+      ms_print massif.out
+*/
+// In addition:
+// * if PRINT_MEM_TRAFFIC is defined, all the details of the malloc/free cycles
+//   are printed.
+// * if MALLOC_FAIL_AT is defined, the global environment variable
+//   $MALLOC_FAIL_AT is used to simulate a memory error when calloc or malloc
+//   is called for the nth time. Example usage:
+//   export MALLOC_FAIL_AT=50 && ./examples/cwebp input.png
+// * if MALLOC_LIMIT is defined, the global environment variable $MALLOC_LIMIT
+//   sets the maximum amount of memory (in bytes) made available to libwebp.
+//   This can be used to emulate an environment with very limited memory.
+//   Example: export MALLOC_LIMIT=64000000 && ./examples/dwebp picture.webp
+
+// #define PRINT_MEM_INFO
+// #define PRINT_MEM_TRAFFIC
+// #define MALLOC_FAIL_AT
+// #define MALLOC_LIMIT
 
 //------------------------------------------------------------------------------
 // Checked memory allocation
 
-static int CheckSizeArguments(uint64_t nmemb, size_t size) {
+#if defined(PRINT_MEM_INFO)
+
+#include <stdio.h>
+
+static int num_malloc_calls = 0;
+static int num_calloc_calls = 0;
+static int num_free_calls = 0;
+static int countdown_to_fail = 0;     // 0 = off
+
+typedef struct MemBlock MemBlock;
+struct MemBlock {
+  void* ptr_;
+  size_t size_;
+  MemBlock* next_;
+};
+
+static MemBlock* all_blocks = NULL;
+static size_t total_mem = 0;
+static size_t total_mem_allocated = 0;
+static size_t high_water_mark = 0;
+static size_t mem_limit = 0;
+
+static int exit_registered = 0;
+
+static void PrintMemInfo(void) {
+  fprintf(stderr, "\nMEMORY INFO:\n");
+  fprintf(stderr, "num calls to: malloc = %4d\n", num_malloc_calls);
+  fprintf(stderr, "              calloc = %4d\n", num_calloc_calls);
+  fprintf(stderr, "              free   = %4d\n", num_free_calls);
+  fprintf(stderr, "total_mem: %u\n", (uint32_t)total_mem);
+  fprintf(stderr, "total_mem allocated: %u\n", (uint32_t)total_mem_allocated);
+  fprintf(stderr, "high-water mark: %u\n", (uint32_t)high_water_mark);
+  while (all_blocks != NULL) {
+    MemBlock* b = all_blocks;
+    all_blocks = b->next_;
+    free(b);
+  }
+}
+
+static void Increment(int* const v) {
+  if (!exit_registered) {
+#if defined(MALLOC_FAIL_AT)
+    {
+      const char* const malloc_fail_at_str = getenv("MALLOC_FAIL_AT");
+      if (malloc_fail_at_str != NULL) {
+        countdown_to_fail = atoi(malloc_fail_at_str);
+      }
+    }
+#endif
+#if defined(MALLOC_LIMIT)
+    {
+      const char* const malloc_limit_str = getenv("MALLOC_LIMIT");
+      if (malloc_limit_str != NULL) {
+        mem_limit = atoi(malloc_limit_str);
+      }
+    }
+#endif
+    (void)countdown_to_fail;
+    (void)mem_limit;
+    atexit(PrintMemInfo);
+    exit_registered = 1;
+  }
+  ++*v;
+}
+
+static void AddMem(void* ptr, size_t size) {
+  if (ptr != NULL) {
+    MemBlock* const b = (MemBlock*)malloc(sizeof(*b));
+    if (b == NULL) abort();
+    b->next_ = all_blocks;
+    all_blocks = b;
+    b->ptr_ = ptr;
+    b->size_ = size;
+    total_mem += size;
+    total_mem_allocated += size;
+#if defined(PRINT_MEM_TRAFFIC)
+#if defined(MALLOC_FAIL_AT)
+    fprintf(stderr, "fail-count: %5d [mem=%u]\n",
+            num_malloc_calls + num_calloc_calls, (uint32_t)total_mem);
+#else
+    fprintf(stderr, "Mem: %u (+%u)\n", (uint32_t)total_mem, (uint32_t)size);
+#endif
+#endif
+    if (total_mem > high_water_mark) high_water_mark = total_mem;
+  }
+}
+
+static void SubMem(void* ptr) {
+  if (ptr != NULL) {
+    MemBlock** b = &all_blocks;
+    // Inefficient search, but that's just for debugging.
+    while (*b != NULL && (*b)->ptr_ != ptr) b = &(*b)->next_;
+    if (*b == NULL) {
+      fprintf(stderr, "Invalid pointer free! (%p)\n", ptr);
+      abort();
+    }
+    {
+      MemBlock* const block = *b;
+      *b = block->next_;
+      total_mem -= block->size_;
+#if defined(PRINT_MEM_TRAFFIC)
+      fprintf(stderr, "Mem: %u (-%u)\n",
+              (uint32_t)total_mem, (uint32_t)block->size_);
+#endif
+      free(block);
+    }
+  }
+}
+
+#else
+#define Increment(v) do {} while (0)
+#define AddMem(p, s) do {} while (0)
+#define SubMem(p)    do {} while (0)
+#endif
+
+// Returns 0 in case of overflow of nmemb * size.
+static int CheckSizeArgumentsOverflow(uint64_t nmemb, size_t size) {
   const uint64_t total_size = nmemb * size;
   if (nmemb == 0) return 1;
   if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;
   if (total_size != (size_t)total_size) return 0;
+#if defined(PRINT_MEM_INFO) && defined(MALLOC_FAIL_AT)
+  if (countdown_to_fail > 0 && --countdown_to_fail == 0) {
+    return 0;    // fake fail!
+  }
+#endif
+#if defined(MALLOC_LIMIT)
+  if (mem_limit > 0 && total_mem + total_size >= mem_limit) {
+    return 0;   // fake fail!
+  }
+#endif
+
   return 1;
 }
 
 void* WebPSafeMalloc(uint64_t nmemb, size_t size) {
-  if (!CheckSizeArguments(nmemb, size)) return NULL;
-  return malloc((size_t)(nmemb * size));
+  void* ptr;
+  Increment(&num_malloc_calls);
+  if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+  assert(nmemb * size > 0);
+  ptr = malloc((size_t)(nmemb * size));
+  AddMem(ptr, (size_t)(nmemb * size));
+  return ptr;
 }
 
 void* WebPSafeCalloc(uint64_t nmemb, size_t size) {
-  if (!CheckSizeArguments(nmemb, size)) return NULL;
-  return calloc((size_t)nmemb, size);
+  void* ptr;
+  Increment(&num_calloc_calls);
+  if (!CheckSizeArgumentsOverflow(nmemb, size)) return NULL;
+  assert(nmemb * size > 0);
+  ptr = calloc((size_t)nmemb, size);
+  AddMem(ptr, (size_t)(nmemb * size));
+  return ptr;
+}
+
+void WebPSafeFree(void* const ptr) {
+  if (ptr != NULL) {
+    Increment(&num_free_calls);
+    SubMem(ptr);
+  }
+  free(ptr);
+}
+
+// Public API function.
+void WebPFree(void* ptr) {
+  free(ptr);
 }
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
-}    // extern "C"
-#endif
+void WebPCopyPlane(const uint8_t* src, int src_stride,
+                   uint8_t* dst, int dst_stride, int width, int height) {
+  assert(src != NULL && dst != NULL);
+  assert(src_stride >= width && dst_stride >= width);
+  while (height-- > 0) {
+    memcpy(dst, src, width);
+    src += src_stride;
+    dst += dst_stride;
+  }
+}
+
+void WebPCopyPixels(const WebPPicture* const src, WebPPicture* const dst) {
+  assert(src != NULL && dst != NULL);
+  assert(src->width == dst->width && src->height == dst->height);
+  assert(src->use_argb && dst->use_argb);
+  WebPCopyPlane((uint8_t*)src->argb, 4 * src->argb_stride, (uint8_t*)dst->argb,
+                4 * dst->argb_stride, 4 * src->width, src->height);
+}
+
+//------------------------------------------------------------------------------
diff --git a/drivers/webp/utils/utils.h b/drivers/webp/utils/utils.h
index 316ac90612..d0e1cb250a 100644
--- a/drivers/webp/utils/utils.h
+++ b/drivers/webp/utils/utils.h
@@ -1,20 +1,25 @@
 // Copyright 2012 Google Inc. All Rights Reserved.
 //
-// This code is licensed under the same terms as WebM:
-//  Software License Agreement:  http://www.webmproject.org/license/software/
-//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
 // -----------------------------------------------------------------------------
 //
 // Misc. common utility functions
 //
-// Author: Skal (pascal.massimino@gmail.com)
+// Authors: Skal (pascal.massimino@gmail.com)
+//          Urvang (urvang@google.com)
 
 #ifndef WEBP_UTILS_UTILS_H_
 #define WEBP_UTILS_UTILS_H_
 
-#include "../types.h"
+#include <assert.h>
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#include "webp/types.h"
+
+#ifdef __cplusplus
 extern "C" {
 #endif
 
@@ -30,14 +35,107 @@ extern "C" {
 // somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
 // safe malloc() borrows the signature from calloc(), pointing at the dangerous
 // underlying multiply involved.
-void* WebPSafeMalloc(uint64_t nmemb, size_t size);
+WEBP_EXTERN(void*) WebPSafeMalloc(uint64_t nmemb, size_t size);
 // Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
 // in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
-void* WebPSafeCalloc(uint64_t nmemb, size_t size);
+WEBP_EXTERN(void*) WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+// Companion deallocation function to the above allocations.
+WEBP_EXTERN(void) WebPSafeFree(void* const ptr);
+
+//------------------------------------------------------------------------------
+// Alignment
+
+#define WEBP_ALIGN_CST 31
+#define WEBP_ALIGN(PTR) ((uintptr_t)((PTR) + WEBP_ALIGN_CST) & ~WEBP_ALIGN_CST)
+
+//------------------------------------------------------------------------------
+// Reading/writing data.
+
+// Read 16, 24 or 32 bits stored in little-endian order.
+static WEBP_INLINE int GetLE16(const uint8_t* const data) {
+  return (int)(data[0] << 0) | (data[1] << 8);
+}
+
+static WEBP_INLINE int GetLE24(const uint8_t* const data) {
+  return GetLE16(data) | (data[2] << 16);
+}
+
+static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
+  return GetLE16(data) | ((uint32_t)GetLE16(data + 2) << 16);
+}
+
+// Store 16, 24 or 32 bits in little-endian order.
+static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
+  assert(val < (1 << 16));
+  data[0] = (val >> 0);
+  data[1] = (val >> 8);
+}
+
+static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
+  assert(val < (1 << 24));
+  PutLE16(data, val & 0xffff);
+  data[2] = (val >> 16);
+}
+
+static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
+  PutLE16(data, (int)(val & 0xffff));
+  PutLE16(data + 2, (int)(val >> 16));
+}
+
+// Returns (int)floor(log2(n)). n must be > 0.
+// use GNU builtins where available.
+#if defined(__GNUC__) && \
+    ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  return 31 ^ __builtin_clz(n);
+}
+#elif defined(_MSC_VER) && _MSC_VER > 1310 && \
+      (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  unsigned long first_set_bit;
+  _BitScanReverse(&first_set_bit, n);
+  return first_set_bit;
+}
+#else
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  int log = 0;
+  uint32_t value = n;
+  int i;
+
+  for (i = 4; i >= 0; --i) {
+    const int shift = (1 << i);
+    const uint32_t x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  return log;
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Pixel copying.
+
+struct WebPPicture;
+
+// Copy width x height pixels from 'src' to 'dst' honoring the strides.
+WEBP_EXTERN(void) WebPCopyPlane(const uint8_t* src, int src_stride,
+                                uint8_t* dst, int dst_stride,
+                                int width, int height);
+
+// Copy ARGB pixels from 'src' to 'dst' honoring strides. 'src' and 'dst' are
+// assumed to be already allocated and using ARGB data.
+WEBP_EXTERN(void) WebPCopyPixels(const struct WebPPicture* const src,
+                                 struct WebPPicture* const dst);
 
 //------------------------------------------------------------------------------
 
-#if defined(__cplusplus) || defined(c_plusplus)
+#ifdef __cplusplus
 }    // extern "C"
 #endif
 
diff --git a/drivers/webpold/SCsub b/drivers/webpold/SCsub
new file mode 100644
index 0000000000..5596edbe09
--- /dev/null
+++ b/drivers/webpold/SCsub
@@ -0,0 +1,63 @@
+Import('env')
+# NOTE(review): every path below is written with a "webp/" prefix -- confirm
+# that this is what the build expects for this directory.
+webp_sources = [
+	"webp/mux/muxedit.c",
+	"webp/mux/muxread.c",
+	"webp/mux/muxinternal.c",
+	"webp/mux/demux.c",
+	"webp/enc/tree.c",
+	"webp/enc/analysis.c",
+	"webp/enc/backward_references.c",
+	"webp/enc/alpha.c",
+	"webp/enc/picture.c",
+	"webp/enc/frame.c",
+	"webp/enc/webpenc.c",
+	"webp/enc/cost.c",
+	"webp/enc/filter.c",
+	"webp/enc/vp8l.c",
+	"webp/enc/quant.c",
+	"webp/enc/histogram.c",
+	"webp/enc/syntax.c",
+	"webp/enc/config.c",
+	"webp/enc/layer.c",
+	"webp/enc/iterator.c",
+	"webp/dsp/dec_sse2.c",
+	"webp/dsp/upsampling_sse2.c",
+	"webp/dsp/dec_neon.c",
+	"webp/dsp/enc.c",
+	"webp/dsp/enc_sse2.c",
+	"webp/dsp/upsampling.c",
+	"webp/dsp/lossless.c",
+	"webp/dsp/cpu.c",
+	"webp/dsp/dec.c",
+	"webp/dsp/yuv.c",
+	"webp/utils/bit_reader.c",
+	"webp/utils/filters.c",
+	"webp/utils/bit_writer.c",
+	"webp/utils/thread.c",
+	"webp/utils/quant_levels.c",
+	"webp/utils/color_cache.c",
+	"webp/utils/rescaler.c",
+	"webp/utils/utils.c",
+	"webp/utils/huffman.c",
+	"webp/utils/huffman_encode.c",
+	"webp/dec/tree.c",
+	"webp/dec/alpha.c",
+	"webp/dec/frame.c",
+	"webp/dec/vp8l.c",
+	"webp/dec/vp8.c",
+	"webp/dec/quant.c",
+	"webp/dec/webp.c",
+	"webp/dec/buffer.c",
+	"webp/dec/io.c",
+	"webp/dec/layer.c",
+	"webp/dec/idec.c",
+	"webp/image_loader_webp.cpp"
+]
+# Append the list above to the source list shared through 'env'.
+env.drivers_sources+=webp_sources
+# Alternative registration call, currently disabled:
+#env.add_source_files(env.drivers_sources, webp_sources)
+
+Export('env')
diff --git a/drivers/webpold/dec/alpha.c b/drivers/webpold/dec/alpha.c
new file mode 100644
index 0000000000..d1095fa555
--- /dev/null
+++ b/drivers/webpold/dec/alpha.c
@@ -0,0 +1,140 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane decompression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "../utils/filters.h"
+#include "../utils/quant_levels.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// TODO(skal): move to dsp/ ?  Copies 'height' rows of 'width' bytes each.
+static void CopyPlane(const uint8_t* src, int src_stride,
+                      uint8_t* dst, int dst_stride, int width, int height) {
+  for (; height > 0; --height) {
+    memcpy(dst, src, width);
+    dst += dst_stride;
+    src += src_stride;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Decodes the alpha chunk 'data' (of 'data_size' bytes, header included) into
+// 'output'. The 'output' buffer must be pre-allocated and hold 'height' rows
+// of 'stride' bytes each; 'width' bytes are written at the start of every row.
+//
+// Returns 1 on successfully decoding the alpha plane, and 0 if either:
+//   the header is invalid (compression method, filter, pre-processing, rsrv),
+//   the uncompressed payload is shorter than 'width' x 'height' bytes,
+//   a temporary buffer cannot be allocated, or the decoding method fails.
+
+static int DecodeAlpha(const uint8_t* data, size_t data_size,
+                       int width, int height, int stride, uint8_t* output) {
+  uint8_t* decoded_data = NULL;
+  const size_t decoded_size = (size_t)height * width;  // multiply as size_t
+  uint8_t* unfiltered_data = NULL;
+  WEBP_FILTER_TYPE filter;
+  int pre_processing;
+  int rsrv;
+  int ok = 0;
+  int method;
+
+  assert(width > 0 && height > 0 && stride >= width);
+  assert(data != NULL && output != NULL);
+
+  // The chunk must contain the header plus at least one payload byte.
+  if (data_size <= ALPHA_HEADER_LEN) return 0;
+  // Unpack the four 2-bit fields stored in data[0].
+  method = (data[0] >> 0) & 0x03;
+  filter = (data[0] >> 2) & 0x03;
+  pre_processing = (data[0] >> 4) & 0x03;
+  rsrv = (data[0] >> 6) & 0x03;
+  if (method < ALPHA_NO_COMPRESSION ||
+      method > ALPHA_LOSSLESS_COMPRESSION ||
+      filter >= WEBP_FILTER_LAST ||
+      pre_processing > ALPHA_PREPROCESSED_LEVELS ||
+      rsrv != 0) {
+    return 0;
+  }
+  // The raw plane follows the header: do not count header bytes as payload.
+  if (method == ALPHA_NO_COMPRESSION) {
+    ok = (data_size - ALPHA_HEADER_LEN >= decoded_size);
+    decoded_data = (uint8_t*)data + ALPHA_HEADER_LEN;
+  } else {
+    decoded_data = (uint8_t*)malloc(decoded_size);
+    if (decoded_data == NULL) return 0;
+    ok = VP8LDecodeAlphaImageStream(width, height,
+                                    data + ALPHA_HEADER_LEN,
+                                    data_size - ALPHA_HEADER_LEN,
+                                    decoded_data);
+  }
+
+  if (ok) {
+    WebPFilterFunc unfilter_func = WebPUnfilters[filter];
+    if (unfilter_func != NULL) {
+      unfiltered_data = (uint8_t*)malloc(decoded_size);
+      if (unfiltered_data == NULL) {
+        ok = 0;
+        goto Error;
+      }
+      // TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
+      // and apply filter per image-row.
+      unfilter_func(decoded_data, width, height, 1, width, unfiltered_data);
+      // Construct raw_data (height x stride) from alpha data (height x width).
+      CopyPlane(unfiltered_data, width, output, stride, width, height);
+      free(unfiltered_data);
+    } else {
+      // Construct raw_data (height x stride) from alpha data (height x width).
+      CopyPlane(decoded_data, width, output, stride, width, height);
+    }
+    // NOTE(review): runs on 'decoded_data' after the copy to 'output'; confirm.
+    if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
+      ok = DequantizeLevels(decoded_data, width, height);
+    }
+  }
+
+ Error:
+  if (method != ALPHA_NO_COMPRESSION) {
+    free(decoded_data);
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
+                                      int row, int num_rows) {
+  const int width = dec->pic_hdr_.width_;
+  const int height = dec->pic_hdr_.height_;
+
+  if (!(row >= 0 && num_rows >= 0 && row + num_rows <= height)) {
+    return NULL;    // the requested rows fall outside the picture.
+  }
+
+  // The whole plane is decoded by the call made for row 0; the alpha plane
+  // is stored with a stride equal to the picture width.
+  if (row == 0 &&
+      !DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
+                   width, height, width, dec->alpha_plane_)) {
+    return NULL;  // Error.
+  }
+
+  // Return a pointer to the first requested row of the decoded plane.
+  return dec->alpha_plane_ + row * width;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/buffer.c b/drivers/webpold/dec/buffer.c
new file mode 100644
index 0000000000..c159f6f248
--- /dev/null
+++ b/drivers/webpold/dec/buffer.c
@@ -0,0 +1,215 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Everything about WebPDecBuffer
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./webpi.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer
+
+// Number of bytes per pixel for the different color-spaces.
+static const int kModeBpp[MODE_LAST] = {
+  3, 4, 3, 4, 4, 2, 2,
+  4, 4, 4, 2,    // pre-multiplied modes
+  1, 1 };
+
+// Returns non-zero when 'webp_csp_mode' lies within [MODE_RGB, MODE_LAST).
+// The argument is a plain int so that both signed and unsigned enum
+// representations can be compared without type-limit warnings.
+static int IsValidColorspace(int webp_csp_mode) {
+  return !(webp_csp_mode < MODE_RGB || webp_csp_mode >= MODE_LAST);
+}
+
+// Checks that 'buffer' has a valid colorspace and large enough planes.
+static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
+  const WEBP_CSP_MODE mode = buffer->colorspace;
+  const int width = buffer->width;
+  const int height = buffer->height;
+  int valid;   // stays non-zero only if every check of the branch passes
+  if (!IsValidColorspace(mode)) {
+    valid = 0;
+  } else if (WebPIsRGBMode(mode)) {   // RGB checks
+    const WebPRGBABuffer* const rgba = &buffer->u.RGBA;
+    const uint64_t min_size = (uint64_t)rgba->stride * height;
+    valid = (min_size <= rgba->size) &&
+            (rgba->stride >= width * kModeBpp[mode]) &&
+            (rgba->rgba != NULL);
+  } else {    // YUV checks
+    const WebPYUVABuffer* const yuva = &buffer->u.YUVA;
+    const int uv_width = (width + 1) / 2;
+    const int uv_height = (height + 1) / 2;
+    const uint64_t min_y_size = (uint64_t)yuva->y_stride * height;
+    const uint64_t min_u_size = (uint64_t)yuva->u_stride * uv_height;
+    const uint64_t min_v_size = (uint64_t)yuva->v_stride * uv_height;
+    // Each plane needs a large enough size, a wide enough stride and a pointer.
+    valid = (min_y_size <= yuva->y_size) && (yuva->y_stride >= width) &&
+            (yuva->y != NULL);
+    valid = valid && (min_u_size <= yuva->u_size) &&
+            (yuva->u_stride >= uv_width) && (yuva->u != NULL);
+    valid = valid && (min_v_size <= yuva->v_size) &&
+            (yuva->v_stride >= uv_width) && (yuva->v != NULL);
+    if (mode == MODE_YUVA) {   // the alpha plane is only checked for YUVA
+      const uint64_t min_a_size = (uint64_t)yuva->a_stride * height;
+      valid = valid && (min_a_size <= yuva->a_size) &&
+              (yuva->a_stride >= width) && (yuva->a != NULL);
+    }
+  }
+  return valid ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
+}
+
+// Allocates the pixel memory of 'buffer' if it is neither external nor already
+// allocated, and initializes the plane pointers, strides and sizes. Returns
+// VP8_STATUS_INVALID_PARAM, VP8_STATUS_OUT_OF_MEMORY or CheckDecBuffer()'s code.
+static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
+  const int w = buffer->width;
+  const int h = buffer->height;
+  const WEBP_CSP_MODE mode = buffer->colorspace;
+  if (w <= 0 || h <= 0 || !IsValidColorspace(mode)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+
+  if (!buffer->is_external_memory && buffer->private_memory == NULL) {
+    uint8_t* output;
+    int uv_stride = 0, a_stride = 0;
+    uint64_t uv_size = 0, a_size = 0, total_size;
+    // We need memory and it hasn't been allocated yet => initialize the output
+    // buffer. The stride is computed in 64 bits so 'w * bpp' cannot overflow.
+    const uint64_t stride64 = (uint64_t)w * kModeBpp[mode];
+    const int stride = (int)stride64;
+    const uint64_t size = stride64 * h;
+    if (!WebPIsRGBMode(mode)) {
+      uv_stride = (w + 1) / 2;
+      uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
+      if (mode == MODE_YUVA) {
+        a_stride = w;
+        a_size = (uint64_t)a_stride * h;
+      }
+    }
+    total_size = size + 2 * uv_size + a_size;
+    // Security/sanity checks: the row stride must be representable as an int.
+    if ((uint64_t)stride != stride64) return VP8_STATUS_INVALID_PARAM;
+    output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
+    if (output == NULL) return VP8_STATUS_OUT_OF_MEMORY;
+    buffer->private_memory = output;
+
+    if (!WebPIsRGBMode(mode)) {   // YUVA initialization
+      WebPYUVABuffer* const buf = &buffer->u.YUVA;
+      buf->y = output;
+      buf->y_stride = stride;
+      buf->y_size = (size_t)size;
+      buf->u = output + size;
+      buf->u_stride = uv_stride;
+      buf->u_size = (size_t)uv_size;
+      buf->v = output + size + uv_size;
+      buf->v_stride = uv_stride;
+      buf->v_size = (size_t)uv_size;
+      if (mode == MODE_YUVA) {
+        buf->a = output + size + 2 * uv_size;
+      }
+      buf->a_size = (size_t)a_size;
+      buf->a_stride = a_stride;
+    } else {  // RGBA initialization
+      WebPRGBABuffer* const buf = &buffer->u.RGBA;
+      buf->rgba = output;
+      buf->stride = stride;
+      buf->size = (size_t)size;
+    }
+  }
+  return CheckDecBuffer(buffer);
+}
+
+VP8StatusCode WebPAllocateDecBuffer(int w, int h,
+                                    const WebPDecoderOptions* const options,
+                                    WebPDecBuffer* const out) {
+  if (out == NULL || w <= 0 || h <= 0) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  // Cropping (if any) is applied first; scaling then overrides the dimensions.
+  if (options != NULL && options->use_cropping) {
+    const int left = options->crop_left & ~1;   // low bit cleared
+    const int top = options->crop_top & ~1;
+    const int crop_w = options->crop_width;
+    const int crop_h = options->crop_height;
+    if (left < 0 || top < 0 || crop_w <= 0 || crop_h <= 0 ||
+        left + crop_w > w || top + crop_h > h) {
+      return VP8_STATUS_INVALID_PARAM;   // out of frame boundary.
+    }
+    w = crop_w;
+    h = crop_h;
+  }
+  if (options != NULL && options->use_scaling) {
+    const int scaled_w = options->scaled_width;
+    const int scaled_h = options->scaled_height;
+    if (scaled_w <= 0 || scaled_h <= 0) {
+      return VP8_STATUS_INVALID_PARAM;
+    }
+    w = scaled_w;
+    h = scaled_h;
+  }
+  out->width = w;
+  out->height = h;
+  return AllocateBuffer(out);   // Then, allocate buffer for real
+}
+
+//------------------------------------------------------------------------------
+// constructors / destructors
+
+int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION) ||
+      buffer == NULL) {
+    return 0;  // version mismatch, or no buffer to initialize
+  }
+  memset(buffer, 0, sizeof(*buffer));   // start from an all-zero descriptor
+  return 1;
+}
+
+void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
+  if (buffer == NULL) return;
+  if (!buffer->is_external_memory) {   // only non-external memory is freed
+    free(buffer->private_memory);
+  }
+  buffer->private_memory = NULL;
+}
+
+// Copies the descriptor 'src' into 'dst' without handing over memory ownership.
+void WebPCopyDecBuffer(const WebPDecBuffer* const src,
+                       WebPDecBuffer* const dst) {
+  if (src == NULL || dst == NULL) return;
+  *dst = *src;   // plain struct copy
+  if (src->private_memory == NULL) return;
+  // 'src' still owns the memory: flag 'dst' as using external memory.
+  dst->is_external_memory = 1;   // dst buffer doesn't own the memory.
+  dst->private_memory = NULL;
+}
+
+// Copy and transfer ownership from src to dst (beware of parameter order!)
+// Nothing happens when either pointer is NULL.
+void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
+  if (src == NULL || dst == NULL) return;
+  *dst = *src;   // 'dst' receives the private_memory pointer as well
+  if (src->private_memory == NULL) return;
+  // From here on 'src' is treated as a view on memory it does not own.
+  src->is_external_memory = 1;   // src relinquishes ownership
+  src->private_memory = NULL;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/decode_vp8.h b/drivers/webpold/dec/decode_vp8.h
new file mode 100644
index 0000000000..c26a9fc891
--- /dev/null
+++ b/drivers/webpold/dec/decode_vp8.h
@@ -0,0 +1,182 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//  Low-level API for VP8 decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_VP8_H_
+#define WEBP_WEBP_DECODE_VP8_H_
+
+#include "../decode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Lower-level API
+//
+// These functions provide fine-grained control of the decoding process.
+// The call flow should resemble:
+//
+//   VP8Io io;
+//   VP8InitIo(&io);
+//   io.data = data;
+//   io.data_size = size;
+//   /* customize io's functions (setup()/put()/teardown()) if needed. */
+//
+//   VP8Decoder* dec = VP8New();
+//   bool ok = VP8Decode(dec, &io);
+//   if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
+//   VP8Delete(dec);
+//   return ok;
+
+// Input / Output
+typedef struct VP8Io VP8Io;
+typedef int (*VP8IoPutHook)(const VP8Io* io);
+typedef int (*VP8IoSetupHook)(VP8Io* io);
+typedef void (*VP8IoTeardownHook)(const VP8Io* io);
+
+struct VP8Io {
+  // set by VP8GetHeaders()
+  int width, height;         // picture dimensions, in pixels (invariable).
+                             // These are the original, uncropped dimensions.
+                             // The actual area passed to put() is stored
+                             // in mb_w / mb_h fields.
+
+  // set before calling put()
+  int mb_y;                  // position of the current rows (in pixels)
+  int mb_w;                  // number of columns in the sample
+  int mb_h;                  // number of rows in the sample
+  const uint8_t* y, *u, *v;  // rows to copy (in yuv420 format)
+  int y_stride;              // row stride for luma
+  int uv_stride;             // row stride for chroma
+
+  void* opaque;              // user data
+
+  // called when fresh samples are available. Currently, samples are in
+  // YUV420 format, and can be up to width x 24 in size (depending on the
+  // in-loop filtering level, e.g.). Should return false in case of error
+  // or abort request. The actual size of the area to update is mb_w x mb_h
+  // in size, taking cropping into account.
+  VP8IoPutHook put;
+
+  // called just before starting to decode the blocks.
+  // Must return false in case of setup error, true otherwise. If false is
+  // returned, teardown() will NOT be called. But if the setup succeeded
+  // and true is returned, then teardown() will always be called afterward.
+  VP8IoSetupHook setup;
+
+  // Called just after block decoding is finished (or when an error occurred
+  // during put()). Is NOT called if setup() failed.
+  VP8IoTeardownHook teardown;
+
+  // this is a recommendation for the user-side yuv->rgb converter. This flag
+  // is set when calling setup() hook and can be overwritten by it. It then
+  // can be taken into consideration during the put() method.
+  int fancy_upsampling;
+
+  // Input buffer.
+  size_t data_size;
+  const uint8_t* data;
+
+  // If true, in-loop filtering will not be performed even if present in the
+  // bitstream. Switching off filtering may speed up decoding at the expense
+  // of more visible blocking. Note that output will also be non-compliant
+  // with the VP8 specifications.
+  int bypass_filtering;
+
+  // Cropping parameters.
+  int use_cropping;
+  int crop_left, crop_right, crop_top, crop_bottom;
+
+  // Scaling parameters.
+  int use_scaling;
+  int scaled_width, scaled_height;
+
+  // If non NULL, pointer to the alpha data (if present) corresponding to the
+  // start of the current row (That is: it is pre-offset by mb_y and takes
+  // cropping into account).
+  const uint8_t* a;
+};
+
+// Internal, version-checked, entry point
+int VP8InitIoInternal(VP8Io* const, int);
+
+// Set the custom IO function pointers and user-data. The setter for IO hooks
+// should be called before initiating incremental decoding. Returns true if
+// WebPIDecoder object is successfully modified, false otherwise.
+int WebPISetIOHooks(WebPIDecoder* const idec,
+                    VP8IoPutHook put,
+                    VP8IoSetupHook setup,
+                    VP8IoTeardownHook teardown,
+                    void* user_data);
+
+// Main decoding object. This is an opaque structure.
+typedef struct VP8Decoder VP8Decoder;
+
+// Create a new decoder object.
+VP8Decoder* VP8New(void);
+
+// Must be called to make sure 'io' is initialized properly.
+// Returns false in case of version mismatch. Upon such failure, no other
+// decoding function should be called (VP8Decode, VP8GetHeaders, ...)
+static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
+  return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
+}
+
+// Start decoding a new picture. Returns true if ok.
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
+
+// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
+// Returns false in case of error.
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io);
+
+// Return current status of the decoder:
+VP8StatusCode VP8Status(VP8Decoder* const dec);
+
+// return readable string corresponding to the last status.
+const char* VP8StatusMessage(VP8Decoder* const dec);
+
+// Resets the decoder in its initial state, reclaiming memory.
+// Not a mandatory call between calls to VP8Decode().
+void VP8Clear(VP8Decoder* const dec);
+
+// Destroy the decoder object.
+void VP8Delete(VP8Decoder* const dec);
+
+//------------------------------------------------------------------------------
+// Miscellaneous VP8/VP8L bitstream probing functions.
+
+// Returns true if the next 3 bytes in data contain the VP8 signature.
+WEBP_EXTERN(int) VP8CheckSignature(const uint8_t* const data, size_t data_size);
+
+// Validates the VP8 data-header and retrieves basic header information viz
+// width and height. Returns 0 in case of formatting error. *width/*height
+// can be passed NULL.
+WEBP_EXTERN(int) VP8GetInfo(
+    const uint8_t* data,
+    size_t data_size,    // data available so far
+    size_t chunk_size,   // total data size expected in the chunk
+    int* const width, int* const height);
+
+// Returns true if the next byte(s) in data is a VP8L signature.
+WEBP_EXTERN(int) VP8LCheckSignature(const uint8_t* const data, size_t size);
+
+// Validates the VP8L data-header and retrieves basic header information viz
+// width, height and alpha. Returns 0 in case of formatting error.
+// width/height/has_alpha can be passed NULL.
+WEBP_EXTERN(int) VP8LGetInfo(
+    const uint8_t* data, size_t data_size,  // data available so far
+    int* const width, int* const height, int* const has_alpha);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_DECODE_VP8_H_ */
diff --git a/drivers/webpold/dec/frame.c b/drivers/webpold/dec/frame.c
new file mode 100644
index 0000000000..9c91a48e17
--- /dev/null
+++ b/drivers/webpold/dec/frame.c
@@ -0,0 +1,679 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Frame-reconstruction function. Memory allocation.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./vp8i.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define ALIGN_MASK (32 - 1)
+
+//------------------------------------------------------------------------------
+// Filtering
+
+// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
+// for caching, given a filtering level.
+// Simple filter:  up to 2 luma samples are read and 1 is written.
+// Complex filter: up to 4 luma samples are read and 3 are written. Same for
+//                 U/V, so it's 8 samples total (because of the 2x upsampling).
+static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
+
+static WEBP_INLINE int hev_thresh_from_level(int level, int keyframe) {
+  // From level 20 upward, key frames get a threshold one step lower.
+  if (level >= 40) return keyframe ? 2 : 3;
+  if (level >= 20) return keyframe ? 1 : 2;
+  if (level >= 15) return 1;
+  return 0;
+}
+// Applies the in-loop filter to macroblock (mb_x, mb_y) inside the row cache.
+static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
+  const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int y_bps = dec->cache_y_stride_;
+  VP8FInfo* const f_info = ctx->f_info_ + mb_x;   // filter info of this MB
+  uint8_t* const y_dst = dec->cache_y_ + ctx->id_ * 16 * y_bps + mb_x * 16;
+  const int level = f_info->f_level_;
+  const int ilevel = f_info->f_ilevel_;
+  const int limit = 2 * level + ilevel;
+  if (level == 0) {   // a zero level means no filtering for this macroblock
+    return;
+  }
+  if (dec->filter_type_ == 1) {   // simple: only the luma samples are passed
+    if (mb_x > 0) {   // not applied in the first macroblock column
+      VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
+    }
+    if (f_info->f_inner_) {
+      VP8SimpleHFilter16i(y_dst, y_bps, limit);
+    }
+    if (mb_y > 0) {   // not applied in the first macroblock row
+      VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
+    }
+    if (f_info->f_inner_) {
+      VP8SimpleVFilter16i(y_dst, y_bps, limit);
+    }
+  } else {    // complex: luma and both chroma planes are passed
+    const int uv_bps = dec->cache_uv_stride_;
+    uint8_t* const u_dst = dec->cache_u_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+    uint8_t* const v_dst = dec->cache_v_ + ctx->id_ * 8 * uv_bps + mb_x * 8;
+    const int hev_thresh =
+        hev_thresh_from_level(level, dec->frm_hdr_.key_frame_);
+    if (mb_x > 0) {   // not applied in the first macroblock column
+      VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
+      VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
+    }
+    if (f_info->f_inner_) {
+      VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
+      VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
+    }
+    if (mb_y > 0) {   // not applied in the first macroblock row
+      VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
+      VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
+    }
+    if (f_info->f_inner_) {
+      VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
+      VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
+    }
+  }
+}
+
+// Filters macroblocks [tl_mb_x_, br_mb_x_) of the row held in thread_ctx_.
+static void FilterRow(const VP8Decoder* const dec) {
+  const int row = dec->thread_ctx_.mb_y_;
+  int col = dec->tl_mb_x_;
+  assert(dec->thread_ctx_.filter_row_);
+  for (; col < dec->br_mb_x_; ++col) {
+    DoFilter(dec, col, row);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Stores the filter parameters and decoded samples of the current macroblock.
+void VP8StoreBlock(VP8Decoder* const dec) {
+  if (dec->filter_type_ > 0) {   // filter info is only recorded when filtering
+    VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
+    const int skip = dec->mb_info_[dec->mb_x_].skip_;
+    int level = dec->filter_levels_[dec->segment_];
+    if (dec->filter_hdr_.use_lf_delta_) {
+      // TODO(skal): only CURRENT is handled for now.
+      level += dec->filter_hdr_.ref_lf_delta_[0];
+      if (dec->is_i4x4_) {
+        level += dec->filter_hdr_.mode_lf_delta_[0];
+      }
+    }
+    level = (level < 0) ? 0 : (level > 63) ? 63 : level;   // clamp to [0, 63]
+    info->f_level_ = level;
+    // f_ilevel_: the level shifted down by sharpness and capped at 9 - sharpness.
+    if (dec->filter_hdr_.sharpness_ > 0) {
+      if (dec->filter_hdr_.sharpness_ > 4) {
+        level >>= 2;
+      } else {
+        level >>= 1;
+      }
+      if (level > 9 - dec->filter_hdr_.sharpness_) {
+        level = 9 - dec->filter_hdr_.sharpness_;
+      }
+    }
+    // f_ilevel_ is never stored below 1.
+    info->f_ilevel_ = (level < 1) ? 1 : level;
+    info->f_inner_ = (!skip || dec->is_i4x4_);
+  }
+  {
+    // Transfer samples to row cache
+    int y;
+    const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
+    const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
+    uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
+    uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
+    uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
+    for (y = 0; y < 16; ++y) {   // 16 rows of 16 bytes from yuv_b_ + Y_OFF
+      memcpy(ydst + y * dec->cache_y_stride_,
+             dec->yuv_b_ + Y_OFF + y * BPS, 16);
+    }
+    for (y = 0; y < 8; ++y) {   // 8 rows of 8 bytes for each of U_OFF, V_OFF
+      memcpy(udst + y * dec->cache_uv_stride_,
+           dec->yuv_b_ + U_OFF + y * BPS, 8);
+      memcpy(vdst + y * dec->cache_uv_stride_,
+           dec->yuv_b_ + V_OFF + y * BPS, 8);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// This function is called after a row of macroblocks is finished decoding.
+// It also takes into account the following restrictions:
+//  * In case of in-loop filtering, we must hold off sending some of the bottom
+//    pixels as they are yet unfiltered. They will be when the next macroblock
+//    row is decoded. Meanwhile, we must preserve them by rotating them in the
+//    cache area. This doesn't hold for the very bottom row of the uncropped
+//    picture of course.
+//  * we must clip the remaining pixels against the cropping area. The VP8Io
+//    struct must have the following fields set correctly before calling put():
+
+#define MACROBLOCK_VPOS(mb_y)  ((mb_y) * 16)    // vertical position of a MB
+
+// Finalize and transmit a complete row. Return false in case of user-abort.
+static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 1;
+  const VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
+  const int ysize = extra_y_rows * dec->cache_y_stride_;
+  const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
+  const int y_offset = ctx->id_ * 16 * dec->cache_y_stride_;
+  const int uv_offset = ctx->id_ * 8 * dec->cache_uv_stride_;
+  uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
+  uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
+  uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
+  const int first_row = (ctx->mb_y_ == 0);
+  const int last_row = (ctx->mb_y_ >= dec->br_mb_y_ - 1);
+  int y_start = MACROBLOCK_VPOS(ctx->mb_y_);
+  int y_end = MACROBLOCK_VPOS(ctx->mb_y_ + 1);
+  // Filter the row first, when the thread context asks for it.
+  if (ctx->filter_row_) {
+    FilterRow(dec);
+  }
+  // The output below only happens when a put() hook is installed.
+  if (io->put) {
+    if (!first_row) {
+      y_start -= extra_y_rows;   // start 'extra_y_rows' rows earlier
+      io->y = ydst;
+      io->u = udst;
+      io->v = vdst;
+    } else {
+      io->y = dec->cache_y_ + y_offset;
+      io->u = dec->cache_u_ + uv_offset;
+      io->v = dec->cache_v_ + uv_offset;
+    }
+    // Except on the last row, the bottom 'extra_y_rows' rows are not sent yet.
+    if (!last_row) {
+      y_end -= extra_y_rows;
+    }
+    if (y_end > io->crop_bottom) {
+      y_end = io->crop_bottom;    // make sure we don't overflow on last row.
+    }
+    io->a = NULL;
+    if (dec->alpha_data_ != NULL && y_start < y_end) {
+      // TODO(skal): several things to correct here:
+      // * testing presence of alpha with dec->alpha_data_ is not a good idea
+      // * we're actually decompressing the full plane only once. It should be
+      //   more obvious from signature.
+      // * we could free alpha_data_ right after this call, but we don't own.
+      io->a = VP8DecompressAlphaRows(dec, y_start, y_end - y_start);
+      if (io->a == NULL) {
+        return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                           "Could not decode alpha data.");
+      }
+    }
+    if (y_start < io->crop_top) {   // skip the rows above the crop window
+      const int delta_y = io->crop_top - y_start;
+      y_start = io->crop_top;
+      assert(!(delta_y & 1));
+      io->y += dec->cache_y_stride_ * delta_y;
+      io->u += dec->cache_uv_stride_ * (delta_y >> 1);
+      io->v += dec->cache_uv_stride_ * (delta_y >> 1);
+      if (io->a != NULL) {
+        io->a += io->width * delta_y;
+      }
+    }
+    if (y_start < y_end) {   // put() is only called when rows remain
+      io->y += io->crop_left;
+      io->u += io->crop_left >> 1;
+      io->v += io->crop_left >> 1;
+      if (io->a != NULL) {
+        io->a += io->crop_left;
+      }
+      io->mb_y = y_start - io->crop_top;
+      io->mb_w = io->crop_right - io->crop_left;
+      io->mb_h = y_end - y_start;
+      ok = io->put(io);
+    }
+  }
+  // rotate top samples if needed
+  if (ctx->id_ + 1 == dec->num_caches_) {
+    if (!last_row) {
+      memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
+      memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
+      memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
+    }
+  }
+
+  return ok;
+}
+
+#undef MACROBLOCK_VPOS
+
+//------------------------------------------------------------------------------
+// Finishes the current row directly, or launches a worker job to finish it.
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 1;
+  VP8ThreadContext* const ctx = &dec->thread_ctx_;
+  if (!dec->use_threads_) {
+    // ctx->id_ and ctx->f_info_ are already set
+    ctx->mb_y_ = dec->mb_y_;
+    ctx->filter_row_ = dec->filter_row_;
+    ok = FinishRow(dec, io);
+  } else {
+    WebPWorker* const worker = &dec->worker_;
+    // Finish previous job *before* updating context
+    ok &= WebPWorkerSync(worker);
+    assert(worker->status_ == OK);
+    if (ok) {   // spawn a new deblocking/output job
+      ctx->io_ = *io;   // the job gets its own copy of the io structure
+      ctx->id_ = dec->cache_id_;
+      ctx->mb_y_ = dec->mb_y_;
+      ctx->filter_row_ = dec->filter_row_;
+      if (ctx->filter_row_) {    // just swap filter info
+        VP8FInfo* const tmp = ctx->f_info_;
+        ctx->f_info_ = dec->f_info_;
+        dec->f_info_ = tmp;
+      }
+      WebPWorkerLaunch(worker);
+      if (++dec->cache_id_ == dec->num_caches_) {   // wrap to the first cache
+        dec->cache_id_ = 0;
+      }
+    }
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// Finish setting up the decoding parameter once user's setup() is called.
+
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
+  // Call setup() first. This may trigger additional decoding features on 'io'.
+  // Note: Afterward, we must call teardown() no matter what.
+  if (io->setup && !io->setup(io)) {
+    VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
+    return dec->status_;
+  }
+
+  // Disable filtering per user request
+  if (io->bypass_filtering) {
+    dec->filter_type_ = 0;
+  }
+  // TODO(skal): filter type / strength / sharpness forcing
+
+  // Define the area where we can skip in-loop filtering, in case of cropping.
+  //
+  // The 'Simple' filter reads two luma samples outside of the macroblock and
+  // filters one. It doesn't filter the chroma samples. Hence, we can
+  // avoid doing the in-loop filtering before crop_top/crop_left position.
+  // For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
+  // This means there's a dependency chain that goes all the way up to the
+  // top-left corner of the picture (MB #0). We must filter all the previous
+  // macroblocks.
+  // TODO(skal): add an 'approximate_decoding' option, that won't produce
+  // a 1:1 bit-exactness for complex filtering?
+  {
+    const int extra_pixels = kFilterExtraRows[dec->filter_type_];
+    if (dec->filter_type_ == 2) {
+      // For complex filter, we need to preserve the dependency chain.
+      dec->tl_mb_x_ = 0;
+      dec->tl_mb_y_ = 0;
+    } else {
+      // For simple filter, we can filter only the cropped region.
+      // We include 'extra_pixels' on the other side of the boundary, since
+      // vertical or horizontal filtering of the previous macroblock can
+      // modify some abutting pixels.
+      dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
+      dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
+      if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
+      if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
+    }
+    // We need some 'extra' pixels on the right/bottom.
+    dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
+    dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
+    if (dec->br_mb_x_ > dec->mb_w_) {
+      dec->br_mb_x_ = dec->mb_w_;
+    }
+    if (dec->br_mb_y_ > dec->mb_h_) {
+      dec->br_mb_y_ = dec->mb_h_;
+    }
+  }
+  return VP8_STATUS_OK;
+}
+
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
+  int ok = 1;
+  if (dec->use_threads_) {
+    ok = WebPWorkerSync(&dec->worker_);
+  }
+
+  if (io->teardown) {
+    io->teardown(io);
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
+//
+// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
+// immediately, and needs to wait for first few rows of the next macroblock to
+// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
+// on strength).
+// With two threads, the vertical positions of the rows being decoded are:
+// Decode:  [ 0..15][16..31][32..47][48..63][64..79][...
+// Deblock:         [ 0..11][12..27][28..43][44..59][...
+// If we use two threads and two caches of 16 pixels, the sequence would be:
+// Decode:  [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
+// Deblock:         [ 0..11][12..27!!][-4..11][12..27][...
+// The problem occurs during row [12..15!!] that both the decoding and
+// deblocking threads are writing simultaneously.
+// With 3 cache lines, one gets a safe write pattern:
+// Decode:  [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
+// Deblock:         [ 0..11][12..27][28..43][-4..11][12..27][28...
+// Note that multi-threaded output _without_ deblocking can make use of two
+// cache lines of 16 pixels only, since there's no lagging behind. The decoding
+// and output process have non-concurrent writing:
+// Decode:  [ 0..15][16..31][ 0..15][16..31][...
+// io->put:         [ 0..15][16..31][ 0..15][...
+
+#define MT_CACHE_LINES 3
+#define ST_CACHE_LINES 1   // 1 cache row only for single-threaded case
+
+// Initialize multi/single-thread worker
+static int InitThreadContext(VP8Decoder* const dec) {
+  dec->cache_id_ = 0;
+  if (dec->use_threads_) {
+    WebPWorker* const worker = &dec->worker_;
+    if (!WebPWorkerReset(worker)) {
+      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
+                         "thread initialization failed.");
+    }
+    worker->data1 = dec;
+    worker->data2 = (void*)&dec->thread_ctx_.io_;
+    worker->hook = (WebPWorkerHook)FinishRow;
+    dec->num_caches_ =
+      (dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
+  } else {
+    dec->num_caches_ = ST_CACHE_LINES;
+  }
+  return 1;
+}
+
+#undef MT_CACHE_LINES
+#undef ST_CACHE_LINES
+
+//------------------------------------------------------------------------------
+// Memory setup
+
+static int AllocateMemory(VP8Decoder* const dec) {
+  const int num_caches = dec->num_caches_;
+  const int mb_w = dec->mb_w_;
+  // Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
+  const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
+  const size_t top_size = (16 + 8 + 8) * mb_w;
+  const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
+  const size_t f_info_size =
+      (dec->filter_type_ > 0) ?
+          mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
+        : 0;
+  const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
+  const size_t coeffs_size = 384 * sizeof(*dec->coeffs_);
+  const size_t cache_height = (16 * num_caches
+                            + kFilterExtraRows[dec->filter_type_]) * 3 / 2;
+  const size_t cache_size = top_size * cache_height;
+  // alpha_size is the only one that scales as width x height.
+  const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
+      (uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
+  const uint64_t needed = (uint64_t)intra_pred_mode_size
+                        + top_size + mb_info_size + f_info_size
+                        + yuv_size + coeffs_size
+                        + cache_size + alpha_size + ALIGN_MASK;
+  uint8_t* mem;
+
+  if (needed != (size_t)needed) return 0;  // overflow; no status is set here
+  if (needed > dec->mem_size_) {
+    free(dec->mem_);
+    dec->mem_size_ = 0;
+    dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
+    if (dec->mem_ == NULL) {
+      return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
+                         "no memory during frame initialization.");
+    }
+    // down-cast is ok, thanks to WebPSafeMalloc() above.
+    dec->mem_size_ = (size_t)needed;
+  }
+
+  mem = (uint8_t*)dec->mem_;
+  dec->intra_t_ = (uint8_t*)mem;
+  mem += intra_pred_mode_size;
+
+  dec->y_t_ = (uint8_t*)mem;
+  mem += 16 * mb_w;
+  dec->u_t_ = (uint8_t*)mem;
+  mem += 8 * mb_w;
+  dec->v_t_ = (uint8_t*)mem;
+  mem += 8 * mb_w;
+
+  dec->mb_info_ = ((VP8MB*)mem) + 1;
+  mem += mb_info_size;
+
+  dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
+  mem += f_info_size;
+  dec->thread_ctx_.id_ = 0;
+  dec->thread_ctx_.f_info_ = dec->f_info_;
+  if (dec->use_threads_) {
+    // secondary cache line. The deblocking process needs to make use of the
+    // filtering strength from previous macroblock row, while the new ones
+    // are being decoded in parallel. We'll just swap the pointers.
+    dec->thread_ctx_.f_info_ += mb_w;
+  }
+
+  mem = (uint8_t*)((uintptr_t)(mem + ALIGN_MASK) & ~ALIGN_MASK);
+  assert((yuv_size & ALIGN_MASK) == 0);
+  dec->yuv_b_ = (uint8_t*)mem;
+  mem += yuv_size;
+
+  dec->coeffs_ = (int16_t*)mem;
+  mem += coeffs_size;
+
+  dec->cache_y_stride_ = 16 * mb_w;
+  dec->cache_uv_stride_ = 8 * mb_w;
+  {
+    const int extra_rows = kFilterExtraRows[dec->filter_type_];
+    const int extra_y = extra_rows * dec->cache_y_stride_;
+    const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
+    dec->cache_y_ = ((uint8_t*)mem) + extra_y;
+    dec->cache_u_ = dec->cache_y_
+                  + 16 * num_caches * dec->cache_y_stride_ + extra_uv;
+    dec->cache_v_ = dec->cache_u_
+                  + 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
+    dec->cache_id_ = 0;
+  }
+  mem += cache_size;
+
+  // alpha plane
+  dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
+  mem += alpha_size;
+
+  // note: left-info is initialized once for all.
+  memset(dec->mb_info_ - 1, 0, mb_info_size);
+
+  // initialize top
+  memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
+
+  return 1;
+}
+
+static void InitIo(VP8Decoder* const dec, VP8Io* io) {
+  // prepare 'io'
+  io->mb_y = 0;
+  io->y = dec->cache_y_;
+  io->u = dec->cache_u_;
+  io->v = dec->cache_v_;
+  io->y_stride = dec->cache_y_stride_;
+  io->uv_stride = dec->cache_uv_stride_;
+  io->a = NULL;
+}
+
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* io) {
+  if (!InitThreadContext(dec)) return 0;  // call first. Sets dec->num_caches_.
+  if (!AllocateMemory(dec)) return 0;
+  InitIo(dec, io);
+  VP8DspInit();  // Init critical function pointers and look-up tables.
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Main reconstruction function.
+
+static const int kScan[16] = {
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
+};
+
+static WEBP_INLINE int CheckMode(VP8Decoder* const dec, int mode) {
+  if (mode == B_DC_PRED) {
+    if (dec->mb_x_ == 0) {
+      return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
+    } else {
+      return (dec->mb_y_ == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
+    }
+  }
+  return mode;
+}
+
+static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
+  memcpy(dst, src, 4);  // moves 4 samples; no uint32_t* type-punning needed
+}
+
+void VP8ReconstructBlock(VP8Decoder* const dec) {
+  uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
+  uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
+  uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
+
+  // Rotate in the left samples from previously decoded block. We move four
+  // pixels at a time for alignment reason, and because of in-loop filter.
+  if (dec->mb_x_ > 0) {
+    int j;
+    for (j = -1; j < 16; ++j) {
+      Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
+    }
+    for (j = -1; j < 8; ++j) {
+      Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
+      Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
+    }
+  } else {
+    int j;
+    for (j = 0; j < 16; ++j) {
+      y_dst[j * BPS - 1] = 129;
+    }
+    for (j = 0; j < 8; ++j) {
+      u_dst[j * BPS - 1] = 129;
+      v_dst[j * BPS - 1] = 129;
+    }
+    // Init top-left sample on left column too
+    if (dec->mb_y_ > 0) {
+      y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
+    }
+  }
+  {
+    // bring top samples into the cache
+    uint8_t* const top_y = dec->y_t_ + dec->mb_x_ * 16;
+    uint8_t* const top_u = dec->u_t_ + dec->mb_x_ * 8;
+    uint8_t* const top_v = dec->v_t_ + dec->mb_x_ * 8;
+    const int16_t* coeffs = dec->coeffs_;
+    int n;
+
+    if (dec->mb_y_ > 0) {
+      memcpy(y_dst - BPS, top_y, 16);
+      memcpy(u_dst - BPS, top_u, 8);
+      memcpy(v_dst - BPS, top_v, 8);
+    } else if (dec->mb_x_ == 0) {
+      // we only need to do this init once at block (0,0).
+      // Afterward, it remains valid for the whole topmost row.
+      memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
+      memset(u_dst - BPS - 1, 127, 8 + 1);
+      memset(v_dst - BPS - 1, 127, 8 + 1);
+    }
+
+    // predict and add residuals
+
+    if (dec->is_i4x4_) {   // 4x4
+      uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
+
+      if (dec->mb_y_ > 0) {
+        if (dec->mb_x_ >= dec->mb_w_ - 1) {    // on rightmost border
+          top_right[0] = top_y[15] * 0x01010101u;
+        } else {
+          memcpy(top_right, top_y + 16, sizeof(*top_right));
+        }
+      }
+      // replicate the top-right pixels below
+      top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
+
+      // predict and add residues for all 4x4 blocks in turn.
+      for (n = 0; n < 16; n++) {
+        uint8_t* const dst = y_dst + kScan[n];
+        VP8PredLuma4[dec->imodes_[n]](dst);
+        if (dec->non_zero_ac_ & (1 << n)) {
+          VP8Transform(coeffs + n * 16, dst, 0);
+        } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
+          VP8TransformDC(coeffs + n * 16, dst);
+        }
+      }
+    } else {    // 16x16
+      const int pred_func = CheckMode(dec, dec->imodes_[0]);
+      VP8PredLuma16[pred_func](y_dst);
+      if (dec->non_zero_) {
+        for (n = 0; n < 16; n++) {
+          uint8_t* const dst = y_dst + kScan[n];
+          if (dec->non_zero_ac_ & (1 << n)) {
+            VP8Transform(coeffs + n * 16, dst, 0);
+          } else if (dec->non_zero_ & (1 << n)) {  // only DC is present
+            VP8TransformDC(coeffs + n * 16, dst);
+          }
+        }
+      }
+    }
+    {
+      // Chroma
+      const int pred_func = CheckMode(dec, dec->uvmode_);
+      VP8PredChroma8[pred_func](u_dst);
+      VP8PredChroma8[pred_func](v_dst);
+
+      if (dec->non_zero_ & 0x0f0000) {   // chroma-U
+        const int16_t* const u_coeffs = dec->coeffs_ + 16 * 16;
+        if (dec->non_zero_ac_ & 0x0f0000) {
+          VP8TransformUV(u_coeffs, u_dst);
+        } else {
+          VP8TransformDCUV(u_coeffs, u_dst);
+        }
+      }
+      if (dec->non_zero_ & 0xf00000) {   // chroma-V
+        const int16_t* const v_coeffs = dec->coeffs_ + 20 * 16;
+        if (dec->non_zero_ac_ & 0xf00000) {
+          VP8TransformUV(v_coeffs, v_dst);
+        } else {
+          VP8TransformDCUV(v_coeffs, v_dst);
+        }
+      }
+
+      // stash away top samples for next block
+      if (dec->mb_y_ < dec->mb_h_ - 1) {
+        memcpy(top_y, y_dst + 15 * BPS, 16);
+        memcpy(top_u, u_dst +  7 * BPS,  8);
+        memcpy(top_v, v_dst +  7 * BPS,  8);
+      }
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/idec.c b/drivers/webpold/dec/idec.c
new file mode 100644
index 0000000000..7df790ced8
--- /dev/null
+++ b/drivers/webpold/dec/idec.c
@@ -0,0 +1,785 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Incremental decoding
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "./webpi.h"
+#include "./vp8i.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// In append mode, buffer allocations increase as multiples of this value.
+// Needs to be a power of 2.
+#define CHUNK_SIZE 4096
+#define MAX_MB_SIZE 4096
+
+//------------------------------------------------------------------------------
+// Data structures for memory and states
+
+// Decoding states (STATE_ prefix omitted). Lossy: PRE_VP8->VP8_FRAME_HEADER->
+// VP8_PARTS0->VP8_DATA->DONE. Lossless: PRE_VP8->VP8L_HEADER->VP8L_DATA->DONE.
+typedef enum {
+  STATE_PRE_VP8,  // All data before that of the first VP8 chunk.
+  STATE_VP8_FRAME_HEADER,  // For VP8 Frame header (within VP8 chunk).
+  STATE_VP8_PARTS0,   // Waiting for, then parsing, the whole partition #0.
+  STATE_VP8_DATA,     // Decoding the macroblock rows.
+  STATE_VP8L_HEADER,  // Waiting for, then parsing, the lossless header.
+  STATE_VP8L_DATA,    // Lossless data: decoded once the full chunk is in.
+  STATE_DONE,         // Image fully decoded.
+  STATE_ERROR         // Set by IDecError() when decoding fails.
+} DecState;
+
+// Operating state for the MemBuffer
+typedef enum {
+  MEM_MODE_NONE = 0,
+  MEM_MODE_APPEND,
+  MEM_MODE_MAP
+} MemBufferMode;
+
+// storage for partition #0 and partial data (in a rolling fashion)
+typedef struct {
+  MemBufferMode mode_;  // Operation mode
+  size_t start_;        // start location of the data to be decoded
+  size_t end_;          // end location
+  size_t buf_size_;     // size of the allocated buffer
+  uint8_t* buf_;        // We don't own this buffer in case WebPIUpdate()
+
+  size_t part0_size_;         // size of partition #0
+  const uint8_t* part0_buf_;  // buffer to store partition #0
+} MemBuffer;
+
+struct WebPIDecoder {
+  DecState state_;         // current decoding state
+  WebPDecParams params_;   // Params to store output info
+  int is_lossless_;        // for down-casting 'dec_'.
+  void* dec_;              // either a VP8Decoder or a VP8LDecoder instance
+  VP8Io io_;
+
+  MemBuffer mem_;          // input memory buffer.
+  WebPDecBuffer output_;   // output buffer (when no external one is supplied)
+  size_t chunk_size_;      // Compressed VP8/VP8L size extracted from Header.
+};
+
+// MB context to restore in case VP8DecodeMB() fails
+typedef struct {
+  VP8MB left_;
+  VP8MB info_;
+  uint8_t intra_t_[4];
+  uint8_t intra_l_[4];
+  VP8BitReader br_;
+  VP8BitReader token_br_;
+} MBContext;
+
+//------------------------------------------------------------------------------
+// MemBuffer: incoming data handling
+
+static void RemapBitReader(VP8BitReader* const br, ptrdiff_t offset) {
+  if (br->buf_ != NULL) {
+    br->buf_ += offset;
+    br->buf_end_ += offset;
+  }
+}
+
+static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
+  return (mem->end_ - mem->start_);
+}
+
+static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* const new_base = mem->buf_ + mem->start_;
+  // note: for VP8, setting up idec->io_ is only really needed at the beginning
+  // of the decoding, till partition #0 is complete.
+  idec->io_.data = new_base;
+  idec->io_.data_size = MemDataSize(mem);
+
+  if (idec->dec_ != NULL) {
+    if (!idec->is_lossless_) {
+      VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+      const int last_part = dec->num_parts_ - 1;
+      if (offset != 0) {
+        int p;
+        for (p = 0; p <= last_part; ++p) {
+          RemapBitReader(dec->parts_ + p, offset);
+        }
+        // Remap partition #0 data pointer to new offset, but only in MAP
+        // mode (in APPEND mode, partition #0 is copied into a fixed memory).
+        if (mem->mode_ == MEM_MODE_MAP) {
+          RemapBitReader(&dec->br_, offset);
+        }
+      }
+      assert(last_part >= 0);
+      dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
+    } else {    // Resize lossless bitreader
+      VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+      VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
+    }
+  }
+}
+
+// Appends data to MemBuffer->buf_, growing it (and remapping the bit-readers)
+// as needed; buf_ is still NULL on the first call. Returns 0 on failure.
+static int AppendToMemBuffer(WebPIDecoder* const idec,
+                             const uint8_t* const data, size_t data_size) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* const old_base = mem->buf_ ? mem->buf_ + mem->start_ : NULL;
+  assert(mem->mode_ == MEM_MODE_APPEND);
+  if (data_size > MAX_CHUNK_PAYLOAD) {
+    // security safeguard: trying to allocate more than what the format
+    // allows for a chunk should be considered a smoke smell.
+    return 0;
+  }
+
+  if (mem->end_ + data_size > mem->buf_size_) {  // Need some free memory
+    const size_t current_size = MemDataSize(mem);
+    const uint64_t new_size = (uint64_t)current_size + data_size;
+    const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
+    uint8_t* const new_buf =
+        (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
+    if (new_buf == NULL) return 0;
+    if (old_base != NULL) memcpy(new_buf, old_base, current_size);
+    free(mem->buf_);
+    mem->buf_ = new_buf;
+    mem->buf_size_ = (size_t)extra_size;
+    mem->start_ = 0;
+    mem->end_ = current_size;
+  }
+
+  memcpy(mem->buf_ + mem->end_, data, data_size);
+  mem->end_ += data_size;
+  assert(mem->end_ <= mem->buf_size_);
+
+  DoRemap(idec, mem->buf_ + mem->start_ - old_base);
+  return 1;
+}
+
+static int RemapMemBuffer(WebPIDecoder* const idec,
+                          const uint8_t* const data, size_t data_size) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* const old_base = mem->buf_ + mem->start_;
+  assert(mem->mode_ == MEM_MODE_MAP);
+
+  if (data_size < mem->buf_size_) return 0;  // can't remap to a shorter buffer!
+
+  mem->buf_ = (uint8_t*)data;
+  mem->end_ = mem->buf_size_ = data_size;
+
+  DoRemap(idec, mem->buf_ + mem->start_ - old_base);
+  return 1;
+}
+
+static void InitMemBuffer(MemBuffer* const mem) {
+  mem->mode_       = MEM_MODE_NONE;
+  mem->buf_        = NULL;
+  mem->buf_size_   = 0;
+  mem->part0_buf_  = NULL;
+  mem->part0_size_ = 0;
+}
+
+static void ClearMemBuffer(MemBuffer* const mem) {
+  assert(mem);
+  if (mem->mode_ == MEM_MODE_APPEND) {
+    free(mem->buf_);
+    free((void*)mem->part0_buf_);
+  }
+}
+
+static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
+  if (mem->mode_ == MEM_MODE_NONE) {
+    mem->mode_ = expected;    // switch to the expected mode
+  } else if (mem->mode_ != expected) {
+    return 0;         // we mixed the modes => error
+  }
+  assert(mem->mode_ == expected);   // mode is ok
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Macroblock-decoding contexts
+
+static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
+                        MBContext* const context) {
+  const VP8BitReader* const br = &dec->br_;
+  const VP8MB* const left = dec->mb_info_ - 1;
+  const VP8MB* const info = dec->mb_info_ + dec->mb_x_;
+
+  context->left_ = *left;
+  context->info_ = *info;
+  context->br_ = *br;
+  context->token_br_ = *token_br;
+  memcpy(context->intra_t_, dec->intra_t_ + 4 * dec->mb_x_, 4);
+  memcpy(context->intra_l_, dec->intra_l_, 4);
+}
+
+static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
+                           VP8BitReader* const token_br) {
+  VP8BitReader* const br = &dec->br_;
+  VP8MB* const left = dec->mb_info_ - 1;
+  VP8MB* const info = dec->mb_info_ + dec->mb_x_;
+
+  *left = context->left_;
+  *info = context->info_;
+  *br = context->br_;
+  *token_br = context->token_br_;
+  memcpy(dec->intra_t_ + 4 * dec->mb_x_, context->intra_t_, 4);
+  memcpy(dec->intra_l_, context->intra_l_, 4);
+}
+
+//------------------------------------------------------------------------------
+
+static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
+  if (idec->state_ == STATE_VP8_DATA) {
+    VP8Io* const io = &idec->io_;
+    if (io->teardown) {
+      io->teardown(io);
+    }
+  }
+  idec->state_ = STATE_ERROR;
+  return error;
+}
+
+static void ChangeState(WebPIDecoder* const idec, DecState new_state,
+                        size_t consumed_bytes) {
+  MemBuffer* const mem = &idec->mem_;
+  idec->state_ = new_state;
+  mem->start_ += consumed_bytes;
+  assert(mem->start_ <= mem->end_);
+  idec->io_.data = mem->buf_ + mem->start_;
+  idec->io_.data_size = MemDataSize(mem);
+}
+
+// Headers
+static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
+  MemBuffer* const mem = &idec->mem_;
+  const uint8_t* data = mem->buf_ + mem->start_;
+  size_t curr_size = MemDataSize(mem);
+  VP8StatusCode status;
+  WebPHeaderStructure headers;
+
+  headers.data = data;
+  headers.data_size = curr_size;
+  status = WebPParseHeaders(&headers);
+  if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    return VP8_STATUS_SUSPENDED;  // We haven't found a VP8 chunk yet.
+  } else if (status != VP8_STATUS_OK) {
+    return IDecError(idec, status);
+  }
+
+  idec->chunk_size_ = headers.compressed_size;
+  idec->is_lossless_ = headers.is_lossless;
+  if (!idec->is_lossless_) {
+    VP8Decoder* const dec = VP8New();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    idec->dec_ = dec;
+#ifdef WEBP_USE_THREAD
+    dec->use_threads_ = (idec->params_.options != NULL) &&
+                        (idec->params_.options->use_threads > 0);
+#else
+    dec->use_threads_ = 0;
+#endif
+    dec->alpha_data_ = headers.alpha_data;
+    dec->alpha_data_size_ = headers.alpha_data_size;
+    ChangeState(idec, STATE_VP8_FRAME_HEADER, headers.offset);
+  } else {
+    VP8LDecoder* const dec = VP8LNew();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    idec->dec_ = dec;
+    ChangeState(idec, STATE_VP8L_HEADER, headers.offset);
+  }
+  return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
+  const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
+  const size_t curr_size = MemDataSize(&idec->mem_);
+  uint32_t bits;
+
+  if (curr_size < VP8_FRAME_HEADER_SIZE) {
+    // Not enough data bytes to extract VP8 Frame Header.
+    return VP8_STATUS_SUSPENDED;
+  }
+  if (!VP8GetInfo(data, curr_size, idec->chunk_size_, NULL, NULL)) {
+    return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+  }
+
+  bits = data[0] | (data[1] << 8) | (data[2] << 16);
+  idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
+
+  idec->io_.data = data;
+  idec->io_.data_size = curr_size;
+  idec->state_ = STATE_VP8_PARTS0;
+  return VP8_STATUS_OK;
+}
+
+// Partition #0
+static int CopyParts0Data(WebPIDecoder* const idec) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  VP8BitReader* const br = &dec->br_;
+  const size_t psize = br->buf_end_ - br->buf_;
+  MemBuffer* const mem = &idec->mem_;
+  assert(!idec->is_lossless_);
+  assert(mem->part0_buf_ == NULL);
+  assert(psize > 0);
+  assert(psize <= mem->part0_size_);  // Format limit: no need for runtime check
+  if (mem->mode_ == MEM_MODE_APPEND) {
+    // We copy and grab ownership of the partition #0 data.
+    uint8_t* const part0_buf = (uint8_t*)malloc(psize);
+    if (part0_buf == NULL) {
+      return 0;
+    }
+    memcpy(part0_buf, br->buf_, psize);
+    mem->part0_buf_ = part0_buf;
+    br->buf_ = part0_buf;
+    br->buf_end_ = part0_buf + psize;
+  } else {
+    // Else: just keep pointers to the partition #0's data in dec_->br_.
+  }
+  mem->start_ += psize;
+  return 1;
+}
+
+static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  VP8Io* const io = &idec->io_;
+  const WebPDecParams* const params = &idec->params_;
+  WebPDecBuffer* const output = params->output;
+
+  // Wait till we have enough data for the whole partition #0
+  if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) {
+    return VP8_STATUS_SUSPENDED;
+  }
+
+  if (!VP8GetHeaders(dec, io)) {
+    const VP8StatusCode status = dec->status_;
+    if (status == VP8_STATUS_SUSPENDED ||
+        status == VP8_STATUS_NOT_ENOUGH_DATA) {
+      // treating NOT_ENOUGH_DATA as SUSPENDED state
+      return VP8_STATUS_SUSPENDED;
+    }
+    return IDecError(idec, status);
+  }
+
+  // Allocate/Verify output buffer now
+  dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
+                                       output);
+  if (dec->status_ != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+
+  if (!CopyParts0Data(idec)) {
+    return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
+  }
+
+  // Finish setting up the decoding parameters. Will call io->setup().
+  if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+
+  // Note: past this point, teardown() must always be called
+  // in case of error.
+  idec->state_ = STATE_VP8_DATA;
+  // Allocate memory and prepare everything.
+  if (!VP8InitFrame(dec, io)) {
+    return IDecError(idec, dec->status_);
+  }
+  return VP8_STATUS_OK;
+}
+
+// Remaining partitions
+static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
+  VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
+  VP8Io* const io = &idec->io_;
+
+  assert(dec->ready_);
+
+  for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
+    VP8BitReader* token_br = &dec->parts_[dec->mb_y_ & (dec->num_parts_ - 1)];
+    if (dec->mb_x_ == 0) {
+      VP8InitScanline(dec);
+    }
+    for (; dec->mb_x_ < dec->mb_w_;  dec->mb_x_++) {
+      MBContext context;
+      SaveContext(dec, token_br, &context);
+
+      if (!VP8DecodeMB(dec, token_br)) {
+        RestoreContext(&context, dec, token_br);
+        // We shouldn't fail when MAX_MB data was available
+        if (dec->num_parts_ == 1 && MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
+          return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
+        }
+        return VP8_STATUS_SUSPENDED;
+      }
+      VP8ReconstructBlock(dec);
+      // Store data and save block's filtering params
+      VP8StoreBlock(dec);
+
+      // Release buffer only if there is only one partition
+      if (dec->num_parts_ == 1) {
+        idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
+        assert(idec->mem_.start_ <= idec->mem_.end_);
+      }
+    }
+    if (!VP8ProcessRow(dec, io)) {
+      return IDecError(idec, VP8_STATUS_USER_ABORT);
+    }
+    dec->mb_x_ = 0;
+  }
+  // Synchronize the thread and check for errors.
+  if (!VP8ExitCritical(dec, io)) {
+    return IDecError(idec, VP8_STATUS_USER_ABORT);
+  }
+  dec->ready_ = 0;
+  idec->state_ = STATE_DONE;
+
+  return VP8_STATUS_OK;
+}
+
+static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
+                                         VP8StatusCode status) {
+  const int starved = (status == VP8_STATUS_SUSPENDED ||
+                       status == VP8_STATUS_NOT_ENOUGH_DATA);  // needs input
+  return starved ? VP8_STATUS_SUSPENDED : IDecError(idec, status);
+}
+
+static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
+  VP8Io* const io = &idec->io_;
+  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+  const WebPDecParams* const params = &idec->params_;
+  WebPDecBuffer* const output = params->output;
+  size_t curr_size = MemDataSize(&idec->mem_);
+  assert(idec->is_lossless_);
+
+  // Wait until there's enough data for decoding header.
+  if (curr_size < (idec->chunk_size_ >> 3)) {
+    return VP8_STATUS_SUSPENDED;
+  }
+  if (!VP8LDecodeHeader(dec, io)) {
+    return ErrorStatusLossless(idec, dec->status_);
+  }
+  // Allocate/verify output buffer now.
+  dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
+                                       output);
+  if (dec->status_ != VP8_STATUS_OK) {
+    return IDecError(idec, dec->status_);
+  }
+
+  idec->state_ = STATE_VP8L_DATA;
+  return VP8_STATUS_OK;
+}
+
+static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
+  VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
+  const size_t curr_size = MemDataSize(&idec->mem_);
+  assert(idec->is_lossless_);
+
+  // At present Lossless decoder can't decode image incrementally. So wait till
+  // all the image data is aggregated before image can be decoded.
+  if (curr_size < idec->chunk_size_) {
+    return VP8_STATUS_SUSPENDED;
+  }
+
+  if (!VP8LDecodeImage(dec)) {
+    return ErrorStatusLossless(idec, dec->status_);
+  }
+
+  idec->state_ = STATE_DONE;
+
+  return VP8_STATUS_OK;
+}
+
+// Main decoding routine: falls through successive states within one call.
+static VP8StatusCode IDecode(WebPIDecoder* idec) {
+  VP8StatusCode status = VP8_STATUS_SUSPENDED;
+
+  if (idec->state_ == STATE_PRE_VP8) {
+    status = DecodeWebPHeaders(idec);
+  } else {
+    if (idec->dec_ == NULL) {
+      return VP8_STATUS_SUSPENDED;    // can't continue if we have no decoder.
+    }
+  }
+  if (idec->state_ == STATE_VP8_FRAME_HEADER) {
+    status = DecodeVP8FrameHeader(idec);
+  }
+  if (idec->state_ == STATE_VP8_PARTS0) {
+    status = DecodePartition0(idec);
+  }
+  if (idec->state_ == STATE_VP8_DATA) {
+    status = DecodeRemaining(idec);
+  }
+  if (idec->state_ == STATE_VP8L_HEADER) {
+    status = DecodeVP8LHeader(idec);
+  }
+  if (idec->state_ == STATE_VP8L_DATA) {
+    status = DecodeVP8LData(idec);
+  }
+  return status;
+}
+
+//------------------------------------------------------------------------------
+// Public functions
+
+WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {  // returns NULL if allocation fails
+  WebPIDecoder* idec = (WebPIDecoder*)calloc(1, sizeof(*idec));  // zero-filled object
+  if (idec == NULL) {
+    return NULL;
+  }
+
+  idec->state_ = STATE_PRE_VP8;  // initial state tested first by IDecode()
+  idec->chunk_size_ = 0;
+
+  InitMemBuffer(&idec->mem_);
+  WebPInitDecBuffer(&idec->output_);
+  VP8InitIo(&idec->io_);
+
+  WebPResetDecParams(&idec->params_);
+  idec->params_.output = output_buffer ? output_buffer : &idec->output_;  // caller's buffer if given, else the internal one
+  WebPInitCustomIo(&idec->params_, &idec->io_);  // Plug the I/O functions.
+
+  return idec;
+}
+
+WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
+                          WebPDecoderConfig* config) {  // config may be NULL
+  WebPIDecoder* idec;
+
+  // Parse the bitstream's features, if requested (needs data AND config):
+  if (data != NULL && data_size > 0 && config != NULL) {
+    if (WebPGetFeatures(data, data_size, &config->input) != VP8_STATUS_OK) {
+      return NULL;  // any status other than OK aborts the creation
+    }
+  }
+  // Create an instance of the incremental decoder
+  idec = WebPINewDecoder(config ? &config->output : NULL);  // NULL selects the internal output buffer
+  if (idec == NULL) {
+    return NULL;
+  }
+  // Finish initialization
+  if (config != NULL) {
+    idec->params_.options = &config->options;  // pointer is stored, not copied
+  }
+  return idec;
+}
+
+// Releases the decoder and everything it owns; a NULL idec is ignored.
+void WebPIDelete(WebPIDecoder* idec) {
+  if (idec != NULL) {
+    if (idec->dec_ != NULL) {
+      // Tear down dec_ with the routine matching the is_lossless_ flag.
+      if (idec->is_lossless_) VP8LDelete(idec->dec_);
+      else VP8Delete(idec->dec_);
+    }
+    ClearMemBuffer(&idec->mem_);
+    WebPFreeDecBuffer(&idec->output_);
+    free(idec);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Wrapper toward WebPINewDecoder
+
+WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
+                          size_t output_buffer_size, int output_stride) {  // returns NULL on bad mode or allocation failure
+  WebPIDecoder* idec;
+  if (mode >= MODE_YUV) return NULL;  // only modes below MODE_YUV are accepted here
+  idec = WebPINewDecoder(NULL);  // NULL: decode into the decoder's own output_ descriptor
+  if (idec == NULL) return NULL;
+  idec->output_.colorspace = mode;
+  idec->output_.is_external_memory = 1;  // pixels live in the caller-supplied buffer
+  idec->output_.u.RGBA.rgba = output_buffer;
+  idec->output_.u.RGBA.stride = output_stride;
+  idec->output_.u.RGBA.size = output_buffer_size;
+  return idec;
+}
+
+WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
+                           uint8_t* u, size_t u_size, int u_stride,
+                           uint8_t* v, size_t v_size, int v_stride,
+                           uint8_t* a, size_t a_size, int a_stride) {  // 'a' may be NULL
+  WebPIDecoder* const idec = WebPINewDecoder(NULL);  // NULL: use the decoder's own output_ descriptor
+  if (idec == NULL) return NULL;
+  idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;  // alpha output only if a plane was supplied
+  idec->output_.is_external_memory = 1;  // all planes are caller-supplied
+  idec->output_.u.YUVA.y = luma;
+  idec->output_.u.YUVA.y_stride = luma_stride;
+  idec->output_.u.YUVA.y_size = luma_size;
+  idec->output_.u.YUVA.u = u;
+  idec->output_.u.YUVA.u_stride = u_stride;
+  idec->output_.u.YUVA.u_size = u_size;
+  idec->output_.u.YUVA.v = v;
+  idec->output_.u.YUVA.v_stride = v_stride;
+  idec->output_.u.YUVA.v_size = v_size;
+  idec->output_.u.YUVA.a = a;
+  idec->output_.u.YUVA.a_stride = a_stride;
+  idec->output_.u.YUVA.a_size = a_size;
+  return idec;
+}
+
+WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
+                          uint8_t* u, size_t u_size, int u_stride,
+                          uint8_t* v, size_t v_size, int v_stride) {  // WebPINewYUVA() without an alpha plane
+  return WebPINewYUVA(luma, luma_size, luma_stride,
+                      u, u_size, u_stride,
+                      v, v_size, v_stride,
+                      NULL, 0, 0);  // a == NULL makes WebPINewYUVA() select MODE_YUV
+}
+
+//------------------------------------------------------------------------------
+
+static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
+  assert(idec);
+  if (idec->state_ == STATE_ERROR) {
+    return VP8_STATUS_BITSTREAM_ERROR;
+  }
+  if (idec->state_ == STATE_DONE) {
+    return VP8_STATUS_OK;
+  }
+  return VP8_STATUS_SUSPENDED;
+}
+
+VP8StatusCode WebPIAppend(WebPIDecoder* idec,
+                          const uint8_t* data, size_t data_size) {  // append-mode feeding, then decode
+  VP8StatusCode status;
+  if (idec == NULL || data == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  status = IDecCheckStatus(idec);
+  if (status != VP8_STATUS_SUSPENDED) {
+    return status;  // already done or in error: nothing more to decode
+  }
+  // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
+  if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_APPEND)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  // Append data to memory buffer
+  if (!AppendToMemBuffer(idec, data, data_size)) {
+    return VP8_STATUS_OUT_OF_MEMORY;  // a failed append is reported as out-of-memory
+  }
+  return IDecode(idec);
+}
+
+VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
+                          const uint8_t* data, size_t data_size) {  // map-mode feeding, then decode
+  VP8StatusCode status;
+  if (idec == NULL || data == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  status = IDecCheckStatus(idec);
+  if (status != VP8_STATUS_SUSPENDED) {
+    return status;  // already done or in error: nothing more to decode
+  }
+  // Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
+  if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_MAP)) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  // Make the memory buffer point to the new buffer
+  if (!RemapMemBuffer(idec, data, data_size)) {
+    return VP8_STATUS_INVALID_PARAM;  // a failed remap is reported as a parameter error
+  }
+  return IDecode(idec);
+}
+
+//------------------------------------------------------------------------------
+
+static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
+  // No buffer is exposed without a decoder instance, nor while the state
+  // is still at or below STATE_VP8_PARTS0.
+  const int has_decoder = (idec != NULL) && (idec->dec_ != NULL);
+  if (!has_decoder || idec->state_ <= STATE_VP8_PARTS0) {
+    return NULL;
+  }
+  return idec->params_.output;
+}
+
+const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
+                                      int* left, int* top,
+                                      int* width, int* height) {  // every out-pointer may be NULL
+  const WebPDecBuffer* const src = GetOutputBuffer(idec);
+  if (left != NULL) *left = 0;  // the reported area always starts at (0, 0)
+  if (top != NULL) *top = 0;
+  // TODO(skal): later include handling of rotations.
+  if (src) {
+    if (width != NULL) *width = src->width;
+    if (height != NULL) *height = idec->params_.last_y;  // last_y, not the full buffer height
+  } else {
+    if (width != NULL) *width = 0;  // no buffer available: report an empty area
+    if (height != NULL) *height = 0;
+  }
+  return src;  // NULL when GetOutputBuffer() has nothing to expose
+}
+
+uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
+                        int* width, int* height, int* stride) {  // every out-pointer may be NULL
+  const WebPDecBuffer* const src = GetOutputBuffer(idec);
+  if (src == NULL) return NULL;  // out-parameters are left untouched on failure
+  if (src->colorspace >= MODE_YUV) {
+    return NULL;  // output buffer is not in an RGB-type mode
+  }
+
+  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (width != NULL) *width = src->width;
+  if (height != NULL) *height = src->height;
+  if (stride != NULL) *stride = src->u.RGBA.stride;
+
+  return src->u.RGBA.rgba;
+}
+
+uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
+                         uint8_t** u, uint8_t** v, uint8_t** a,
+                         int* width, int* height,
+                         int* stride, int* uv_stride, int* a_stride) {  // every out-pointer may be NULL
+  const WebPDecBuffer* const src = GetOutputBuffer(idec);
+  if (src == NULL) return NULL;  // out-parameters are left untouched on failure
+  if (src->colorspace < MODE_YUV) {
+    return NULL;  // output buffer is in an RGB-type mode
+  }
+
+  if (last_y != NULL) *last_y = idec->params_.last_y;
+  if (u != NULL) *u = src->u.YUVA.u;
+  if (v != NULL) *v = src->u.YUVA.v;
+  if (a != NULL) *a = src->u.YUVA.a;
+  if (width != NULL) *width = src->width;
+  if (height != NULL) *height = src->height;
+  if (stride != NULL) *stride = src->u.YUVA.y_stride;
+  if (uv_stride != NULL) *uv_stride = src->u.YUVA.u_stride;  // only the u stride is reported for both chroma planes
+  if (a_stride != NULL) *a_stride = src->u.YUVA.a_stride;
+
+  return src->u.YUVA.y;  // luma plane pointer
+}
+
+int WebPISetIOHooks(WebPIDecoder* const idec,
+                    VP8IoPutHook put,
+                    VP8IoSetupHook setup,
+                    VP8IoTeardownHook teardown,
+                    void* user_data) {  // returns 1 on success, 0 if refused
+  if (idec == NULL || idec->state_ > STATE_PRE_VP8) {
+    return 0;  // hooks can only be replaced while still in the initial state
+  }
+
+  idec->io_.put = put;  // overrides the hooks installed by WebPInitCustomIo()
+  idec->io_.setup = setup;
+  idec->io_.teardown = teardown;
+  idec->io_.opaque = user_data;
+
+  return 1;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/io.c b/drivers/webpold/dec/io.c
new file mode 100644
index 0000000000..594804c2e6
--- /dev/null
+++ b/drivers/webpold/dec/io.c
@@ -0,0 +1,633 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// functions for sample output.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../dec/vp8i.h"
+#include "./webpi.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Main YUV<->RGB conversion functions
+
+static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {  // straight copy of the y/u/v rows
+  WebPDecBuffer* output = p->output;
+  const WebPYUVABuffer* const buf = &output->u.YUVA;
+  uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride;
+  uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride;  // chroma rows are indexed at half the luma row
+  uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  const int uv_w = (mb_w + 1) / 2;  // half size, rounded up
+  const int uv_h = (mb_h + 1) / 2;
+  int j;
+  for (j = 0; j < mb_h; ++j) {
+    memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w);
+  }
+  for (j = 0; j < uv_h; ++j) {
+    memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w);
+    memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w);
+  }
+  return io->mb_h;  // number of luma rows written
+}
+
+// Point-sampling U/V sampler.
+static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {  // returns io->mb_h
+  WebPDecBuffer* output = p->output;
+  const WebPRGBABuffer* const buf = &output->u.RGBA;
+  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
+  const uint8_t* y_src = io->y;
+  const uint8_t* u_src = io->u;
+  const uint8_t* v_src = io->v;
+  const WebPSampleLinePairFunc sample = WebPSamplers[output->colorspace];  // converter picked by output colorspace
+  const int mb_w = io->mb_w;
+  const int last = io->mb_h - 1;
+  int j;
+  for (j = 0; j < last; j += 2) {  // two luma rows per iteration, sharing one u/v row
+    sample(y_src, y_src + io->y_stride, u_src, v_src,
+           dst, dst + buf->stride, mb_w);
+    y_src += 2 * io->y_stride;
+    u_src += io->uv_stride;
+    v_src += io->uv_stride;
+    dst += 2 * buf->stride;
+  }
+  if (j == last) {  // Odd row count: pass the last line as both rows of the pair
+    sample(y_src, y_src, u_src, v_src, dst, dst, mb_w);
+  }
+  return io->mb_h;
+}
+
+//------------------------------------------------------------------------------
+// YUV444 -> RGB conversion
+
+#if 0   // TODO(skal): this is for future rescaling.
+static int EmitRGB(const VP8Io* const io, WebPDecParams* const p) {  // currently compiled out by the enclosing #if 0
+  WebPDecBuffer* output = p->output;
+  const WebPRGBABuffer* const buf = &output->u.RGBA;
+  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
+  const uint8_t* y_src = io->y;
+  const uint8_t* u_src = io->u;
+  const uint8_t* v_src = io->v;
+  const WebPYUV444Converter convert = WebPYUV444Converters[output->colorspace];
+  const int mb_w = io->mb_w;
+  const int last = io->mb_h;
+  int j;
+  for (j = 0; j < last; ++j) {  // one u/v row per luma row
+    convert(y_src, u_src, v_src, dst, mb_w);
+    y_src += io->y_stride;
+    u_src += io->uv_stride;
+    v_src += io->uv_stride;
+    dst += buf->stride;
+  }
+  return io->mb_h;
+}
+#endif
+
+//------------------------------------------------------------------------------
+// Fancy upsampling
+
+#ifdef FANCY_UPSAMPLING
+static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {  // returns the number of finished output rows
+  int num_lines_out = io->mb_h;   // a priori guess
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
+  WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
+  const uint8_t* cur_y = io->y;
+  const uint8_t* cur_u = io->u;
+  const uint8_t* cur_v = io->v;
+  const uint8_t* top_u = p->tmp_u;  // chroma row saved by the previous call
+  const uint8_t* top_v = p->tmp_v;
+  int y = io->mb_y;
+  const int y_end = io->mb_y + io->mb_h;
+  const int mb_w = io->mb_w;
+  const int uv_w = (mb_w + 1) / 2;
+
+  if (y == 0) {
+    // First line is special cased. We mirror the u/v samples at boundary.
+    upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, mb_w);
+  } else {
+    // We can finish the left-over line from previous call.
+    upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
+             dst - buf->stride, dst, mb_w);
+    ++num_lines_out;  // the row left over from the previous call is now complete
+  }
+  // Loop over each output pairs of row.
+  for (; y + 2 < y_end; y += 2) {
+    top_u = cur_u;
+    top_v = cur_v;
+    cur_u += io->uv_stride;
+    cur_v += io->uv_stride;
+    dst += 2 * buf->stride;
+    cur_y += 2 * io->y_stride;
+    upsample(cur_y - io->y_stride, cur_y,
+             top_u, top_v, cur_u, cur_v,
+             dst - buf->stride, dst, mb_w);
+  }
+  // move to last row
+  cur_y += io->y_stride;
+  if (io->crop_top + y_end < io->crop_bottom) {
+    // Save the unfinished samples for next call (as we're not done yet).
+    memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y));
+    memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u));
+    memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v));
+    // The fancy upsampler leaves a row unfinished behind
+    // (except for the very last row)
+    num_lines_out--;
+  } else {
+    // Process the very last row of even-sized picture
+    if (!(y_end & 1)) {
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
+               dst + buf->stride, NULL, mb_w);
+    }
+  }
+  return num_lines_out;
+}
+
+#endif    /* FANCY_UPSAMPLING */
+
+//------------------------------------------------------------------------------
+
+static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {  // fills the output alpha plane; always returns 0
+  const uint8_t* alpha = io->a;
+  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
+  int j;
+
+  if (alpha != NULL) {
+    for (j = 0; j < mb_h; ++j) {
+      memcpy(dst, alpha, mb_w * sizeof(*dst));
+      alpha += io->width;  // source alpha rows are io->width bytes apart
+      dst += buf->a_stride;
+    }
+  } else if (buf->a != NULL) {
+    // the user requested alpha, but there is none, set it to opaque.
+    for (j = 0; j < mb_h; ++j) {
+      memset(dst, 0xff, mb_w * sizeof(*dst));
+      dst += buf->a_stride;
+    }
+  }
+  return 0;
+}
+
+static int GetAlphaSourceRow(const VP8Io* const io,
+                             const uint8_t** alpha, int* const num_rows) {  // returns the first row to fill; may adjust *alpha and *num_rows
+  int start_y = io->mb_y;
+  *num_rows = io->mb_h;  // default when fancy upsampling is off
+
+  // Compensate for the 1-line delay of the fancy upscaler.
+  // This is similar to EmitFancyRGB().
+  if (io->fancy_upsampling) {
+    if (start_y == 0) {
+      // We don't process the last row yet. It'll be done during the next call.
+      --*num_rows;
+    } else {
+      --start_y;
+      // Fortunately, *alpha data is persistent, so we can go back
+      // one row and finish alpha blending, now that the fancy upscaler
+      // completed the YUV->RGB interpolation.
+      *alpha -= io->width;
+    }
+    if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
+      // If it's the very last call, we process all the remaining rows!
+      *num_rows = io->crop_bottom - io->crop_top - start_y;
+    }
+  }
+  return start_y;
+}
+
+static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {  // writes alpha into 4-byte pixels; always returns 0
+  const uint8_t* alpha = io->a;
+  if (alpha != NULL) {  // nothing is written when the source has no alpha
+    const int mb_w = io->mb_w;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
+    const int alpha_first =
+        (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+    int num_rows;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);  // may move 'alpha' back one row
+    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);  // alpha is byte 0 or byte 3 of each pixel
+    uint32_t alpha_mask = 0xff;  // AND of all alpha values seen
+    int i, j;
+
+    for (j = 0; j < num_rows; ++j) {
+      for (i = 0; i < mb_w; ++i) {
+        const uint32_t alpha_value = alpha[i];
+        dst[4 * i] = alpha_value;
+        alpha_mask &= alpha_value;
+      }
+      alpha += io->width;
+      dst += buf->stride;
+    }
+    // alpha_mask is < 0xff if there's non-trivial alpha to premultiply with.
+    if (alpha_mask != 0xff && WebPIsPremultipliedMode(colorspace)) {
+      WebPApplyAlphaMultiply(base_rgba, alpha_first,
+                             mb_w, num_rows, buf->stride);
+    }
+  }
+  return 0;
+}
+
+static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p) {  // 2-byte-pixel variant; always returns 0
+  const uint8_t* alpha = io->a;
+  if (alpha != NULL) {  // nothing is written when the source has no alpha
+    const int mb_w = io->mb_w;
+    const WEBP_CSP_MODE colorspace = p->output->colorspace;
+    const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+    int num_rows;
+    const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);  // may move 'alpha' back one row
+    uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
+    uint8_t* alpha_dst = base_rgba + 1;  // alpha sits in the second byte of each 2-byte pixel
+    uint32_t alpha_mask = 0x0f;  // AND of all 4-bit alpha values seen
+    int i, j;
+
+    for (j = 0; j < num_rows; ++j) {
+      for (i = 0; i < mb_w; ++i) {
+        // Fill in the alpha value (converted to 4 bits).
+        const uint32_t alpha_value = alpha[i] >> 4;
+        alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;  // keep the high nibble, replace the low one
+        alpha_mask &= alpha_value;
+      }
+      alpha += io->width;
+      alpha_dst += buf->stride;
+    }
+    if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {  // skipped when every alpha nibble was 0xf
+      WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
+    }
+  }
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// YUV rescaling (no final RGB conversion needed)
+
+static int Rescale(const uint8_t* src, int src_stride,
+                   int new_lines, WebPRescaler* const wrk) {  // returns the summed WebPRescalerExport() results
+  int num_lines_out = 0;
+  while (new_lines > 0) {    // import new contributions of source rows.
+    const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
+    src += lines_in * src_stride;  // skip the rows just consumed
+    new_lines -= lines_in;
+    num_lines_out += WebPRescalerExport(wrk);    // emit output row(s)
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {  // rescales y, u and v independently
+  const int mb_h = io->mb_h;
+  const int uv_mb_h = (mb_h + 1) >> 1;  // chroma row count: half, rounded up
+  const int num_lines_out = Rescale(io->y, io->y_stride, mb_h, &p->scaler_y);
+  Rescale(io->u, io->uv_stride, uv_mb_h, &p->scaler_u);  // chroma line counts are ignored
+  Rescale(io->v, io->uv_stride, uv_mb_h, &p->scaler_v);
+  return num_lines_out;  // only the luma line count is reported
+}
+
+static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p) {  // always returns 0
+  if (io->a != NULL) {  // nothing to do when the source has no alpha
+    Rescale(io->a, io->width, io->mb_h, &p->scaler_a);  // alpha rows are io->width bytes apart
+  }
+  return 0;
+}
+
+static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {  // returns 1 on success, 0 on memory error
+  const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
+  const WebPYUVABuffer* const buf = &p->output->u.YUVA;
+  const int out_width  = io->scaled_width;
+  const int out_height = io->scaled_height;
+  const int uv_out_width  = (out_width + 1) >> 1;  // chroma sizes: half, rounded up
+  const int uv_out_height = (out_height + 1) >> 1;
+  const int uv_in_width  = (io->mb_w + 1) >> 1;
+  const int uv_in_height = (io->mb_h + 1) >> 1;
+  const size_t work_size = 2 * out_width;   // scratch memory for luma rescaler
+  const size_t uv_work_size = 2 * uv_out_width;  // and for each u/v ones
+  size_t tmp_size;
+  int32_t* work;
+
+  tmp_size = work_size + 2 * uv_work_size;  // layout: [y][u][v] and, if needed, [alpha]
+  if (has_alpha) {
+    tmp_size += work_size;
+  }
+  p->memory = calloc(1, tmp_size * sizeof(*work));  // freed by CustomTeardown()
+  if (p->memory == NULL) {
+    return 0;   // memory error
+  }
+  work = (int32_t*)p->memory;
+  WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+                   buf->y, out_width, out_height, buf->y_stride, 1,
+                   io->mb_w, out_width, io->mb_h, out_height,
+                   work);
+  WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+                   buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
+                   uv_in_width, uv_out_width,
+                   uv_in_height, uv_out_height,
+                   work + work_size);
+  WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+                   buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
+                   uv_in_width, uv_out_width,
+                   uv_in_height, uv_out_height,
+                   work + work_size + uv_work_size);
+  p->emit = EmitRescaledYUV;
+
+  if (has_alpha) {
+    WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+                     buf->a, out_width, out_height, buf->a_stride, 1,
+                     io->mb_w, out_width, io->mb_h, out_height,
+                     work + work_size + 2 * uv_work_size);
+    p->emit_alpha = EmitRescaledAlphaYUV;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// RGBA rescaling
+
+static int ExportRGB(WebPDecParams* const p, int y_pos) {  // returns the number of rows written
+  const WebPYUV444Converter convert =
+      WebPYUV444Converters[p->output->colorspace];
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* dst = buf->rgba + (p->last_y + y_pos) * buf->stride;  // y_pos is relative to p->last_y
+  int num_lines_out = 0;
+  // For RGB rescaling, because of the YUV420, current scan position
+  // U/V can be +1/-1 line from the Y one.  Hence the double test.
+  while (WebPRescalerHasPendingOutput(&p->scaler_y) &&
+         WebPRescalerHasPendingOutput(&p->scaler_u)) {
+    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    assert(p->scaler_u.y_accum == p->scaler_v.y_accum);  // u and v are expected to advance in lockstep
+    WebPRescalerExportRow(&p->scaler_y);
+    WebPRescalerExportRow(&p->scaler_u);
+    WebPRescalerExportRow(&p->scaler_v);
+    convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst,
+            dst, p->scaler_y.dst_width);
+    dst += buf->stride;
+    ++num_lines_out;
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {  // returns the number of RGB rows written
+  const int mb_h = io->mb_h;
+  const int uv_mb_h = (mb_h + 1) >> 1;  // chroma row count: half, rounded up
+  int j = 0, uv_j = 0;  // luma / chroma rows consumed so far
+  int num_lines_out = 0;
+  while (j < mb_h) {
+    const int y_lines_in =
+        WebPRescalerImport(&p->scaler_y, mb_h - j,
+                           io->y + j * io->y_stride, io->y_stride);
+    const int u_lines_in =
+        WebPRescalerImport(&p->scaler_u, uv_mb_h - uv_j,
+                           io->u + uv_j * io->uv_stride, io->uv_stride);
+    const int v_lines_in =
+        WebPRescalerImport(&p->scaler_v, uv_mb_h - uv_j,
+                           io->v + uv_j * io->uv_stride, io->uv_stride);
+    (void)v_lines_in;   // remove a gcc warning
+    assert(u_lines_in == v_lines_in);
+    j += y_lines_in;
+    uv_j += u_lines_in;
+    num_lines_out += ExportRGB(p, num_lines_out);  // rows already written serve as the next y_pos
+  }
+  return num_lines_out;
+}
+
+static int ExportAlpha(WebPDecParams* const p, int y_pos) {  // returns the number of alpha rows written
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;  // y_pos is relative to p->last_y
+  const WEBP_CSP_MODE colorspace = p->output->colorspace;
+  const int alpha_first =
+      (colorspace == MODE_ARGB || colorspace == MODE_Argb);
+  uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);  // alpha is byte 0 or byte 3 of each 4-byte pixel
+  int num_lines_out = 0;
+  const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+  uint32_t alpha_mask = 0xff;  // AND of all alpha values seen
+  const int width = p->scaler_a.dst_width;
+
+  while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
+    int i;
+    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    WebPRescalerExportRow(&p->scaler_a);
+    for (i = 0; i < width; ++i) {
+      const uint32_t alpha_value = p->scaler_a.dst[i];
+      dst[4 * i] = alpha_value;
+      alpha_mask &= alpha_value;
+    }
+    dst += buf->stride;
+    ++num_lines_out;
+  }
+  if (is_premult_alpha && alpha_mask != 0xff) {  // skipped when every alpha value was 0xff
+    WebPApplyAlphaMultiply(base_rgba, alpha_first,
+                           width, num_lines_out, buf->stride);
+  }
+  return num_lines_out;
+}
+
+static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) {  // 2-byte-pixel variant of ExportAlpha()
+  const WebPRGBABuffer* const buf = &p->output->u.RGBA;
+  uint8_t* const base_rgba = buf->rgba + (p->last_y + y_pos) * buf->stride;  // y_pos is relative to p->last_y
+  uint8_t* alpha_dst = base_rgba + 1;  // alpha sits in the second byte of each 2-byte pixel
+  int num_lines_out = 0;
+  const WEBP_CSP_MODE colorspace = p->output->colorspace;
+  const int width = p->scaler_a.dst_width;
+  const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
+  uint32_t alpha_mask = 0x0f;  // AND of all 4-bit alpha values seen
+
+  while (WebPRescalerHasPendingOutput(&p->scaler_a)) {
+    int i;
+    assert(p->last_y + y_pos + num_lines_out < p->output->height);
+    WebPRescalerExportRow(&p->scaler_a);
+    for (i = 0; i < width; ++i) {
+      // Fill in the alpha value (converted to 4 bits).
+      const uint32_t alpha_value = p->scaler_a.dst[i] >> 4;
+      alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;  // keep the high nibble, replace the low one
+      alpha_mask &= alpha_value;
+    }
+    alpha_dst += buf->stride;
+    ++num_lines_out;
+  }
+  if (is_premult_alpha && alpha_mask != 0x0f) {  // skipped when every alpha nibble was 0xf
+    WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride);
+  }
+  return num_lines_out;
+}
+
+static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p) {  // always returns 0
+  if (io->a != NULL) {  // nothing to do when the source has no alpha
+    WebPRescaler* const scaler = &p->scaler_a;
+    int j = 0;  // source alpha rows consumed so far
+    int pos = 0;  // output rows written so far, passed on as y_pos
+    while (j < io->mb_h) {
+      j += WebPRescalerImport(scaler, io->mb_h - j,
+                              io->a + j * io->width, io->width);
+      pos += p->emit_alpha_row(p, pos);  // ExportAlpha or ExportAlphaRGBA4444, chosen in InitRGBRescaler()
+    }
+  }
+  return 0;
+}
+
+static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {  // returns 1 on success, 0 on memory error
+  const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
+  const int out_width  = io->scaled_width;
+  const int out_height = io->scaled_height;
+  const int uv_in_width  = (io->mb_w + 1) >> 1;  // chroma input sizes: half, rounded up
+  const int uv_in_height = (io->mb_h + 1) >> 1;
+  const size_t work_size = 2 * out_width;   // scratch memory for one rescaler
+  int32_t* work;  // rescalers work area
+  uint8_t* tmp;   // tmp storage for scaled YUV444 samples before RGB conversion
+  size_t tmp_size1, tmp_size2;  // element counts of the 'work' and 'tmp' areas
+
+  tmp_size1 = 3 * work_size;  // y, u and v rescalers
+  tmp_size2 = 3 * out_width;  // one output row each for y, u and v
+  if (has_alpha) {
+    tmp_size1 += work_size;
+    tmp_size2 += out_width;
+  }
+  p->memory = calloc(1, tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp));  // freed by CustomTeardown()
+  if (p->memory == NULL) {
+    return 0;   // memory error
+  }
+  work = (int32_t*)p->memory;
+  tmp = (uint8_t*)(work + tmp_size1);  // byte rows start right after the int32 work area
+  WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h,
+                   tmp + 0 * out_width, out_width, out_height, 0, 1,
+                   io->mb_w, out_width, io->mb_h, out_height,
+                   work + 0 * work_size);
+  WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height,
+                   tmp + 1 * out_width, out_width, out_height, 0, 1,
+                   io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,  // NOTE(review): presumably brings chroma to luma resolution; confirm against WebPRescalerInit
+                   work + 1 * work_size);
+  WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height,
+                   tmp + 2 * out_width, out_width, out_height, 0, 1,
+                   io->mb_w, 2 * out_width, io->mb_h, 2 * out_height,
+                   work + 2 * work_size);
+  p->emit = EmitRescaledRGB;
+
+  if (has_alpha) {
+    WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h,
+                     tmp + 3 * out_width, out_width, out_height, 0, 1,
+                     io->mb_w, out_width, io->mb_h, out_height,
+                     work + 3 * work_size);
+    p->emit_alpha = EmitRescaledAlphaRGB;
+    if (p->output->colorspace == MODE_RGBA_4444 ||
+        p->output->colorspace == MODE_rgbA_4444) {
+      p->emit_alpha_row = ExportAlphaRGBA4444;  // 2-byte pixels need the nibble-packing variant
+    } else {
+      p->emit_alpha_row = ExportAlpha;
+    }
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Default custom functions
+
+static int CustomSetup(VP8Io* io) {  // picks the emit functions; returns 1 on success, 0 on failure
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;  // opaque was set by WebPInitCustomIo()
+  const WEBP_CSP_MODE colorspace = p->output->colorspace;
+  const int is_rgb = WebPIsRGBMode(colorspace);
+  const int is_alpha = WebPIsAlphaMode(colorspace);
+
+  p->memory = NULL;
+  p->emit = NULL;
+  p->emit_alpha = NULL;
+  p->emit_alpha_row = NULL;
+  if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) {  // NOTE(review): ternary looks inverted (alpha output passes MODE_YUV) -- confirm against WebPIoInitFromOptions
+    return 0;
+  }
+
+  if (io->use_scaling) {
+    const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
+    if (!ok) {
+      return 0;    // memory error
+    }
+  } else {
+    if (is_rgb) {
+      p->emit = EmitSampledRGB;   // default
+#ifdef FANCY_UPSAMPLING
+      if (io->fancy_upsampling) {
+        const int uv_width = (io->mb_w + 1) >> 1;
+        p->memory = malloc(io->mb_w + 2 * uv_width);  // one saved row each of y, u and v
+        if (p->memory == NULL) {
+          return 0;   // memory error.
+        }
+        p->tmp_y = (uint8_t*)p->memory;
+        p->tmp_u = p->tmp_y + io->mb_w;
+        p->tmp_v = p->tmp_u + uv_width;
+        p->emit = EmitFancyRGB;
+        WebPInitUpsamplers();
+      }
+#endif
+    } else {
+      p->emit = EmitYUV;
+    }
+    if (is_alpha) {  // need transparency output
+      if (WebPIsPremultipliedMode(colorspace)) WebPInitPremultiply();
+      p->emit_alpha =
+          (colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
+              EmitAlphaRGBA4444
+          : is_rgb ? EmitAlphaRGB
+          : EmitAlphaYUV;
+    }
+  }
+
+  if (is_rgb) {
+    VP8YUVInit();
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static int CustomPut(const VP8Io* io) {  // emits one batch of rows; returns 1, or 0 for an empty batch
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;
+  const int mb_w = io->mb_w;
+  const int mb_h = io->mb_h;
+  int num_lines_out;
+  assert(!(io->mb_y & 1));  // batches are expected to start on an even row
+
+  if (mb_w <= 0 || mb_h <= 0) {
+    return 0;
+  }
+  num_lines_out = p->emit(io, p);
+  if (p->emit_alpha) {  // only set when the output has alpha
+    p->emit_alpha(io, p);
+  }
+  p->last_y += num_lines_out;  // running count of rows reported by emit()
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+static void CustomTeardown(const VP8Io* io) {  // releases the scratch memory allocated during setup
+  WebPDecParams* const p = (WebPDecParams*)io->opaque;
+  free(p->memory);  // free(NULL) is a no-op
+  p->memory = NULL;  // guards against a second free
+}
+
+//------------------------------------------------------------------------------
+// Main entry point
+
+void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {  // installs the default hooks of this file
+  io->put      = CustomPut;
+  io->setup    = CustomSetup;
+  io->teardown = CustomTeardown;
+  io->opaque   = params;  // the hooks cast io->opaque back to WebPDecParams*
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webp/dec/layer.c b/drivers/webpold/dec/layer.c
index a3a5bdcfe8..a3a5bdcfe8 100644
--- a/drivers/webp/dec/layer.c
+++ b/drivers/webpold/dec/layer.c
diff --git a/drivers/webpold/dec/quant.c b/drivers/webpold/dec/quant.c
new file mode 100644
index 0000000000..d54097af0d
--- /dev/null
+++ b/drivers/webpold/dec/quant.c
@@ -0,0 +1,113 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Quantizer initialization
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8i.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+static WEBP_INLINE int clip(int v, int M) {  // clamps v to [0, M]; callers pass a non-negative M
+  return v < 0 ? 0 : v > M ? M : v;
+}
+
+// Paragraph 14.1
+static const uint8_t kDcTable[128] = {  // DC values looked up by clipped quantizer index in VP8ParseQuant()
+  4,     5,   6,   7,   8,   9,  10,  10,
+  11,   12,  13,  14,  15,  16,  17,  17,
+  18,   19,  20,  20,  21,  21,  22,  22,
+  23,   23,  24,  25,  25,  26,  27,  28,
+  29,   30,  31,  32,  33,  34,  35,  36,
+  37,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  46,  47,  48,  49,  50,
+  51,   52,  53,  54,  55,  56,  57,  58,
+  59,   60,  61,  62,  63,  64,  65,  66,
+  67,   68,  69,  70,  71,  72,  73,  74,
+  75,   76,  76,  77,  78,  79,  80,  81,
+  82,   83,  84,  85,  86,  87,  88,  89,
+  91,   93,  95,  96,  98, 100, 101, 102,
+  104, 106, 108, 110, 112, 114, 116, 118,
+  122, 124, 126, 128, 130, 132, 134, 136,
+  138, 140, 143, 145, 148, 151, 154, 157
+};
+
+static const uint16_t kAcTable[128] = {  // AC values looked up by clipped quantizer index; 16-bit because entries reach 284
+  4,     5,   6,   7,   8,   9,  10,  11,
+  12,   13,  14,  15,  16,  17,  18,  19,
+  20,   21,  22,  23,  24,  25,  26,  27,
+  28,   29,  30,  31,  32,  33,  34,  35,
+  36,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  47,  48,  49,  50,  51,
+  52,   53,  54,  55,  56,  57,  58,  60,
+  62,   64,  66,  68,  70,  72,  74,  76,
+  78,   80,  82,  84,  86,  88,  90,  92,
+  94,   96,  98, 100, 102, 104, 106, 108,
+  110, 112, 114, 116, 119, 122, 125, 128,
+  131, 134, 137, 140, 143, 146, 149, 152,
+  155, 158, 161, 164, 167, 170, 173, 177,
+  181, 185, 189, 193, 197, 201, 205, 209,
+  213, 217, 221, 225, 229, 234, 239, 245,
+  249, 254, 259, 264, 269, 274, 279, 284
+};
+
+//------------------------------------------------------------------------------
+// Paragraph 9.6
+
+void VP8ParseQuant(VP8Decoder* const dec) {  // reads the quantizer fields from dec->br_ and fills dec->dqm_[] for every segment
+  VP8BitReader* const br = &dec->br_;
+  const int base_q0 = VP8GetValue(br, 7);  // 7-bit base quantizer index
+  const int dqy1_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;  // each delta: a flag bit, then VP8GetSignedValue(br, 4) if set, else 0
+  const int dqy2_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dqy2_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dquv_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+  const int dquv_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
+
+  const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
+  int i;
+
+  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+    int q;
+    if (hdr->use_segment_) {
+      q = hdr->quantizer_[i];
+      if (!hdr->absolute_delta_) {
+        q += base_q0;  // the per-segment value is relative to the base index
+      }
+    } else {
+      if (i > 0) {
+        dec->dqm_[i] = dec->dqm_[0];  // no segmentation: reuse the matrix computed for segment 0
+        continue;
+      } else {
+        q = base_q0;
+      }
+    }
+    {
+      VP8QuantMatrix* const m = &dec->dqm_[i];
+      m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)];  // [0] comes from the DC table, [1] from the AC table
+      m->y1_mat_[1] = kAcTable[clip(q + 0,       127)];  // no delta is read for this entry
+
+      m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
+      // For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
+      // The smallest precision for that is '(x*6349) >> 12' but 16 is a good
+      // word size.
+      m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
+      if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;  // lower bound of 8 on this entry
+
+      m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];  // index capped at 117, so the value is capped at kDcTable[117] = 132
+      m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/tree.c b/drivers/webpold/dec/tree.c
new file mode 100644
index 0000000000..82484e4c55
--- /dev/null
+++ b/drivers/webpold/dec/tree.c
@@ -0,0 +1,589 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Coding trees and probas
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "vp8i.h"
+
+#define USE_GENERIC_TREE
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef USE_GENERIC_TREE
+static const int8_t kYModesIntra4[18] = {  // tree stored as pairs: an entry > 0 is the next node index, an entry <= 0 is a negated leaf mode
+  -B_DC_PRED, 1,
+    -B_TM_PRED, 2,
+      -B_VE_PRED, 3,
+        4, 6,
+          -B_HE_PRED, 5,
+            -B_RD_PRED, -B_VR_PRED,
+        -B_LD_PRED, 7,
+          -B_VL_PRED, 8,
+            -B_HD_PRED, -B_HU_PRED
+};
+#endif
+
+#ifndef ONLY_KEYFRAME_CODE
+
+// inter prediction modes
+enum {  // LEFT4..NEW4 are 0..3; the enumerators on the next line continue from 4
+  LEFT4 = 0, ABOVE4 = 1, ZERO4 = 2, NEW4 = 3,
+  NEARESTMV, NEARMV, ZEROMV, NEWMV, SPLITMV };
+
+static const int8_t kYModesInter[8] = {  // NOTE(review): presumably the same node-pair tree layout as kYModesIntra4 -- confirm
+  -DC_PRED, 1,
+    2, 3,
+      -V_PRED, -H_PRED,
+      -TM_PRED, -B_PRED
+};
+
+static const int8_t kMBSplit[6] = {  // NOTE(review): leaves here are plain negated integers 0..3, not mode names
+  -3, 1,
+    -2, 2,
+      -0, -1
+};
+
+static const int8_t kMVRef[8] = {  // leaves are the negated NEARESTMV..SPLITMV enumerators
+  -ZEROMV, 1,
+    -NEARESTMV, 2,
+      -NEARMV, 3,
+        -NEWMV, -SPLITMV
+};
+
+static const int8_t kMVRef4[6] = {  // leaves are the negated LEFT4..NEW4 enumerators
+  -LEFT4, 1,
+    -ABOVE4, 2,
+      -ZERO4, -NEW4
+};
+#endif
+
+//------------------------------------------------------------------------------
+// Default probabilities
+
+// Inter
+#ifndef ONLY_KEYFRAME_CODE
+static const uint8_t kYModeProbaInter0[4] = { 112, 86, 140, 37 };  // copied into proba->ymode_ by VP8ResetProba()
+static const uint8_t kUVModeProbaInter0[3] = { 162, 101, 204 };  // copied into proba->uvmode_ by VP8ResetProba()
+static const uint8_t kMVProba0[2][NUM_MV_PROBAS] = {  // copied into proba->mv_ by VP8ResetProba()
+  { 162, 128, 225, 146, 172, 147, 214,  39,
+    156, 128, 129, 132,  75, 145, 178, 206,
+    239, 254, 254 },
+  { 164, 128, 204, 170, 119, 235, 140, 230,
+    228, 128, 130, 130,  74, 148, 180, 203,
+    236, 254, 254 }
+};
+#endif
+
+// Paragraph 13.5
+static const uint8_t
+  CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  // generated using vp8_default_coef_probs() in entropy.c:129
+  { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+      { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+      { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+      { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+      { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+    },
+    { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+      { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+      { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+    },
+    { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+      { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+      { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+      { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+      { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+      { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+      { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+      { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+      { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+    },
+    { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+      { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+      { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+    },
+    { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+      { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+      { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+    },
+    { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+      { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+      { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+      { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+      { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+    },
+    { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+      { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+      { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+      { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+      { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+    },
+    { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+      { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+      { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+    },
+    { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+      { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+      { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+      { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+      { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+    },
+    { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+      { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+      { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+      { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
+    },
+    { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+      { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+      { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
+    },
+    { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+      { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+      { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
+    },
+    { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+      { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+      { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
+    },
+    { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+      { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+      { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
+    },
+    { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+      { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+      { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+      { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+      { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  }
+};
+
+// Paragraph 11.5: sub-block mode probabilities, indexed [top mode][left mode][tree node]
+static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
+  { { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
+    { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
+    { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
+    { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
+    { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
+    { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
+    { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
+    { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
+    { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
+    { 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
+  { { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
+    { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
+    { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
+    { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
+    { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
+    { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
+    { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
+    { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
+    { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
+    { 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
+  { { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
+    { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
+    { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
+    { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
+    { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
+    { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
+    { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
+    { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
+    { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
+    { 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
+  { { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
+    { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
+    { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
+    { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
+    { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
+    { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
+    { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
+    { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
+    { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
+    { 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
+  { { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
+    { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
+    { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
+    { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
+    { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
+    { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
+    { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
+    { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
+    { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
+    { 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
+  { { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
+    { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
+    { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
+    { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
+    { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
+    { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
+    { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
+    { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
+    { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
+    { 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
+  { { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
+    { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
+    { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
+    { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
+    { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
+    { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
+    { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
+    { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
+    { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
+    { 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
+  { { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
+    { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
+    { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
+    { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
+    { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
+    { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
+    { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
+    { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
+    { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
+    { 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
+  { { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
+    { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
+    { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
+    { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
+    { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
+    { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
+    { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
+    { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
+    { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
+    { 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
+  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
+    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
+    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
+    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
+    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
+    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
+    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
+    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
+    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
+    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
+};
+
+void VP8ResetProba(VP8Proba* const proba) {  // resets 'proba' to the built-in default tables
+  memset(proba->segments_, 255u, sizeof(proba->segments_));  // every byte of segments_ is set to 255
+  memcpy(proba->coeffs_, CoeffsProba0, sizeof(CoeffsProba0));
+#ifndef ONLY_KEYFRAME_CODE
+  memcpy(proba->mv_, kMVProba0, sizeof(kMVProba0));  // inter-frame defaults, compiled out with ONLY_KEYFRAME_CODE
+  memcpy(proba->ymode_, kYModeProbaInter0, sizeof(kYModeProbaInter0));
+  memcpy(proba->uvmode_, kUVModeProbaInter0, sizeof(kUVModeProbaInter0));
+#endif
+}
+
+void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec) {  // decodes luma and chroma intra modes for the macroblock at dec->mb_x_
+  uint8_t* const top = dec->intra_t_ + 4 * dec->mb_x_;  // 4 context entries per macroblock column
+  uint8_t* const left = dec->intra_l_;  // 4 context entries, one per sub-block row
+  // Hardcoded 16x16 intra-mode decision tree.
+  dec->is_i4x4_ = !VP8GetBit(br, 145);   // decide for B_PRED first
+  if (!dec->is_i4x4_) {
+    const int ymode =
+        VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
+                           : (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
+    dec->imodes_[0] = ymode;  // only the first entry is written in the 16x16 case
+    memset(top, ymode, 4 * sizeof(top[0]));  // the single mode overwrites all 4 top context entries
+    memset(left, ymode, 4 * sizeof(left[0]));  // and all 4 left context entries
+  } else {
+    uint8_t* modes = dec->imodes_;
+    int y;
+    for (y = 0; y < 4; ++y) {
+      int ymode = left[y];  // each row starts from its left context entry
+      int x;
+      for (x = 0; x < 4; ++x) {
+        const uint8_t* const prob = kBModesProba[top[x]][ymode];  // selected by the modes above and to the left
+#ifdef USE_GENERIC_TREE
+        // Generic tree-parsing
+        int i = 0;
+        do {
+          i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])];  // each node has two entries; the decoded bit picks one
+        } while (i > 0);  // a positive entry is the next node, a non-positive entry is a leaf
+        ymode = -i;  // leaves are stored negated
+#else
+        // Hardcoded tree parsing
+        ymode = !VP8GetBit(br, prob[0]) ? B_DC_PRED :
+                  !VP8GetBit(br, prob[1]) ? B_TM_PRED :
+                    !VP8GetBit(br, prob[2]) ? B_VE_PRED :
+                      !VP8GetBit(br, prob[3]) ?
+                        (!VP8GetBit(br, prob[4]) ? B_HE_PRED :
+                          (!VP8GetBit(br, prob[5]) ? B_RD_PRED : B_VR_PRED)) :
+                        (!VP8GetBit(br, prob[6]) ? B_LD_PRED :
+                          (!VP8GetBit(br, prob[7]) ? B_VL_PRED :
+                            (!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
+#endif    // USE_GENERIC_TREE
+        top[x] = ymode;  // read back as the top context when the next row is parsed
+        *modes++ = ymode;  // the 16 modes are stored row by row
+      }
+      left[y] = ymode;  // last mode decoded in this row
+    }
+  }
+  // Hardcoded UVMode decision tree
+  dec->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
+               : !VP8GetBit(br, 114) ? V_PRED
+               : VP8GetBit(br, 183) ? TM_PRED : H_PRED;
+}
+
+//------------------------------------------------------------------------------
+// Paragraph 13: probability of the 'update' flag for each coefficient probability (see VP8ParseProba)
+
+static const uint8_t
+    CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+      { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  }
+};
+
+#ifndef ONLY_KEYFRAME_CODE
+static const uint8_t MVUpdateProba[2][NUM_MV_PROBAS] = {  // probability of the 'update' flag for each proba->mv_ entry (see VP8ParseProba)
+  { 237, 246, 253, 253, 254, 254, 254, 254,
+    254, 254, 254, 254, 254, 254, 250, 250,
+    252, 254, 254 },
+  { 231, 243, 245, 253, 254, 254, 254, 254,
+    254, 254, 254, 254, 254, 254, 251, 251,
+    254, 254, 254 }
+};
+#endif
+
+// Paragraph 9.9
+void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {  // reads probability updates from 'br' into 'dec'
+  VP8Proba* const proba = &dec->proba_;
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          if (VP8GetBit(br, CoeffsUpdateProba[t][b][c][p])) {  // update flag, decoded with a fixed per-entry probability
+            proba->coeffs_[t][b][c][p] = VP8GetValue(br, 8);  // entry replaced by a new 8-bit value
+          }
+        }
+      }
+    }
+  }
+  dec->use_skip_proba_ = VP8Get(br);
+  if (dec->use_skip_proba_) {
+    dec->skip_p_ = VP8GetValue(br, 8);  // read only when the flag above is set
+  }
+#ifndef ONLY_KEYFRAME_CODE
+  if (!dec->frm_hdr_.key_frame_) {  // the fields below are read for non-key frames only
+    int i;
+    dec->intra_p_ = VP8GetValue(br, 8);
+    dec->last_p_ = VP8GetValue(br, 8);
+    dec->golden_p_ = VP8GetValue(br, 8);
+    if (VP8Get(br)) {   // update y-mode
+      for (i = 0; i < 4; ++i) {
+        proba->ymode_[i] = VP8GetValue(br, 8);
+      }
+    }
+    if (VP8Get(br)) {   // update uv-mode
+      for (i = 0; i < 3; ++i) {
+        proba->uvmode_[i] = VP8GetValue(br, 8);
+      }
+    }
+    // update MV
+    for (i = 0; i < 2; ++i) {
+      int k;
+      for (k = 0; k < NUM_MV_PROBAS; ++k) {
+        if (VP8GetBit(br, MVUpdateProba[i][k])) {
+          const int v = VP8GetValue(br, 7);
+          proba->mv_[i][k] = v ? v << 1 : 1;  // 7-bit value doubled; 0 maps to 1 so the stored value is never 0
+        }
+      }
+    }
+  }
+#endif
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/vp8.c b/drivers/webpold/dec/vp8.c
new file mode 100644
index 0000000000..b0ccfa2a06
--- /dev/null
+++ b/drivers/webpold/dec/vp8.c
@@ -0,0 +1,787 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../utils/bit_reader.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+
+int WebPGetDecoderVersion(void) {   // (major << 16) | (minor << 8) | revision
+  return DEC_REV_VERSION | (DEC_MIN_VERSION << 8) | (DEC_MAJ_VERSION << 16);
+}
+
+//------------------------------------------------------------------------------
+// VP8Decoder
+
+static void SetOk(VP8Decoder* const dec) {   // mark 'dec' as error-free
+  dec->error_msg_ = "OK";
+  dec->status_ = VP8_STATUS_OK;
+}
+
+// Clears '*io' (if non-NULL) and returns 1, or returns 0 on ABI mismatch.
+int VP8InitIoInternal(VP8Io* const io, int version) {
+  const int compatible =
+      !WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION);
+  if (compatible && io != NULL) {
+    memset(io, 0, sizeof(*io));
+  }
+  return compatible;
+}
+
+// Allocates a zero-initialized decoder; returns NULL if calloc() fails.
+VP8Decoder* VP8New(void) {
+  VP8Decoder* const decoder = (VP8Decoder*)calloc(1, sizeof(*decoder));
+  if (decoder == NULL) return NULL;
+  SetOk(decoder);
+  WebPWorkerInit(&decoder->worker_);
+  decoder->ready_ = 0;
+  decoder->num_parts_ = 1;
+  return decoder;
+}
+
+// Current status of 'dec'; a NULL decoder yields VP8_STATUS_INVALID_PARAM.
+VP8StatusCode VP8Status(VP8Decoder* const dec) {
+  return (dec != NULL) ? dec->status_ : VP8_STATUS_INVALID_PARAM;
+}
+
+// Status text of 'dec': "no object" for NULL, "OK" when no message is set.
+const char* VP8StatusMessage(VP8Decoder* const dec) {
+  if (dec == NULL) return "no object";
+  return (dec->error_msg_ != NULL) ? dec->error_msg_ : "OK";
+}
+
+// Calls VP8Clear() on 'dec', then frees the object itself.
+void VP8Delete(VP8Decoder* const dec) {
+  if (dec == NULL) return;   // nothing to free
+  VP8Clear(dec);
+  free(dec);
+}
+
+int VP8SetError(VP8Decoder* const dec,
+                VP8StatusCode error, const char* const msg) {
+  // Only the first error is kept: once a status other than VP8_STATUS_OK is
+  // recorded, later calls leave it untouched. Always returns 0.
+  // TODO: this guard would be unnecessary if alpha decompression was
+  // separated from VP8ProcessRow/FinishRow. It avoids setting 'dec->status_'
+  // to something other than VP8_STATUS_BITSTREAM_ERROR on alpha failure.
+  if (dec->status_ != VP8_STATUS_OK) return 0;
+  dec->ready_ = 0;
+  dec->error_msg_ = msg;
+  dec->status_ = error;
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+
+int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
+  if (data_size < 3) return 0;   // too short to hold the 3-byte signature
+  return (data[0] == 0x9d) && (data[1] == 0x01) && (data[2] == 0x2a);
+}
+
+// Validates the start of a VP8 key-frame bitstream and extracts its
+// dimensions. Returns 1 and (when the pointers are non-NULL) fills '*width'
+// and '*height'; returns 0 if the data is too short or inconsistent.
+int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
+               int* const width, int* const height) {
+  uint32_t frame_tag;
+
+  if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
+    return 0;         // not enough data
+  }
+  if (!VP8CheckSignature(data + 3, data_size - 3)) {
+    return 0;         // Wrong signature.
+  }
+  frame_tag = data[0] | (data[1] << 8) | (data[2] << 16);
+  if (frame_tag & 1) {
+    return 0;         // Not a keyframe.
+  }
+  if (((frame_tag >> 1) & 7) > 3) {
+    return 0;         // unknown profile
+  }
+  if (((frame_tag >> 4) & 1) == 0) {
+    return 0;         // first frame is invisible!
+  }
+  if ((frame_tag >> 5) >= chunk_size) {  // partition_length
+    return 0;         // inconsistent size information.
+  }
+
+  // Dimensions are 14-bit little-endian values; the upper two bits of each
+  // 16-bit field are masked off.
+  if (width != NULL) {
+    *width = ((data[7] << 8) | data[6]) & 0x3fff;
+  }
+  if (height != NULL) {
+    *height = ((data[9] << 8) | data[8]) & 0x3fff;
+  }
+
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Header parsing
+
+// Restores the defaults: segmentation off, absolute values, all-zero tables.
+static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
+  assert(hdr != NULL);
+  memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_));
+  memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_));
+  hdr->absolute_delta_ = 1;
+  hdr->use_segment_ = hdr->update_map_ = 0;
+}
+
+// Paragraph 9.3: reads the segmentation parameters from 'br' into 'hdr' and
+// 'proba->segments_'. Returns false if the bit-reader reached its end.
+static int ParseSegmentHeader(VP8BitReader* br,
+                              VP8SegmentHeader* hdr, VP8Proba* proba) {
+  int i;
+  assert(br != NULL);
+  assert(hdr != NULL);
+  hdr->use_segment_ = VP8Get(br);
+  if (!hdr->use_segment_) {
+    hdr->update_map_ = 0;
+    return !br->eof_;
+  }
+  hdr->update_map_ = VP8Get(br);
+  if (VP8Get(br)) {   // update data
+    hdr->absolute_delta_ = VP8Get(br);
+    for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+      hdr->quantizer_[i] = VP8Get(br) ? VP8GetSignedValue(br, 7) : 0;
+    }
+    for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+      hdr->filter_strength_[i] = VP8Get(br) ? VP8GetSignedValue(br, 6) : 0;
+    }
+  }
+  if (hdr->update_map_) {
+    for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) {
+      proba->segments_[i] = VP8Get(br) ? VP8GetValue(br, 8) : 255u;
+    }
+  }
+  return !br->eof_;
+}
+
+// Paragraph 9.5
+// This function returns VP8_STATUS_SUSPENDED if we don't have all the
+// necessary data in 'buf'.
+// This case is not necessarily an error (for incremental decoding).
+// Still, no bitreader is ever initialized to make it possible to read
+// unavailable memory.
+// If we don't even have the partitions' sizes, then VP8_STATUS_NOT_ENOUGH_DATA
+// is returned, and this is an unrecoverable error.
+// If the partitions were positioned ok, VP8_STATUS_OK is returned.
+static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
+                                     const uint8_t* buf, size_t size) {
+  VP8BitReader* const br = &dec->br_;
+  const uint8_t* sz = buf;               // table of 3-byte partition sizes
+  const uint8_t* buf_end = buf + size;
+  const uint8_t* part_start;
+  int last_part;
+  int p;
+
+  dec->num_parts_ = 1 << VP8GetValue(br, 2);
+  last_part = dec->num_parts_ - 1;
+  if (size < (size_t)last_part * 3) {    // compare sizes, not pointers
+    // we can't even read the sizes with sz[]! That's a failure.
+    return VP8_STATUS_NOT_ENOUGH_DATA;
+  }
+  part_start = buf + last_part * 3;      // now known to lie within the buffer
+  for (p = 0; p < last_part; ++p) {
+    const size_t size_left = (size_t)(buf_end - part_start);
+    size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
+    if (psize > size_left) psize = size_left;   // never point past buf_end
+    VP8InitBitReader(dec->parts_ + p, part_start, part_start + psize);
+    part_start += psize;
+    sz += 3;
+  }
+  VP8InitBitReader(dec->parts_ + last_part, part_start, buf_end);
+  return (part_start < buf_end) ? VP8_STATUS_OK :
+           VP8_STATUS_SUSPENDED;   // Init is ok, but there's not enough data
+}
+
+// Paragraph 9.4: reads the loop-filter parameters and precomputes the
+// per-segment filter levels. Returns false if the bit-reader reached its end.
+static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
+  VP8FilterHeader* const hdr = &dec->filter_hdr_;
+  const VP8SegmentHeader* const seg = &dec->segment_hdr_;
+  int i;
+
+  hdr->simple_    = VP8Get(br);
+  hdr->level_     = VP8GetValue(br, 6);
+  hdr->sharpness_ = VP8GetValue(br, 3);
+  hdr->use_lf_delta_ = VP8Get(br);
+  if (hdr->use_lf_delta_ && VP8Get(br)) {   // update lf-delta?
+    for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
+      if (VP8Get(br)) hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6);
+    }
+    for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
+      if (VP8Get(br)) hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6);
+    }
+  }
+
+  // filter_type_: 0 when the level is zero, else 1 (simple) or 2.
+  if (hdr->level_ == 0) {
+    dec->filter_type_ = 0;
+  } else {
+    dec->filter_type_ = hdr->simple_ ? 1 : 2;
+  }
+  if (dec->filter_type_ == 0) {
+    return !br->eof_;                   // no levels to precompute
+  }
+  if (!seg->use_segment_) {
+    dec->filter_levels_[0] = hdr->level_;
+    return !br->eof_;
+  }
+  for (i = 0; i < NUM_MB_SEGMENTS; ++i) {   // one level per segment
+    const int base = seg->absolute_delta_ ? 0 : hdr->level_;
+    dec->filter_levels_[i] = seg->filter_strength_[i] + base;
+  }
+  return !br->eof_;
+}
+
+// Topmost call: parses the frame headers into 'dec'. Returns 1 on success.
+int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
+  const uint8_t* buf;
+  size_t buf_size;
+  VP8FrameHeader* frm_hdr;
+  VP8PictureHeader* pic_hdr;
+  VP8BitReader* br;
+  VP8StatusCode status;
+  WebPHeaderStructure headers;
+
+  if (dec == NULL) {
+    return 0;
+  }
+  SetOk(dec);
+  if (io == NULL) {
+    return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
+                       "null VP8Io passed to VP8GetHeaders()");
+  }
+
+  // Process Pre-VP8 chunks.
+  headers.data = io->data;
+  headers.data_size = io->data_size;
+  status = WebPParseHeaders(&headers);
+  if (status != VP8_STATUS_OK) {
+    return VP8SetError(dec, status, "Incorrect/incomplete header.");
+  }
+  if (headers.is_lossless) {
+    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                       "Unexpected lossless format encountered.");
+  }
+
+  if (dec->alpha_data_ == NULL) {
+    assert(dec->alpha_data_size_ == 0);
+    // We have NOT set alpha data yet. Set it now.
+    // (This is to ensure that dec->alpha_data_ is NOT reset to NULL if
+    // WebPParseHeaders() is called more than once, as in incremental decoding
+    // case.)
+    dec->alpha_data_ = headers.alpha_data;
+    dec->alpha_data_size_ = headers.alpha_data_size;
+  }
+
+  // Process the VP8 frame header.
+  buf = headers.data + headers.offset;
+  buf_size = headers.data_size - headers.offset;
+  assert(headers.data_size >= headers.offset);  // WebPParseHeaders' guarantee
+  if (buf_size < 4) {
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                       "Truncated header.");
+  }
+
+  // Paragraph 9.1
+  {
+    const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16);
+    frm_hdr = &dec->frm_hdr_;
+    frm_hdr->key_frame_ = !(bits & 1);
+    frm_hdr->profile_ = (bits >> 1) & 7;
+    frm_hdr->show_ = (bits >> 4) & 1;
+    frm_hdr->partition_length_ = (bits >> 5);
+    if (frm_hdr->profile_ > 3)
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                         "Incorrect keyframe parameters.");
+    if (!frm_hdr->show_)
+      return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
+                         "Frame not displayable.");
+    buf += 3;
+    buf_size -= 3;
+  }
+
+  pic_hdr = &dec->pic_hdr_;
+  if (frm_hdr->key_frame_) {
+    // Paragraph 9.2
+    if (buf_size < 7) {
+      return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                         "cannot parse picture header");
+    }
+    if (!VP8CheckSignature(buf, buf_size)) {
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                         "Bad code word");
+    }
+    pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff;
+    pic_hdr->xscale_ = buf[4] >> 6;   // ratio: 1, 5/4 5/3 or 2
+    pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff;
+    pic_hdr->yscale_ = buf[6] >> 6;
+    buf += 7;
+    buf_size -= 7;
+
+    dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
+    dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
+    // Setup default output area (can be later modified during io->setup())
+    io->width = pic_hdr->width_;
+    io->height = pic_hdr->height_;
+    io->use_scaling  = 0;
+    io->use_cropping = 0;
+    io->crop_top  = 0;
+    io->crop_left = 0;
+    io->crop_right  = io->width;
+    io->crop_bottom = io->height;
+    io->mb_w = io->width;   // sanity check
+    io->mb_h = io->height;  // ditto
+
+    VP8ResetProba(&dec->proba_);
+    ResetSegmentHeader(&dec->segment_hdr_);
+    dec->segment_ = 0;    // default for intra
+  }
+
+  // Check if we have all the partition #0 available, and initialize dec->br_
+  // to read this partition (and this partition only).
+  if (frm_hdr->partition_length_ > buf_size) {
+    return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                       "bad partition length");
+  }
+
+  br = &dec->br_;
+  VP8InitBitReader(br, buf, buf + frm_hdr->partition_length_);
+  buf += frm_hdr->partition_length_;
+  buf_size -= frm_hdr->partition_length_;
+
+  if (frm_hdr->key_frame_) {
+    pic_hdr->colorspace_ = VP8Get(br);
+    pic_hdr->clamp_type_ = VP8Get(br);
+  }
+  if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
+    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                       "cannot parse segment header");
+  }
+  // Filter specs
+  if (!ParseFilterHeader(br, dec)) {
+    return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                       "cannot parse filter header");
+  }
+  status = ParsePartitions(dec, buf, buf_size);
+  if (status != VP8_STATUS_OK) {
+    return VP8SetError(dec, status, "cannot parse partitions");
+  }
+
+  // quantizer change
+  VP8ParseQuant(dec);
+
+  // Frame buffer marking
+  if (!frm_hdr->key_frame_) {
+    // Paragraph 9.7
+#ifndef ONLY_KEYFRAME_CODE
+    dec->buffer_flags_ = VP8Get(br) << 0;   // update golden
+    dec->buffer_flags_ |= VP8Get(br) << 1;  // update alt ref
+    if (!(dec->buffer_flags_ & 1)) {
+      dec->buffer_flags_ |= VP8GetValue(br, 2) << 2;
+    }
+    if (!(dec->buffer_flags_ & 2)) {
+      dec->buffer_flags_ |= VP8GetValue(br, 2) << 4;
+    }
+    dec->buffer_flags_ |= VP8Get(br) << 6;    // sign bias golden
+    dec->buffer_flags_ |= VP8Get(br) << 7;    // sign bias alt ref
+#else
+    return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
+                       "Not a key frame.");
+#endif
+  } else {
+    dec->buffer_flags_ = 0x003 | 0x100;
+  }
+
+  // Paragraph 9.8
+#ifndef ONLY_KEYFRAME_CODE
+  dec->update_proba_ = VP8Get(br);
+  if (!dec->update_proba_) {    // save for later restore
+    dec->proba_saved_ = dec->proba_;
+  }
+  dec->buffer_flags_ &= ~(1u << 8);   // clear bit 8 only; keep bits 0-7
+  dec->buffer_flags_ |=
+      (frm_hdr->key_frame_ || VP8Get(br)) << 8;    // refresh last frame
+#else
+  VP8Get(br);   // just ignore the value of update_proba_
+#endif
+
+  VP8ParseProba(br, dec);
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  // Extensions
+  if (dec->pic_hdr_.colorspace_) {
+    const size_t kTrailerSize = 8;
+    const uint8_t kTrailerMarker = 0x01;
+    const uint8_t* ext_buf = buf - kTrailerSize;
+    size_t size;
+
+    if (frm_hdr->partition_length_ < kTrailerSize ||
+        ext_buf[kTrailerSize - 1] != kTrailerMarker) {
+      return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
+                         "RIFF: Inconsistent extra information.");
+    }
+
+    // Layer
+    size = (ext_buf[0] << 0) | (ext_buf[1] << 8) | (ext_buf[2] << 16);
+    dec->layer_data_size_ = size;
+    dec->layer_data_ = NULL;  // will be set later
+    dec->layer_colorspace_ = ext_buf[3];
+  }
+#endif
+
+  // sanitized state
+  dec->ready_ = 1;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Residual decoding (Paragraph 13.2 / 13.3)
+
+static const uint8_t kBands[16 + 1] = {   // coeff index n -> band index
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  // extra entry as sentinel
+};
+
+static const uint8_t kCat3[] = { 173, 148, 140, 0 };   // zero-terminated
+static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
+static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
+static const uint8_t kCat6[] =
+  { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
+static const uint8_t kZigzag[16] = {   // decoding order -> index into out[]
+  0, 1, 4, 8,  5, 2, 3, 6,  9, 12, 13, 10,  7, 11, 14, 15
+};
+
+typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS];  // for const-casting
+
+// Returns the position of the last non-zero coeff plus one
+// (and 0 if there's no coeff at all)
+static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
+                     int ctx, const quant_t dq, int n, int16_t* out) {
+  // n is either 0 or 1 here. kBands[n] is not necessary for extracting '*p'.
+  const uint8_t* p = prob[n][ctx];
+  if (!VP8GetBit(br, p[0])) {   // first EOB is more a 'CBP' bit.
+    return 0;
+  }
+  while (1) {
+    ++n;
+    if (!VP8GetBit(br, p[1])) {
+      p = prob[kBands[n]][0];   // zero coeff: next one uses context 0
+    } else {  // non zero coeff
+      int v, j;
+      if (!VP8GetBit(br, p[2])) {
+        p = prob[kBands[n]][1];   // magnitude 1: next one uses context 1
+        v = 1;
+      } else {
+        if (!VP8GetBit(br, p[3])) {
+          if (!VP8GetBit(br, p[4])) {
+            v = 2;
+          } else {
+            v = 3 + VP8GetBit(br, p[5]);
+          }
+        } else {
+          if (!VP8GetBit(br, p[6])) {
+            if (!VP8GetBit(br, p[7])) {
+              v = 5 + VP8GetBit(br, 159);
+            } else {
+              v = 7 + 2 * VP8GetBit(br, 165);
+              v += VP8GetBit(br, 145);
+            }
+          } else {
+            const uint8_t* tab;
+            const int bit1 = VP8GetBit(br, p[8]);
+            const int bit0 = VP8GetBit(br, p[9 + bit1]);
+            const int cat = 2 * bit1 + bit0;   // selects kCat3..kCat6
+            v = 0;
+            for (tab = kCat3456[cat]; *tab; ++tab) {   // extra bits, MSB first
+              v += v + VP8GetBit(br, *tab);
+            }
+            v += 3 + (8 << cat);
+          }
+        }
+        p = prob[kBands[n]][2];   // magnitude > 1: next one uses context 2
+      }
+      j = kZigzag[n - 1];
+      out[j] = VP8GetSigned(br, v) * dq[j > 0];   // dq[0] iff j == 0
+      if (n == 16 || !VP8GetBit(br, p[0])) {   // EOB
+        return n;
+      }
+    }
+    if (n == 16) {
+      return 16;
+    }
+  }
+}
+
+// Alias-safe way of converting 4bytes to 32bits.
+typedef union {
+  uint8_t  i8[4];
+  uint32_t i32;
+} PackedNz;
+
+// Table to unpack four bits into four bytes
+static const PackedNz kUnpackTab[16] = {
+  {{0, 0, 0, 0}},  {{1, 0, 0, 0}},  {{0, 1, 0, 0}},  {{1, 1, 0, 0}},
+  {{0, 0, 1, 0}},  {{1, 0, 1, 0}},  {{0, 1, 1, 0}},  {{1, 1, 1, 0}},
+  {{0, 0, 0, 1}},  {{1, 0, 0, 1}},  {{0, 1, 0, 1}},  {{1, 1, 0, 1}},
+  {{0, 0, 1, 1}},  {{1, 0, 1, 1}},  {{0, 1, 1, 1}},  {{1, 1, 1, 1}} };
+
+// Macro to pack four LSB of four bytes into four bits.
+#if defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) || \
+    defined(__BIG_ENDIAN__)
+#define PACK_CST 0x08040201U
+#else
+#define PACK_CST 0x01020408U
+#endif
+#define PACK(X, S) ((((X).i32 * PACK_CST) & 0xff000000) >> (S))
+
+static void ParseResiduals(VP8Decoder* const dec,
+                           VP8MB* const mb, VP8BitReader* const token_br) {
+  int out_t_nz, out_l_nz, first;
+  ProbaArray ac_prob;
+  const VP8QuantMatrix* q = &dec->dqm_[dec->segment_];
+  int16_t* dst = dec->coeffs_;
+  VP8MB* const left_mb = dec->mb_info_ - 1;
+  PackedNz nz_ac, nz_dc;
+  PackedNz tnz, lnz;
+  uint32_t non_zero_ac = 0;
+  uint32_t non_zero_dc = 0;
+  int x, y, ch;
+  // Decode this macroblock's coefficients into dec->coeffs_ (via 'dst').
+  nz_dc.i32 = nz_ac.i32 = 0;
+  memset(dst, 0, 384 * sizeof(*dst));   // clear all 384 coefficients
+  if (!dec->is_i4x4_) {    // parse DC
+    int16_t dc[16] = { 0 };
+    const int ctx = mb->dc_nz_ + left_mb->dc_nz_;
+    mb->dc_nz_ = left_mb->dc_nz_ =
+        (GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[1],
+                   ctx, q->y2_mat_, 0, dc) > 0);
+    first = 1;   // the luma GetCoeffs() calls below then start at n = 1
+    ac_prob = (ProbaArray)dec->proba_.coeffs_[0];
+    VP8TransformWHT(dc, dst);
+  } else {
+    first = 0;
+    ac_prob = (ProbaArray)dec->proba_.coeffs_[3];
+  }
+
+  tnz = kUnpackTab[mb->nz_ & 0xf];        // low 4 bits: luma contexts
+  lnz = kUnpackTab[left_mb->nz_ & 0xf];
+  for (y = 0; y < 4; ++y) {
+    int l = lnz.i8[y];
+    for (x = 0; x < 4; ++x) {
+      const int ctx = l + tnz.i8[x];
+      const int nz = GetCoeffs(token_br, ac_prob, ctx,
+                               q->y1_mat_, first, dst);
+      tnz.i8[x] = l = (nz > 0);
+      nz_dc.i8[x] = (dst[0] != 0);
+      nz_ac.i8[x] = (nz > 1);
+      dst += 16;   // next 4x4 block
+    }
+    lnz.i8[y] = l;
+    non_zero_dc |= PACK(nz_dc, 24 - y * 4);
+    non_zero_ac |= PACK(nz_ac, 24 - y * 4);
+  }
+  out_t_nz = PACK(tnz, 24);
+  out_l_nz = PACK(lnz, 24);
+
+  tnz = kUnpackTab[mb->nz_ >> 4];         // upper bits: chroma contexts
+  lnz = kUnpackTab[left_mb->nz_ >> 4];
+  for (ch = 0; ch < 4; ch += 2) {   // ch = 0, then 2 (presumably U, then V)
+    for (y = 0; y < 2; ++y) {
+      int l = lnz.i8[ch + y];
+      for (x = 0; x < 2; ++x) {
+        const int ctx = l + tnz.i8[ch + x];
+        const int nz =
+            GetCoeffs(token_br, (ProbaArray)dec->proba_.coeffs_[2],
+                      ctx, q->uv_mat_, 0, dst);
+        tnz.i8[ch + x] = l = (nz > 0);
+        nz_dc.i8[y * 2 + x] = (dst[0] != 0);
+        nz_ac.i8[y * 2 + x] = (nz > 1);
+        dst += 16;
+      }
+      lnz.i8[ch + y] = l;
+    }
+    non_zero_dc |= PACK(nz_dc, 8 - ch * 2);
+    non_zero_ac |= PACK(nz_ac, 8 - ch * 2);
+  }
+  out_t_nz |= PACK(tnz, 20);
+  out_l_nz |= PACK(lnz, 20);
+  mb->nz_ = out_t_nz;        // save the contexts for the next macroblocks
+  left_mb->nz_ = out_l_nz;
+
+  dec->non_zero_ac_ = non_zero_ac;
+  dec->non_zero_ = non_zero_ac | non_zero_dc;
+  mb->skip_ = !dec->non_zero_;   // nothing decoded: treat the block as skipped
+}
+#undef PACK
+
+//------------------------------------------------------------------------------
+// Main loop
+
+// Decodes one macroblock: segment, skip flag and intra modes from 'dec->br_',
+// then residuals from 'token_br'. Returns false on premature end of data.
+int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
+  VP8BitReader* const br = &dec->br_;
+  VP8MB* const left_mb = dec->mb_info_ - 1;
+  VP8MB* const cur_mb = dec->mb_info_ + dec->mb_x_;
+
+  // Note: we don't save segment map (yet), as we don't expect
+  // to decode more than 1 keyframe.
+  if (dec->segment_hdr_.update_map_) {
+    const uint8_t* const tree = dec->proba_.segments_;   // hardcoded tree
+    if (VP8GetBit(br, tree[0])) {
+      dec->segment_ = 2 + VP8GetBit(br, tree[2]);
+    } else {
+      dec->segment_ = VP8GetBit(br, tree[1]);
+    }
+  }
+  cur_mb->skip_ = dec->use_skip_proba_ ? VP8GetBit(br, dec->skip_p_) : 0;
+  VP8ParseIntraMode(br, dec);
+  if (br->eof_) return 0;
+
+  if (cur_mb->skip_) {   // no residual data: clear the non-zero contexts
+    left_mb->nz_ = cur_mb->nz_ = 0;
+    if (!dec->is_i4x4_) {
+      left_mb->dc_nz_ = cur_mb->dc_nz_ = 0;
+    }
+    dec->non_zero_ = 0;
+    dec->non_zero_ac_ = 0;
+  } else {
+    ParseResiduals(dec, cur_mb, token_br);
+  }
+  return (!token_br->eof_);
+}
+
+void VP8InitScanline(VP8Decoder* const dec) {   // per-row context reset
+  VP8MB* const left_mb = dec->mb_info_ - 1;
+  const int in_range =
+      (dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
+  left_mb->dc_nz_ = 0;
+  left_mb->nz_ = 0;
+  memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
+  dec->filter_row_ = (dec->filter_type_ > 0) && in_range;
+}
+
+// Decodes every macroblock row up to 'br_mb_y_', handing each completed row
+// to VP8ProcessRow(). Returns false on failure.
+static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
+  for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
+    // Row 'mb_y_' reads its tokens from partition (mb_y_ & (num_parts_ - 1)).
+    VP8BitReader* const token_br =
+        dec->parts_ + (dec->mb_y_ & (dec->num_parts_ - 1));
+    VP8InitScanline(dec);
+    for (dec->mb_x_ = 0; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
+      if (!VP8DecodeMB(dec, token_br)) {
+        return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
+                           "Premature end-of-file encountered.");
+      }
+      VP8ReconstructBlock(dec);
+      VP8StoreBlock(dec);   // store data and save block's filtering params
+    }
+    if (!VP8ProcessRow(dec, io)) {
+      return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
+    }
+  }
+  if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) {
+    return 0;
+  }
+
+  // Finish
+#ifndef ONLY_KEYFRAME_CODE
+  // Restore the probabilities saved by VP8GetHeaders().
+  if (!dec->update_proba_) {
+    dec->proba_ = dec->proba_saved_;
+  }
+#endif
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  if (dec->layer_data_size_ > 0 && !VP8DecodeLayer(dec)) {
+    return 0;
+  }
+#endif
+
+  return 1;
+}
+
+// Main entry point
+int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
+  int ok;
+  if (dec == NULL) {
+    return 0;
+  }
+  if (io == NULL) {
+    return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
+                       "NULL VP8Io parameter in VP8Decode().");
+  }
+
+  // Parse the headers now if the decoder is not marked ready yet.
+  if (!dec->ready_ && !VP8GetHeaders(dec, io)) {
+    return 0;
+  }
+  assert(dec->ready_);
+
+  // Finish setting up the decoding parameter. Will call io->setup().
+  ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
+  if (ok) {   // good to go.
+    // Will allocate memory and prepare everything.
+    ok = VP8InitFrame(dec, io);
+
+    // Main decoding loop
+    if (ok) {
+      ok = ParseFrame(dec, io);
+    }
+
+    // Exit.
+    ok &= VP8ExitCritical(dec, io);
+  }
+
+  if (ok) {
+    dec->ready_ = 0;
+    return ok;
+  }
+  VP8Clear(dec);
+  return 0;
+}
+
+// Ends the worker (if threads are in use), frees 'dec->mem_', zeroes the
+// main bit-reader and marks the decoder as not ready. NULL is accepted.
+void VP8Clear(VP8Decoder* const dec) {
+  if (dec == NULL) {
+    return;
+  }
+  if (dec->use_threads_) {
+    WebPWorkerEnd(&dec->worker_);
+  }
+  free(dec->mem_);   // free(NULL) is a no-op
+  dec->mem_ = NULL;
+  dec->mem_size_ = 0;
+  memset(&dec->br_, 0, sizeof(dec->br_));
+  dec->ready_ = 0;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/vp8i.h b/drivers/webpold/dec/vp8i.h
new file mode 100644
index 0000000000..4382edfd8e
--- /dev/null
+++ b/drivers/webpold/dec/vp8i.h
@@ -0,0 +1,335 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// VP8 decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DEC_VP8I_H_
+#define WEBP_DEC_VP8I_H_
+
+#include <string.h>     // for memcpy()
+#include "./vp8li.h"
+#include "../utils/bit_reader.h"
+#include "../utils/thread.h"
+#include "../dsp/dsp.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define DEC_MAJ_VERSION 0
+#define DEC_MIN_VERSION 2
+#define DEC_REV_VERSION 0
+
+#define ONLY_KEYFRAME_CODE      // to remove any code related to P-Frames
+
+// intra prediction modes
+enum { B_DC_PRED = 0,   // 4x4 modes
+       B_TM_PRED,
+       B_VE_PRED,
+       B_HE_PRED,
+       B_RD_PRED,
+       B_VR_PRED,
+       B_LD_PRED,
+       B_VL_PRED,
+       B_HD_PRED,
+       B_HU_PRED,
+       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
+
+       // Luma16 or UV modes
+       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
+       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
+       B_PRED = NUM_BMODES,   // refined I4x4 mode
+
+       // special modes
+       B_DC_PRED_NOTOP = 4,
+       B_DC_PRED_NOLEFT = 5,
+       B_DC_PRED_NOTOPLEFT = 6,
+       NUM_B_DC_MODES = 7 };
+
+enum { MB_FEATURE_TREE_PROBS = 3,
+       NUM_MB_SEGMENTS = 4,
+       NUM_REF_LF_DELTAS = 4,
+       NUM_MODE_LF_DELTAS = 4,    // I4x4, ZERO, *, SPLIT
+       MAX_NUM_PARTITIONS = 8,
+       // Probabilities
+       NUM_TYPES = 4,
+       NUM_BANDS = 8,
+       NUM_CTX = 3,
+       NUM_PROBAS = 11,
+       NUM_MV_PROBAS = 19 };
+
+// YUV-cache parameters.
+// Constraints are: We need to store one 16x16 block of luma samples (y),
+// and two 8x8 chroma blocks (u/v). These had better be 16-byte aligned,
+// in order to be SIMD-friendly. We also need to store the top, left and
+// top-left samples (from previously decoded blocks), along with four
+// extra top-right samples for luma (intra4x4 prediction only).
+// One possible layout is, using 32 * (17 + 9) bytes:
+//
+//   .+------   <- only 1 pixel high
+//   .|yyyyt.
+//   .|yyyyt.
+//   .|yyyyt.
+//   .|yyyy..
+//   .+--.+--   <- only 1 pixel high
+//   .|uu.|vv
+//   .|uu.|vv
+//
+// Every character is a 4x4 block, with legend:
+//  '.' = unused
+//  'y' = y-samples   'u' = u-samples     'v' = v-samples
+//  '|' = left sample,   '-' = top sample,    '+' = top-left sample
+//  't' = extra top-right sample for 4x4 modes
+// With this layout, BPS (=Bytes Per Scan-line) is one cacheline size.
+#define BPS       32    // this is the common stride used by yuv[]
+#define YUV_SIZE (BPS * 17 + BPS * 9)
+#define Y_SIZE   (BPS * 17)
+#define Y_OFF    (BPS * 1 + 8)
+#define U_OFF    (Y_OFF + BPS * 16 + BPS)
+#define V_OFF    (U_OFF + 16)
+
+//------------------------------------------------------------------------------
+// Headers
+
+typedef struct {
+  uint8_t key_frame_;
+  uint8_t profile_;
+  uint8_t show_;
+  uint32_t partition_length_;
+} VP8FrameHeader;
+
+typedef struct {
+  uint16_t width_;
+  uint16_t height_;
+  uint8_t xscale_;
+  uint8_t yscale_;
+  uint8_t colorspace_;   // 0 = YCbCr
+  uint8_t clamp_type_;
+} VP8PictureHeader;
+
+// segment features
+typedef struct {
+  int use_segment_;
+  int update_map_;        // whether to update the segment map or not
+  int absolute_delta_;    // absolute or delta values for quantizer and filter
+  int8_t quantizer_[NUM_MB_SEGMENTS];        // quantization changes
+  int8_t filter_strength_[NUM_MB_SEGMENTS];  // filter strength for segments
+} VP8SegmentHeader;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+  uint8_t segments_[MB_FEATURE_TREE_PROBS];
+  // Type: 0:Intra16-AC  1:Intra16-DC   2:Chroma   3:Intra4
+  uint8_t coeffs_[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+#ifndef ONLY_KEYFRAME_CODE
+  uint8_t ymode_[4], uvmode_[3];
+  uint8_t mv_[2][NUM_MV_PROBAS];
+#endif
+} VP8Proba;
+
+// Filter parameters
+typedef struct {
+  int simple_;                  // 0=complex, 1=simple
+  int level_;                   // [0..63]
+  int sharpness_;               // [0..7]
+  int use_lf_delta_;
+  int ref_lf_delta_[NUM_REF_LF_DELTAS];
+  int mode_lf_delta_[NUM_MODE_LF_DELTAS];
+} VP8FilterHeader;
+
+//------------------------------------------------------------------------------
+// Information about the macroblocks.
+
+typedef struct {  // filter specs
+  unsigned int f_level_:6;      // filter strength: 0..63
+  unsigned int f_ilevel_:6;     // inner limit: 1..63
+  unsigned int f_inner_:1;      // do inner filtering?
+} VP8FInfo;
+
+typedef struct {  // used for syntax-parsing
+  unsigned int nz_;          // non-zero AC/DC coeffs
+  unsigned int dc_nz_:1;     // non-zero DC coeffs
+  unsigned int skip_:1;      // block type
+} VP8MB;
+
+// Dequantization matrices
+typedef int quant_t[2];      // [DC / AC].  Can be 'uint16_t[2]' too (~slower).
+typedef struct {
+  quant_t y1_mat_, y2_mat_, uv_mat_;
+} VP8QuantMatrix;
+
+// Persistent information needed by the parallel processing
+typedef struct {
+  int id_;            // cache row to process (in [0..2])
+  int mb_y_;          // macroblock position of the row
+  int filter_row_;    // true if row-filtering is needed
+  VP8FInfo* f_info_;  // filter strengths
+  VP8Io io_;          // copy of the VP8Io to pass to put()
+} VP8ThreadContext;
+
+//------------------------------------------------------------------------------
+// VP8Decoder: the main opaque structure handed over to user
+
+struct VP8Decoder {  // decoder state: headers, bit readers, caches, MB scratch
+  VP8StatusCode status_;
+  int ready_;     // true if ready to decode a picture with VP8Decode()
+  const char* error_msg_;  // set when status_ is not OK.
+
+  // Main data source
+  VP8BitReader br_;
+
+  // headers
+  VP8FrameHeader   frm_hdr_;
+  VP8PictureHeader pic_hdr_;
+  VP8FilterHeader  filter_hdr_;
+  VP8SegmentHeader segment_hdr_;
+
+  // Worker
+  WebPWorker worker_;
+  int use_threads_;    // use multi-thread
+  int cache_id_;       // current cache row
+  int num_caches_;     // number of cached rows of 16 pixels (1, 2 or 3)
+  VP8ThreadContext thread_ctx_;  // Thread context
+
+  // dimension, in macroblock units.
+  int mb_w_, mb_h_;
+
+  // Macroblock to process/filter, depending on cropping and filter_type.
+  int tl_mb_x_, tl_mb_y_;  // top-left MB that must be in-loop filtered
+  int br_mb_x_, br_mb_y_;  // last bottom-right MB that must be decoded
+
+  // number of partitions.
+  int num_parts_;
+  // per-partition boolean decoders.
+  VP8BitReader parts_[MAX_NUM_PARTITIONS];
+
+  // buffer refresh flags
+  //   bit 0: refresh Gold, bit 1: refresh Alt
+  //   bit 2-3: copy to Gold, bit 4-5: copy to Alt
+  //   bit 6: Gold sign bias, bit 7: Alt sign bias
+  //   bit 8: refresh last frame
+  uint32_t buffer_flags_;
+
+  // dequantization (one set of DC/AC dequant factor per segment)
+  VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
+
+  // probabilities
+  VP8Proba proba_;
+  int use_skip_proba_;
+  uint8_t skip_p_;
+#ifndef ONLY_KEYFRAME_CODE
+  uint8_t intra_p_, last_p_, golden_p_;
+  VP8Proba proba_saved_;
+  int update_proba_;
+#endif
+
+  // Boundary data cache and persistent buffers.
+  uint8_t* intra_t_;     // top intra modes values: 4 * mb_w_
+  uint8_t  intra_l_[4];  // left intra modes values
+  uint8_t* y_t_;         // top luma samples: 16 * mb_w_
+  uint8_t* u_t_, *v_t_;  // top u/v samples: 8 * mb_w_ each
+
+  VP8MB* mb_info_;       // contextual macroblock info (mb_w_ + 1)
+  VP8FInfo* f_info_;     // filter strength info
+  uint8_t* yuv_b_;       // main block for Y/U/V (size = YUV_SIZE)
+  int16_t* coeffs_;      // 384 coeffs = (16+8+8) * 4*4
+
+  uint8_t* cache_y_;     // macroblock row for storing unfiltered samples
+  uint8_t* cache_u_;     // NOTE(review): presumably the U counterpart - confirm
+  uint8_t* cache_v_;     // NOTE(review): presumably the V counterpart - confirm
+  int cache_y_stride_;   // NOTE(review): presumably the cache_y_ row stride
+  int cache_uv_stride_;  // NOTE(review): presumably shared by cache_u_/cache_v_
+
+  // main memory chunk for the above data. Persistent.
+  void* mem_;
+  size_t mem_size_;      // NOTE(review): presumably the size of mem_ - confirm
+
+  // Per macroblock non-persistent infos.
+  int mb_x_, mb_y_;       // current position, in macroblock units
+  uint8_t is_i4x4_;       // true if intra4x4
+  uint8_t imodes_[16];    // one 16x16 mode (#0) or sixteen 4x4 modes
+  uint8_t uvmode_;        // chroma prediction mode
+  uint8_t segment_;       // block's segment
+
+  // bit-wise info about the content of each sub-4x4 blocks: there are 16 bits
+  // for luma (bits #0->#15), then 4 bits for chroma-u (#16->#19) and 4 bits for
+  // chroma-v (#20->#23), each corresponding to one 4x4 block in decoding order.
+  // If the bit is set, the 4x4 block contains some non-zero coefficients.
+  uint32_t non_zero_;
+  uint32_t non_zero_ac_;  // NOTE(review): presumably same layout, AC only
+
+  // Filtering side-info
+  int filter_type_;                         // 0=off, 1=simple, 2=complex
+  int filter_row_;                          // per-row flag
+  uint8_t filter_levels_[NUM_MB_SEGMENTS];  // precalculated per-segment
+
+  // extensions
+  const uint8_t* alpha_data_;   // compressed alpha data (if present)
+  size_t alpha_data_size_;      // NOTE(review): presumably size of alpha_data_
+  uint8_t* alpha_plane_;        // output. Persistent, contains the whole data.
+
+  int layer_colorspace_;
+  const uint8_t* layer_data_;   // compressed layer data (if present)
+  size_t layer_data_size_;      // NOTE(review): presumably size of layer_data_
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in vp8.c
+int VP8SetError(VP8Decoder* const dec,
+                VP8StatusCode error, const char* const msg);
+
+// in tree.c
+void VP8ResetProba(VP8Proba* const proba);
+void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec);
+void VP8ParseIntraMode(VP8BitReader* const br,  VP8Decoder* const dec);
+
+// in quant.c
+void VP8ParseQuant(VP8Decoder* const dec);
+
+// in frame.c
+int VP8InitFrame(VP8Decoder* const dec, VP8Io* io);
+// Predict a block and add residual
+void VP8ReconstructBlock(VP8Decoder* const dec);
+// Call io->setup() and finish setting up scan parameters.
+// After this call returns, one must always call VP8ExitCritical() with the
+// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
+// if ok, otherwise sets and returns the error status on *dec.
+VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
+// Must always be called in pair with VP8EnterCritical().
+// Returns false in case of error.
+int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
+// Process the last decoded row (filtering + output)
+int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
+// Store a block, along with filtering params
+void VP8StoreBlock(VP8Decoder* const dec);
+// To be called at the start of a new scanline, to initialize predictors.
+void VP8InitScanline(VP8Decoder* const dec);
+// Decode one macroblock. Returns false if there is not enough data.
+int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
+
+// in alpha.c
+const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
+                                      int row, int num_rows);
+
+// in layer.c
+int VP8DecodeLayer(VP8Decoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_VP8I_H_ */
diff --git a/drivers/webpold/dec/vp8l.c b/drivers/webpold/dec/vp8l.c
new file mode 100644
index 0000000000..897e4395c7
--- /dev/null
+++ b/drivers/webpold/dec/vp8l.c
@@ -0,0 +1,1200 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// main entry for the decoder
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "./vp8li.h"
+#include "../dsp/lossless.h"
+#include "../dsp/yuv.h"
+#include "../utils/huffman.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NUM_ARGB_CACHE_ROWS          16  // rows between process_func() calls
+
+static const int kCodeLengthLiterals = 16;    // codes below it: literal lengths
+static const int kCodeLengthRepeatCode = 16;  // code repeating previous length
+static const int kCodeLengthExtraBits[3] = { 2, 3, 7 };  // for codes 16/17/18
+static const int kCodeLengthRepeatOffsets[3] = { 3, 3, 11 };  // base repeats
+
+// -----------------------------------------------------------------------------
+//  Five Huffman codes are used at each meta code:
+//  1. green + length prefix codes + color cache codes,
+//  2. red,
+//  3. blue,
+//  4. alpha, and,
+//  5. distance prefix codes.
+typedef enum {  // index into HTreeGroup::htrees_[] and kAlphabetSize[]
+  GREEN = 0,    // this tree also yields length-prefix and color-cache codes
+  RED   = 1,
+  BLUE  = 2,
+  ALPHA = 3,
+  DIST  = 4
+} HuffIndex;
+
+static const uint16_t kAlphabetSize[HUFFMAN_CODES_PER_META_CODE] = {
+  NUM_LITERAL_CODES + NUM_LENGTH_CODES,  // GREEN (cache size is added later)
+  NUM_LITERAL_CODES, NUM_LITERAL_CODES, NUM_LITERAL_CODES,  // RED, BLUE, ALPHA
+  NUM_DISTANCE_CODES                     // DIST
+};
+
+
+#define NUM_CODE_LENGTH_CODES       19
+// The i-th code-length code length read from the stream goes to this index.
+static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
+  17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+};
+
+#define CODE_TO_PLANE_CODES        120
+// Entry layout: high nibble = yoffset, low nibble = 8 - xoffset.
+static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
+   0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
+   0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
+   0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
+   0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
+   0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
+   0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
+   0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
+   0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
+   0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
+   0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
+   0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
+   0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
+};
+
+static int DecodeImageStream(int xsize, int ysize,
+                             int is_level0,
+                             VP8LDecoder* const dec,
+                             uint32_t** const decoded_data);
+
+//------------------------------------------------------------------------------
+
+int VP8LCheckSignature(const uint8_t* const data, size_t size) {
+  return (size > 0 && data[0] == VP8L_MAGIC_BYTE) ? 1 : 0;  // magic byte first
+}
+
+// Parses the VP8L header: signature byte, width-1, height-1, alpha flag and
+// version. Returns 0 on a bad signature or a non-zero version, 1 otherwise.
+static int ReadImageInfo(VP8LBitReader* const br,
+                         int* const width, int* const height,
+                         int* const has_alpha) {
+  const uint8_t signature = VP8LReadBits(br, 8);
+  if (!VP8LCheckSignature(&signature, 1)) return 0;
+  *width = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
+  *height = VP8LReadBits(br, VP8L_IMAGE_SIZE_BITS) + 1;
+  *has_alpha = VP8LReadBits(br, 1);
+  // The format defines version 0 only; any other value is an error.
+  return (VP8LReadBits(br, VP8L_VERSION_BITS) == 0);
+}
+
+// Peeks at a VP8L header; any of the three output pointers may be NULL.
+int VP8LGetInfo(const uint8_t* data, size_t data_size,
+                int* const width, int* const height, int* const has_alpha) {
+  int img_width, img_height, img_alpha;
+  VP8LBitReader reader;
+  if (data == NULL || data_size < VP8L_FRAME_HEADER_SIZE) {
+    return 0;         // not enough data
+  }
+  VP8LInitBitReader(&reader, data, data_size);
+  if (!ReadImageInfo(&reader, &img_width, &img_height, &img_alpha)) {
+    return 0;         // header rejected
+  }
+  if (width != NULL) *width = img_width;
+  if (height != NULL) *height = img_height;
+  if (has_alpha != NULL) *has_alpha = img_alpha;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+// Symbols 0..3 map directly; larger ones add extra bits read from 'br'.
+static WEBP_INLINE int GetCopyDistance(int distance_symbol,
+                                       VP8LBitReader* const br) {
+  if (distance_symbol >= 4) {
+    const int num_extra = (distance_symbol - 2) >> 1;
+    const int base = (2 + (distance_symbol & 1)) << num_extra;
+    return base + VP8LReadBits(br, num_extra) + 1;
+  }
+  return distance_symbol + 1;
+}
+
+static WEBP_INLINE int GetCopyLength(int length_symbol,
+                                     VP8LBitReader* const br) {
+  const int copy_length = GetCopyDistance(length_symbol, br);
+  return copy_length;  // Lengths use the same prefix coding as distances.
+}
+
+static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
+  int lut_entry, dy, dx, linear;
+  if (plane_code > CODE_TO_PLANE_CODES) {
+    return plane_code - CODE_TO_PLANE_CODES;
+  }
+  lut_entry = code_to_plane_lut[plane_code - 1];
+  dy = lut_entry >> 4;           // high nibble: row offset
+  dx = 8 - (lut_entry & 0xf);    // low nibble: column offset, biased by 8
+  linear = dy * xsize + dx;
+  return (linear < 1) ? 1 : linear;
+}
+
+//------------------------------------------------------------------------------
+// Decodes the next Huffman code from bit-stream.
+// FillBitWindow(br) needs to be called at minimum every second call
+// to ReadSymbolUnsafe.
+static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
+  const HuffmanTreeNode* cur = tree->root_;
+  assert(cur != NULL);
+  // Descend one branch per input bit until a leaf is reached.
+  for (; !HuffmanTreeNodeIsLeaf(cur);
+       cur = HuffmanTreeNextNode(cur, VP8LReadOneBitUnsafe(br))) {}
+  return cur->symbol_;
+}
+
+static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
+                                  VP8LBitReader* const br) {
+  const HuffmanTreeNode* cur;
+  if (br->pos_ + 8 <= br->len_) {
+    return ReadSymbolUnsafe(tree, br);
+  }
+  // Fewer than 8 bytes left past pos_: use the non-'Unsafe' bit reader.
+  cur = tree->root_;
+  assert(cur != NULL);
+  while (!HuffmanTreeNodeIsLeaf(cur)) {
+    cur = HuffmanTreeNextNode(cur, VP8LReadOneBit(br));
+  }
+  return cur->symbol_;
+}
+
+static int ReadHuffmanCodeLengths(
+    VP8LDecoder* const dec, const int* const code_length_code_lengths,
+    int num_symbols, int* const code_lengths) {
+  int ok = 0;
+  VP8LBitReader* const br = &dec->br_;
+  int symbol;
+  int max_symbol;
+  int prev_code_len = DEFAULT_CODE_LENGTH;
+  HuffmanTree tree;
+  // Decodes code lengths into code_lengths[]. Returns 1 if ok, 0 on error.
+  if (!HuffmanTreeBuildImplicit(&tree, code_length_code_lengths,
+                                NUM_CODE_LENGTH_CODES)) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    return 0;
+  }
+
+  if (VP8LReadBits(br, 1)) {    // use length
+    const int length_nbits = 2 + 2 * VP8LReadBits(br, 3);
+    max_symbol = 2 + VP8LReadBits(br, length_nbits);
+    if (max_symbol > num_symbols) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      goto End;
+    }
+  } else {
+    max_symbol = num_symbols;
+  }
+
+  symbol = 0;
+  while (symbol < num_symbols) {
+    int code_len;
+    if (max_symbol-- == 0) break;  // stop after max_symbol decoded tokens
+    VP8LFillBitWindow(br);
+    code_len = ReadSymbol(&tree, br);
+    if (code_len < kCodeLengthLiterals) {
+      code_lengths[symbol++] = code_len;
+      if (code_len != 0) prev_code_len = code_len;  // reused by repeat code
+    } else {
+      const int use_prev = (code_len == kCodeLengthRepeatCode);
+      const int slot = code_len - kCodeLengthLiterals;
+      const int extra_bits = kCodeLengthExtraBits[slot];
+      const int repeat_offset = kCodeLengthRepeatOffsets[slot];
+      int repeat = VP8LReadBits(br, extra_bits) + repeat_offset;
+      if (symbol + repeat > num_symbols) {
+        dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+        goto End;
+      } else {
+        const int length = use_prev ? prev_code_len : 0;  // others repeat 0
+        while (repeat-- > 0) code_lengths[symbol++] = length;
+      }
+    }
+  }
+  ok = 1;
+
+ End:
+  HuffmanTreeRelease(&tree);
+  return ok;
+}
+
+static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec,
+                           HuffmanTree* const tree) {
+  int ok = 0;
+  VP8LBitReader* const br = &dec->br_;
+  const int simple_code = VP8LReadBits(br, 1);
+  // Reads one Huffman code description from the bitstream and builds 'tree'.
+  if (simple_code) {  // Read symbols, codes & code lengths directly.
+    int symbols[2];
+    int codes[2];
+    int code_lengths[2];
+    const int num_symbols = VP8LReadBits(br, 1) + 1;
+    const int first_symbol_len_code = VP8LReadBits(br, 1);
+    // The first code is either 1 bit or 8 bit code.
+    symbols[0] = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8);
+    codes[0] = 0;
+    code_lengths[0] = num_symbols - 1;  // 0 bits when there is one symbol
+    // The second code (if present), is always 8 bit long.
+    if (num_symbols == 2) {
+      symbols[1] = VP8LReadBits(br, 8);
+      codes[1] = 1;
+      code_lengths[1] = num_symbols - 1;
+    }
+    ok = HuffmanTreeBuildExplicit(tree, code_lengths, codes, symbols,
+                                  alphabet_size, num_symbols);
+  } else {  // Decode Huffman-coded code lengths.
+    int* code_lengths = NULL;
+    int i;
+    int code_length_code_lengths[NUM_CODE_LENGTH_CODES] = { 0 };
+    const int num_codes = VP8LReadBits(br, 4) + 4;  // 4-bit value plus 4
+    if (num_codes > NUM_CODE_LENGTH_CODES) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      return 0;
+    }
+
+    code_lengths =
+        (int*)WebPSafeCalloc((uint64_t)alphabet_size, sizeof(*code_lengths));
+    if (code_lengths == NULL) {
+      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+      return 0;
+    }
+
+    for (i = 0; i < num_codes; ++i) {
+      code_length_code_lengths[kCodeLengthCodeOrder[i]] = VP8LReadBits(br, 3);
+    }
+    ok = ReadHuffmanCodeLengths(dec, code_length_code_lengths, alphabet_size,
+                                code_lengths);
+    if (ok) {
+      ok = HuffmanTreeBuildImplicit(tree, code_lengths, alphabet_size);
+    }
+    free(code_lengths);  // only needed while building the tree
+  }
+  ok = ok && !br->error_;  // a bit-reader error also counts as failure
+  if (!ok) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    return 0;
+  }
+  return 1;
+}
+
+// Releases every Huffman tree of every group, then the group array itself.
+// A NULL 'htree_groups' is accepted and ignored.
+static void DeleteHtreeGroups(HTreeGroup* htree_groups, int num_htree_groups) {
+  int group, code;
+  if (htree_groups == NULL) return;
+  for (group = 0; group < num_htree_groups; ++group) {
+    for (code = 0; code < HUFFMAN_CODES_PER_META_CODE; ++code) {
+      HuffmanTreeRelease(&htree_groups[group].htrees_[code]);
+    }
+  }
+  free(htree_groups);
+}
+
+static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
+                            int color_cache_bits, int allow_recursion) {
+  int i, j;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  uint32_t* huffman_image = NULL;
+  HTreeGroup* htree_groups = NULL;
+  int num_htree_groups = 1;
+  // Reads the optional meta-Huffman image, then every tree of every group.
+  if (allow_recursion && VP8LReadBits(br, 1)) {
+    // use meta Huffman codes.
+    const int huffman_precision = VP8LReadBits(br, 3) + 2;
+    const int huffman_xsize = VP8LSubSampleSize(xsize, huffman_precision);
+    const int huffman_ysize = VP8LSubSampleSize(ysize, huffman_precision);
+    const int huffman_pixs = huffman_xsize * huffman_ysize;
+    if (!DecodeImageStream(huffman_xsize, huffman_ysize, 0, dec,
+                           &huffman_image)) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      goto Error;
+    }
+    hdr->huffman_subsample_bits_ = huffman_precision;
+    for (i = 0; i < huffman_pixs; ++i) {
+      // The huffman data is stored in red and green bytes.
+      const int index = (huffman_image[i] >> 8) & 0xffff;
+      huffman_image[i] = index;
+      if (index >= num_htree_groups) {
+        num_htree_groups = index + 1;  // group count = highest index + 1
+      }
+    }
+  }
+
+  if (br->error_) goto Error;
+
+  assert(num_htree_groups <= 0x10000);
+  htree_groups =
+      (HTreeGroup*)WebPSafeCalloc((uint64_t)num_htree_groups,
+                                  sizeof(*htree_groups));
+  if (htree_groups == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  for (i = 0; i < num_htree_groups; ++i) {
+    HuffmanTree* const htrees = htree_groups[i].htrees_;
+    for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+      int alphabet_size = kAlphabetSize[j];
+      if (j == 0 && color_cache_bits > 0) {
+        alphabet_size += 1 << color_cache_bits;  // room for color-cache codes
+      }
+      if (!ReadHuffmanCode(alphabet_size, dec, htrees + j)) goto Error;
+    }
+  }
+
+  // All OK. Finalize pointers and return.
+  hdr->huffman_image_ = huffman_image;
+  hdr->num_htree_groups_ = num_htree_groups;
+  hdr->htree_groups_ = htree_groups;
+  return 1;
+
+ Error:
+  free(huffman_image);  // still NULL if no meta image was decoded
+  DeleteHtreeGroups(htree_groups, num_htree_groups);  // accepts NULL
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// Scaling.
+
+// Allocates, in one zeroed chunk, the WebPRescaler struct followed by its
+// work area and the scaled-output buffer, then initializes the rescaler.
+static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) {
+  const int channels = 4;
+  const int src_w = io->mb_w;
+  const int src_h = io->mb_h;
+  const int dst_w = io->scaled_width;
+  const int dst_h = io->scaled_height;
+  // Sizes of the three sub-areas, in bytes.
+  const uint64_t rescaler_bytes = sizeof(*dec->rescaler);
+  const uint64_t work_bytes =
+      2 * channels * (uint64_t)dst_w * sizeof(int32_t);
+  const uint64_t row_bytes = channels * (uint64_t)dst_w * sizeof(uint32_t);
+  const uint64_t total_bytes = rescaler_bytes + work_bytes + row_bytes;
+  uint8_t* const chunk = (uint8_t*)WebPSafeCalloc(total_bytes, sizeof(*chunk));
+  int32_t* work_area;
+  uint8_t* scaled_out;
+
+  if (chunk == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+  }
+  assert(dec->rescaler_memory == NULL);
+  dec->rescaler_memory = chunk;
+  dec->rescaler = (WebPRescaler*)chunk;
+  work_area = (int32_t*)(chunk + rescaler_bytes);
+  scaled_out = chunk + rescaler_bytes + work_bytes;
+  WebPRescalerInit(dec->rescaler, src_w, src_h, scaled_out,
+                   dst_w, dst_h, 0, channels,
+                   src_w, dst_w, src_h, dst_h, work_area);
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Export to ARGB
+
+// We have special "export" function since we need to convert from BGRA
+static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace,
+                  int rgba_stride, uint8_t* const rgba) {
+  const uint32_t* const bgra_row = (const uint32_t*)rescaler->dst;
+  const int row_width = rescaler->dst_width;
+  int rows_done;
+  // Drain every row the rescaler has pending, converting each from BGRA.
+  for (rows_done = 0; WebPRescalerHasPendingOutput(rescaler); ++rows_done) {
+    uint8_t* const out_row = rgba + rows_done * rgba_stride;
+    WebPRescalerExportRow(rescaler);
+    VP8LConvertFromBGRA(bgra_row, row_width, colorspace, out_row);
+  }
+  return rows_done;
+}
+
+// Emit scaled rows.
+static int EmitRescaledRows(const VP8LDecoder* const dec,
+                            const uint32_t* const data, int in_stride, int mb_h,
+                            uint8_t* const out, int out_stride) {
+  const uint8_t* const src_bytes = (const uint8_t*)data;
+  const WEBP_CSP_MODE csp = dec->output_->colorspace;
+  int rows_in = 0, rows_out = 0;
+  while (rows_in < mb_h) {
+    // Import input rows, then export whatever output rows are pending.
+    const int rows_left = mb_h - rows_in;
+    const uint8_t* const src_row = src_bytes + rows_in * in_stride;
+    uint8_t* const dst_row = out + rows_out * out_stride;
+    rows_in += WebPRescalerImport(dec->rescaler, rows_left, src_row, in_stride);
+    rows_out += Export(dec->rescaler, csp, out_stride, dst_row);
+  }
+  return rows_out;
+}
+
+// Emit rows without any scaling.
+static int EmitRows(WEBP_CSP_MODE colorspace,
+                    const uint32_t* const data, int in_stride,
+                    int mb_w, int mb_h,
+                    uint8_t* const out, int out_stride) {
+  const uint8_t* src_row = (const uint8_t*)data;
+  uint8_t* dst_row = out;
+  int y;
+  for (y = 0; y < mb_h; ++y) {
+    VP8LConvertFromBGRA((const uint32_t*)src_row, mb_w, colorspace, dst_row);
+    src_row += in_stride;
+    dst_row += out_stride;
+  }
+  return mb_h;  // One output row per input row.
+}
+
+//------------------------------------------------------------------------------
+// Export to YUVA
+
+static void ConvertToYUVA(const uint32_t* const src, int width, int y_pos,
+                          const WebPDecBuffer* const output) {  // one row out
+  const WebPYUVABuffer* const buf = &output->u.YUVA;  // destination planes
+  // first, the luma plane
+  {
+    int i;
+    uint8_t* const y = buf->y + y_pos * buf->y_stride;
+    for (i = 0; i < width; ++i) {
+      const uint32_t p = src[i];
+      y[i] = VP8RGBToY((p >> 16) & 0xff, (p >> 8) & 0xff, (p >> 0) & 0xff);
+    }
+  }
+
+  // then U/V planes
+  {
+    uint8_t* const u = buf->u + (y_pos >> 1) * buf->u_stride;  // two source
+    uint8_t* const v = buf->v + (y_pos >> 1) * buf->v_stride;  // rows per row
+    const int uv_width = width >> 1;  // full horizontal pixel pairs
+    int i;
+    for (i = 0; i < uv_width; ++i) {
+      const uint32_t v0 = src[2 * i + 0];
+      const uint32_t v1 = src[2 * i + 1];
+      // VP8RGBToU/V expects four accumulated pixels. Hence we need to
+      // scale r/g/b value by a factor 2. We just shift v0/v1 one bit less.
+      const int r = ((v0 >> 15) & 0x1fe) + ((v1 >> 15) & 0x1fe);
+      const int g = ((v0 >>  7) & 0x1fe) + ((v1 >>  7) & 0x1fe);
+      const int b = ((v0 <<  1) & 0x1fe) + ((v1 <<  1) & 0x1fe);
+      if (!(y_pos & 1)) {  // even lines: store values
+        u[i] = VP8RGBToU(r, g, b);
+        v[i] = VP8RGBToV(r, g, b);
+      } else {             // odd lines: average with previous values
+        const int tmp_u = VP8RGBToU(r, g, b);
+        const int tmp_v = VP8RGBToV(r, g, b);
+        // Approximated average-of-four. But it's an acceptable diff.
+        u[i] = (u[i] + tmp_u + 1) >> 1;
+        v[i] = (v[i] + tmp_v + 1) >> 1;
+      }
+    }
+    if (width & 1) {       // last pixel
+      const uint32_t v0 = src[2 * i + 0];  // here i == uv_width
+      const int r = (v0 >> 14) & 0x3fc;    // single pixel, scaled by 4
+      const int g = (v0 >>  6) & 0x3fc;
+      const int b = (v0 <<  2) & 0x3fc;
+      if (!(y_pos & 1)) {  // even lines
+        u[i] = VP8RGBToU(r, g, b);
+        v[i] = VP8RGBToV(r, g, b);
+      } else {             // odd lines (note: we could just skip this)
+        const int tmp_u = VP8RGBToU(r, g, b);
+        const int tmp_v = VP8RGBToV(r, g, b);
+        u[i] = (u[i] + tmp_u + 1) >> 1;
+        v[i] = (v[i] + tmp_v + 1) >> 1;
+      }
+    }
+  }
+  // Lastly, store alpha if needed.
+  if (buf->a != NULL) {
+    int i;
+    uint8_t* const a = buf->a + y_pos * buf->a_stride;
+    for (i = 0; i < width; ++i) a[i] = (src[i] >> 24);  // top byte of pixel
+  }
+}
+
+static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) {
+  WebPRescaler* const rescaler = dec->rescaler;
+  const uint32_t* const scaled_row = (const uint32_t*)rescaler->dst;
+  const int scaled_width = rescaler->dst_width;
+  const int first_row = y_pos;
+  // Convert each pending rescaled row into consecutive output rows.
+  for (; WebPRescalerHasPendingOutput(rescaler); ++y_pos) {
+    WebPRescalerExportRow(rescaler);
+    ConvertToYUVA(scaled_row, scaled_width, y_pos, dec->output_);
+  }
+  // Number of rows emitted by this call.
+  return y_pos - first_row;
+}
+
+static int EmitRescaledRowsYUVA(const VP8LDecoder* const dec,
+                                const uint32_t* const data,
+                                int in_stride, int mb_h) {
+  const uint8_t* const src_bytes = (const uint8_t*)data;
+  int out_row = dec->last_out_row_;
+  int rows_in;
+  for (rows_in = 0; rows_in < mb_h; ) {
+    const uint8_t* const src_row = src_bytes + rows_in * in_stride;
+    rows_in += WebPRescalerImport(dec->rescaler, mb_h - rows_in,
+                                  src_row, in_stride);
+    out_row += ExportYUVA(dec, out_row);
+  }
+  return out_row;  // Index of the next output row, not a row count.
+}
+
+static int EmitRowsYUVA(const VP8LDecoder* const dec,
+                        const uint32_t* const data, int in_stride,
+                        int mb_w, int num_rows) {
+  const uint8_t* const bytes = (const uint8_t*)data;
+  const int first_row = dec->last_out_row_;
+  int n;
+  for (n = 0; n < num_rows; ++n) {
+    const uint32_t* const src_row = (const uint32_t*)(bytes + n * in_stride);
+    ConvertToYUVA(src_row, mb_w, first_row + n, dec->output_);
+  }
+  return first_row + n;  // Index of the next output row.
+}
+
+//------------------------------------------------------------------------------
+// Cropping.
+
+// Sets io->mb_y, io->mb_h & io->mb_w according to start row, end row and
+// crop options. Also updates the input data pointer, so that it points to the
+// start of the cropped window.
+// Note that 'pixel_stride' is in units of 'uint32_t' (and not 'bytes').
+// Returns true if the crop window is not empty.
+static int SetCropWindow(VP8Io* const io, int y_start, int y_end,
+                         const uint32_t** const in_data, int pixel_stride) {
+  int first_row, last_row;
+  assert(y_start < y_end);
+  assert(io->crop_left < io->crop_right);
+  // Clamp the row range [y_start, y_end) to the vertical crop bounds.
+  last_row = (y_end > io->crop_bottom) ? io->crop_bottom : y_end;
+  first_row = y_start;
+  if (first_row < io->crop_top) {
+    // Skip the rows above the crop window in the input data as well.
+    *in_data += pixel_stride * (io->crop_top - first_row);
+    first_row = io->crop_top;
+  }
+  if (first_row >= last_row) return 0;  // Nothing left after cropping.
+
+  *in_data += io->crop_left;  // Skip the columns left of the window.
+  io->mb_y = first_row - io->crop_top;
+  io->mb_w = io->crop_right - io->crop_left;
+  io->mb_h = last_row - first_row;
+  return 1;  // The window holds at least one row.
+}
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int GetMetaIndex(
+    const uint32_t* const image, int xsize, int bits, int x, int y) {
+  // With bits == 0 the index is always 0 and 'image' is not read.
+  return (bits == 0) ? 0 : image[xsize * (y >> bits) + (x >> bits)];
+}
+
+static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
+                                                   int x, int y) {
+  const int group = GetMetaIndex(hdr->huffman_image_, hdr->huffman_xsize_,
+                                 hdr->huffman_subsample_bits_, x, y);
+  assert(group < hdr->num_htree_groups_);
+  return &hdr->htree_groups_[group];  // group selected for pixel (x, y)
+}
+
+//------------------------------------------------------------------------------
+// Main loop, with custom row-processing function
+
+typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
+
+static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
+                                   const uint32_t* const rows) {
+  const int first_row = dec->last_row_;
+  const int last_row = first_row + num_rows;
+  const size_t num_bytes = dec->width_ * num_rows * sizeof(uint32_t);
+  uint32_t* const cache = dec->argb_cache_;
+  const uint32_t* input = rows;
+  int idx;
+  // Seed the cache with the raw rows, then undo the transforms in reverse
+  // index order. After the first pass the cache is both input and output.
+  // TODO: most transforms only need to operate on the cropped region only.
+  memcpy(cache, rows, num_bytes);
+  for (idx = dec->next_transform_ - 1; idx >= 0; --idx) {
+    VP8LInverseTransform(&dec->transforms_[idx], first_row, last_row,
+                         input, cache);
+    input = cache;
+  }
+}
+
+// Processes (transforms, scales & color-converts) the rows decoded after the
+// last call.
+static void ProcessRows(VP8LDecoder* const dec, int row) {
+  const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_;
+  const int num_rows = row - dec->last_row_;  // rows decoded since last call
+
+  if (num_rows <= 0) return;  // Nothing to be done.
+  ApplyInverseTransforms(dec, num_rows, rows);
+
+  // Emit output.
+  {
+    VP8Io* const io = dec->io_;
+    const uint32_t* rows_data = dec->argb_cache_;  // transformed rows
+    if (!SetCropWindow(io, dec->last_row_, row, &rows_data, io->width)) {
+      // Nothing to output (this time).
+    } else {
+      const WebPDecBuffer* const output = dec->output_;
+      const int in_stride = io->width * sizeof(*rows_data);  // in bytes
+      if (output->colorspace < MODE_YUV) {  // convert to RGBA
+        const WebPRGBABuffer* const buf = &output->u.RGBA;
+        uint8_t* const rgba = buf->rgba + dec->last_out_row_ * buf->stride;
+        const int num_rows_out = io->use_scaling ?
+            EmitRescaledRows(dec, rows_data, in_stride, io->mb_h,
+                             rgba, buf->stride) :
+            EmitRows(output->colorspace, rows_data, in_stride,
+                     io->mb_w, io->mb_h, rgba, buf->stride);
+        // Update 'last_out_row_'.
+        dec->last_out_row_ += num_rows_out;
+      } else {                              // convert to YUVA
+        dec->last_out_row_ = io->use_scaling ?  // emitters return the new index
+            EmitRescaledRowsYUVA(dec, rows_data, in_stride, io->mb_h) :
+            EmitRowsYUVA(dec, rows_data, in_stride, io->mb_w, io->mb_h);
+      }
+      assert(dec->last_out_row_ <= output->height);
+    }
+  }
+
+  // Update 'last_row_'.
+  dec->last_row_ = row;
+  assert(dec->last_row_ <= dec->height_);
+}
+
+static int DecodeImageData(VP8LDecoder* const dec,
+                           uint32_t* const data, int width, int height,
+                           ProcessRowsFunc process_func) {
+  int ok = 1;
+  int col = 0, row = 0;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  HTreeGroup* htree_group = hdr->htree_groups_;
+  uint32_t* src = data;          // next pixel to write
+  uint32_t* last_cached = data;  // first pixel not yet put in the color cache
+  uint32_t* const src_end = data + width * height;
+  const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
+  const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
+  VP8LColorCache* const color_cache =
+      (hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
+  const int mask = hdr->huffman_mask_;
+  // Decodes up to width * height pixels into 'data'. Returns 0 on failure.
+  assert(htree_group != NULL);
+
+  while (!br->eos_ && src < src_end) {
+    int code;
+    // Only update when changing tile. Note we could use the following test:
+    //   if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
+    // but that's actually slower and requires storing the previous col/row
+    if ((col & mask) == 0) {
+      htree_group = GetHtreeGroupForPos(hdr, col, row);
+    }
+    VP8LFillBitWindow(br);
+    code = ReadSymbol(&htree_group->htrees_[GREEN], br);
+    if (code < NUM_LITERAL_CODES) {   // Literal.
+      int red, green, blue, alpha;
+      red = ReadSymbol(&htree_group->htrees_[RED], br);
+      green = code;
+      VP8LFillBitWindow(br);
+      blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
+      alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
+      *src = (alpha << 24) + (red << 16) + (green << 8) + blue;
+ AdvanceByOne:
+      ++src;
+      ++col;
+      if (col >= width) {
+        col = 0;
+        ++row;
+        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+          process_func(dec, row);  // flush every NUM_ARGB_CACHE_ROWS rows
+        }
+        if (color_cache != NULL) {
+          while (last_cached < src) {
+            VP8LColorCacheInsert(color_cache, *last_cached++);
+          }
+        }
+      }
+    } else if (code < len_code_limit) {           // Backward reference
+      int dist_code, dist;
+      const int length_sym = code - NUM_LITERAL_CODES;
+      const int length = GetCopyLength(length_sym, br);
+      const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
+      VP8LFillBitWindow(br);
+      dist_code = GetCopyDistance(dist_symbol, br);
+      dist = PlaneCodeToDistance(width, dist_code);
+      if (src - data < dist || src_end - src < length) {  // out of bounds
+        ok = 0;
+        goto End;
+      }
+      {
+        int i;
+        for (i = 0; i < length; ++i) src[i] = src[i - dist];  // may overlap
+        src += length;
+      }
+      col += length;
+      while (col >= width) {
+        col -= width;
+        ++row;
+        if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
+          process_func(dec, row);
+        }
+      }
+      if (src < src_end) {
+        htree_group = GetHtreeGroupForPos(hdr, col, row);
+        if (color_cache != NULL) {
+          while (last_cached < src) {
+            VP8LColorCacheInsert(color_cache, *last_cached++);
+          }
+        }
+      }
+    } else if (code < color_cache_limit) {    // Color cache.
+      const int key = code - len_code_limit;
+      assert(color_cache != NULL);
+      while (last_cached < src) {
+        VP8LColorCacheInsert(color_cache, *last_cached++);
+      }
+      *src = VP8LColorCacheLookup(color_cache, key);
+      goto AdvanceByOne;
+    } else {    // Not reached.
+      ok = 0;
+      goto End;
+    }
+    ok = !br->error_;
+    if (!ok) goto End;
+  }
+  // Process the remaining rows corresponding to last row-block.
+  if (process_func != NULL) process_func(dec, row);
+
+ End:
+  if (br->error_ || !ok || (br->eos_ && src < src_end)) {
+    ok = 0;
+    dec->status_ = (!br->eos_) ?
+        VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;  // eos => suspended
+  } else if (src == src_end) {
+    dec->state_ = READ_DATA;  // every pixel has been decoded
+  }
+
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LTransform
+
+static void ClearTransform(VP8LTransform* const transform) {
+  free(transform->data_);   // data_ may be NULL (free(NULL) is a no-op).
+  transform->data_ = NULL;  // so that clearing twice is harmless.
+}
+
+// For security reasons, the color map is expanded to span every index value
+// that can be bundled in a pixel (not just num_colors). Returns 0 on OOM.
+static int ExpandColorMap(int num_colors, VP8LTransform* const transform) {
+  const int full_size = 1 << (8 >> transform->bits_);
+  const int coded_bytes = 4 * num_colors;
+  const int total_bytes = 4 * full_size;
+  uint32_t* const palette =
+      (uint32_t*)WebPSafeMalloc((uint64_t)full_size, sizeof(*palette));
+  const uint8_t* const deltas = (const uint8_t*)transform->data_;
+  uint8_t* const out = (uint8_t*)palette;
+  int k;
+
+  if (palette == NULL) return 0;
+
+  palette[0] = transform->data_[0];   // first entry is stored verbatim.
+  // Remaining entries are byte-wise deltas against the previous entry
+  // (equivalent to AddPixelEq(), on a byte-basis).
+  for (k = 4; k < coded_bytes; ++k) {
+    out[k] = (deltas[k] + out[k - 4]) & 0xff;
+  }
+  for (; k < total_bytes; ++k) out[k] = 0;   // black tail.
+  free(transform->data_);
+  transform->data_ = palette;
+  return 1;
+}
+
+static int ReadTransform(int* const xsize, int const* ysize,
+                         VP8LDecoder* const dec) {
+  int ok = 1;   // result: 1 on success, 0 on error.
+  VP8LBitReader* const br = &dec->br_;
+  VP8LTransform* transform = &dec->transforms_[dec->next_transform_];
+  const VP8LImageTransformType type =
+      (VP8LImageTransformType)VP8LReadBits(br, 2);   // 2-bit type tag.
+
+  // Each transform type can only be present once in the stream.
+  if (dec->transforms_seen_ & (1U << type)) {
+    return 0;  // Already there, let's not accept the second same transform.
+  }
+  dec->transforms_seen_ |= (1U << type);
+
+  transform->type_ = type;
+  transform->xsize_ = *xsize;
+  transform->ysize_ = *ysize;
+  transform->data_ = NULL;   // stays NULL for SUBTRACT_GREEN.
+  ++dec->next_transform_;    // slot is claimed even if decoding fails below.
+  assert(dec->next_transform_ <= NUM_TRANSFORMS);
+
+  switch (type) {
+    case PREDICTOR_TRANSFORM:
+    case CROSS_COLOR_TRANSFORM:
+      transform->bits_ = VP8LReadBits(br, 3) + 2;   // in [2, 9].
+      ok = DecodeImageStream(VP8LSubSampleSize(transform->xsize_,
+                                               transform->bits_),
+                             VP8LSubSampleSize(transform->ysize_,
+                                               transform->bits_),
+                             0, dec, &transform->data_);   // sub-image.
+      break;
+    case COLOR_INDEXING_TRANSFORM: {
+       const int num_colors = VP8LReadBits(br, 8) + 1;   // in [1, 256].
+       const int bits = (num_colors > 16) ? 0
+                      : (num_colors > 4) ? 1
+                      : (num_colors > 2) ? 2
+                      : 3;
+       *xsize = VP8LSubSampleSize(transform->xsize_, bits);   // reduced width.
+       transform->bits_ = bits;
+       ok = DecodeImageStream(num_colors, 1, 0, dec, &transform->data_);
+       ok = ok && ExpandColorMap(num_colors, transform);
+      break;
+    }
+    case SUBTRACT_GREEN:
+      break;   // carries no extra data.
+    default:
+      assert(0);    // can't happen
+      break;
+  }
+
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LMetadata
+
+static void InitMetadata(VP8LMetadata* const hdr) {
+  assert(hdr != NULL);
+  memset(hdr, 0, sizeof(*hdr));   // zeroes every field.
+}
+
+static void ClearMetadata(VP8LMetadata* const hdr) {
+  assert(hdr != NULL);
+  // Release everything the metadata owns...
+  free(hdr->huffman_image_);
+  DeleteHtreeGroups(hdr->htree_groups_, hdr->num_htree_groups_);
+  VP8LColorCacheClear(&hdr->color_cache_);
+  memset(hdr, 0, sizeof(*hdr));   // ...then return to the InitMetadata() state.
+}
+
+// -----------------------------------------------------------------------------
+// VP8LDecoder
+
+VP8LDecoder* VP8LNew(void) {
+  VP8LDecoder* const decoder = (VP8LDecoder*)calloc(1, sizeof(*decoder));
+  if (decoder != NULL) {   // other fields stay zeroed by calloc().
+    decoder->status_ = VP8_STATUS_OK;
+    decoder->action_ = decoder->state_ = READ_DIM;
+  }
+  return decoder;          // NULL on allocation failure.
+}
+
+void VP8LClear(VP8LDecoder* const dec) {
+  int n;
+  if (dec == NULL) return;
+  // Header-level state: Huffman image, tree groups and color cache.
+  ClearMetadata(&dec->hdr_);
+  // Transform data, one entry per transform read so far.
+  for (n = dec->next_transform_; n > 0; --n) {
+    ClearTransform(&dec->transforms_[n - 1]);
+  }
+  dec->next_transform_ = 0;
+  dec->transforms_seen_ = 0;
+  // Pixel and rescaler buffers.
+  free(dec->argb_);
+  dec->argb_ = NULL;
+  free(dec->rescaler_memory);
+  dec->rescaler_memory = NULL;
+  dec->output_ = NULL;   // not owned: just drop the reference.
+}
+
+void VP8LDelete(VP8LDecoder* const dec) {
+  // Nothing to release for a NULL decoder.
+  if (dec == NULL) return;
+  VP8LClear(dec);   // free everything the decoder owns first...
+  free(dec);        // ...then the decoder object itself.
+}
+
+static void UpdateDecoder(VP8LDecoder* const dec, int width, int height) {
+  const int bits = dec->hdr_.huffman_subsample_bits_;
+  dec->width_ = width;
+  dec->height_ = height;
+  // Huffman-image fields that depend on the width and subsampling bits.
+  dec->hdr_.huffman_xsize_ = VP8LSubSampleSize(width, bits);
+  dec->hdr_.huffman_mask_ = ~0;   // bits == 0: mask keeps every bit.
+  if (bits != 0) dec->hdr_.huffman_mask_ = (1 << bits) - 1;
+}
+
+static int DecodeImageStream(int xsize, int ysize,
+                             int is_level0,   // non-zero: main image header.
+                             VP8LDecoder* const dec,
+                             uint32_t** const decoded_data) {  // out: pixels.
+  int ok = 1;
+  int transform_xsize = xsize;   // may be reduced by ReadTransform().
+  int transform_ysize = ysize;
+  VP8LBitReader* const br = &dec->br_;
+  VP8LMetadata* const hdr = &dec->hdr_;
+  uint32_t* data = NULL;         // only allocated when !is_level0.
+  int color_cache_bits = 0;      // 0 means: no color cache.
+
+  // Read the transforms (only at level 0; may recurse into this function).
+  if (is_level0) {
+    while (ok && VP8LReadBits(br, 1)) {
+      ok = ReadTransform(&transform_xsize, &transform_ysize, dec);
+    }
+  }
+
+  // Color cache: 1 presence bit, then a 4-bit size in [1, MAX_CACHE_BITS].
+  if (ok && VP8LReadBits(br, 1)) {
+    color_cache_bits = VP8LReadBits(br, 4);
+    ok = (color_cache_bits >= 1 && color_cache_bits <= MAX_CACHE_BITS);
+    if (!ok) {
+      dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+      goto End;
+    }
+  }
+
+  // Read the Huffman codes (may recurse).
+  ok = ok && ReadHuffmanCodes(dec, transform_xsize, transform_ysize,
+                              color_cache_bits, is_level0);
+  if (!ok) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;  // NOTE: replaces earlier status.
+    goto End;
+  }
+
+  // Finish setting up the color-cache
+  if (color_cache_bits > 0) {
+    hdr->color_cache_size_ = 1 << color_cache_bits;
+    if (!VP8LColorCacheInit(&hdr->color_cache_, color_cache_bits)) {
+      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+      ok = 0;
+      goto End;
+    }
+  } else {
+    hdr->color_cache_size_ = 0;
+  }
+  UpdateDecoder(dec, transform_xsize, transform_ysize);
+
+  if (is_level0) {   // level 0 complete: pixels are not decoded here.
+    dec->state_ = READ_HDR;
+    goto End;
+  }
+
+  {
+    const uint64_t total_size = (uint64_t)transform_xsize * transform_ysize;
+    data = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*data));
+    if (data == NULL) {
+      dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+      ok = 0;
+      goto End;
+    }
+  }
+
+  // Use the Huffman trees to decode the LZ77 encoded data.
+  ok = DecodeImageData(dec, data, transform_xsize, transform_ysize, NULL);
+  ok = ok && !br->error_;
+
+ End:
+
+  if (!ok) {
+    free(data);   // data is NULL unless the sub-image buffer was allocated.
+    ClearMetadata(hdr);
+    // If not enough data (br.eos_) resulted in BIT_STREAM_ERROR, update the
+    // status appropriately.
+    if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR && dec->br_.eos_) {
+      dec->status_ = VP8_STATUS_SUSPENDED;
+    }
+  } else {
+    if (decoded_data != NULL) {
+      *decoded_data = data;   // ownership moves to the caller.
+    } else {
+      // We allocate image data in this function only for transforms. At level 0
+      // (that is: not the transforms), we shouldn't have allocated anything.
+      assert(data == NULL);
+      assert(is_level0);
+    }
+    if (!is_level0) ClearMetadata(hdr);  // Clean up temporary data behind.
+  }
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_
+
+static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
+  // Layout of the single allocation, in pixels:
+  //   [decoded image][top-prediction row][NUM_ARGB_CACHE_ROWS scratch rows]
+  // argb_cache_ points at the third region (temporary BGRA storage).
+  const uint64_t image_size = (uint64_t)dec->width_ * dec->height_;
+  const uint64_t top_row_size = final_width;
+  const uint64_t cache_size = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
+  const uint64_t cache_offset = image_size + top_row_size;
+  uint32_t* buf;
+
+  assert(dec->width_ <= final_width);
+  buf = (uint32_t*)WebPSafeMalloc(cache_offset + cache_size, sizeof(*buf));
+  dec->argb_ = buf;
+  dec->argb_cache_ = (buf != NULL) ? buf + cache_offset : NULL;
+  if (buf == NULL) {
+    dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+    return 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Special row-processing that only stores the alpha data.
+
+static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
+  const int num_rows = row - dec->last_row_;   // rows not yet processed.
+  const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
+
+  if (num_rows <= 0) return;  // Nothing to be done.
+  ApplyInverseTransforms(dec, num_rows, in);   // presumably fills argb_cache_.
+
+  // Extract alpha (which is stored in the green plane).
+  {
+    const int width = dec->io_->width;      // the final width (!= dec->width_)
+    const int cache_pixs = width * num_rows;
+    uint8_t* const dst = (uint8_t*)dec->io_->opaque + width * dec->last_row_;
+    const uint32_t* const src = dec->argb_cache_;
+    int i;
+    for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
+  }
+
+  dec->last_row_ = dec->last_out_row_ = row;   // next call resumes at 'row'.
+}
+
+int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
+                               size_t data_size, uint8_t* const output) {
+  VP8Io io;     // stack-local: only carries 'output' and the dimensions.
+  int ok = 0;   // stays 0 on every early exit to Err.
+  VP8LDecoder* const dec = VP8LNew();
+  if (dec == NULL) return 0;
+
+  dec->width_ = width;
+  dec->height_ = height;
+  dec->io_ = &io;
+
+  VP8InitIo(&io);
+  WebPInitCustomIo(NULL, &io);    // Just a sanity Init. io won't be used.
+  io.opaque = output;             // ExtractAlphaRows() writes through this.
+  io.width = width;
+  io.height = height;
+
+  dec->status_ = VP8_STATUS_OK;
+  VP8LInitBitReader(&dec->br_, data, data_size);
+
+  dec->action_ = READ_HDR;
+  if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
+
+  // Allocate output (note that dec->width_ may have changed here).
+  if (!AllocateARGBBuffers(dec, width)) goto Err;
+
+  // Decode (with special row processing).
+  dec->action_ = READ_DATA;
+  ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
+                       ExtractAlphaRows);
+
+ Err:
+  VP8LDelete(dec);   // reached on success too: the decoder is temporary.
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
+  int width, height, has_alpha;   // has_alpha is read but not used here.
+
+  if (dec == NULL) return 0;
+  if (io == NULL) {
+    dec->status_ = VP8_STATUS_INVALID_PARAM;
+    return 0;
+  }
+
+  dec->io_ = io;
+  dec->status_ = VP8_STATUS_OK;
+  VP8LInitBitReader(&dec->br_, io->data, io->data_size);
+  if (!ReadImageInfo(&dec->br_, &width, &height, &has_alpha)) {
+    dec->status_ = VP8_STATUS_BITSTREAM_ERROR;
+    goto Error;
+  }
+  dec->state_ = READ_DIM;
+  io->width = width;     // dimensions are published to the caller's io.
+  io->height = height;
+
+  dec->action_ = READ_HDR;
+  if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Error;
+  return 1;
+
+ Error:
+   VP8LClear(dec);   // leaves dec->status_ untouched.
+   assert(dec->status_ != VP8_STATUS_OK);
+   return 0;
+}
+
+int VP8LDecodeImage(VP8LDecoder* const dec) {
+  VP8Io* io = NULL;
+  WebPDecParams* params = NULL;
+
+  // Sanity checks.
+  if (dec == NULL) return 0;
+
+  io = dec->io_;   // stored by VP8LDecodeHeader().
+  assert(io != NULL);
+  params = (WebPDecParams*)io->opaque;
+  assert(params != NULL);
+  dec->output_ = params->output;
+  assert(dec->output_ != NULL);
+
+  // Initialization.
+  if (!WebPIoInitFromOptions(params->options, io, MODE_BGRA)) {
+    dec->status_ = VP8_STATUS_INVALID_PARAM;
+    goto Err;
+  }
+
+  if (!AllocateARGBBuffers(dec, io->width)) goto Err;   // sets status_ itself.
+
+  if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
+
+  // Decode.
+  dec->action_ = READ_DATA;
+  if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
+                       ProcessRows)) {
+    goto Err;
+  }
+
+  // Cleanup.
+  params->last_y = dec->last_out_row_;   // report the last row output.
+  VP8LClear(dec);
+  return 1;
+
+ Err:
+  VP8LClear(dec);   // leaves dec->status_ untouched.
+  assert(dec->status_ != VP8_STATUS_OK);
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/vp8li.h b/drivers/webpold/dec/vp8li.h
new file mode 100644
index 0000000000..5f6cd6a01c
--- /dev/null
+++ b/drivers/webpold/dec/vp8li.h
@@ -0,0 +1,121 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Lossless decoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora(vikaas.arora@gmail.com)
+
+#ifndef WEBP_DEC_VP8LI_H_
+#define WEBP_DEC_VP8LI_H_
+
+#include <string.h>     // for memcpy()
+#include "./webpi.h"
+#include "../utils/bit_reader.h"
+#include "../utils/color_cache.h"
+#include "../utils/huffman.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef enum {   // decoding stages, used for both action_ and state_.
+  READ_DATA = 0,   // pixel data.
+  READ_HDR = 1,    // transforms, color cache and Huffman codes.
+  READ_DIM = 2     // image dimensions; initial value set by VP8LNew().
+} VP8LDecodeState;
+
+typedef struct VP8LTransform VP8LTransform;
+struct VP8LTransform {
+  VP8LImageTransformType type_;   // transform type.
+  int                    bits_;   // subsampling bits defining transform window.
+  int                    xsize_;  // image width when the transform was read.
+  int                    ysize_;  // image height when the transform was read.
+  uint32_t              *data_;   // heap-allocated transform data, or NULL.
+};
+
+typedef struct {   // one Huffman tree per symbol kind (GREEN, RED, DIST, ...).
+  HuffmanTree htrees_[HUFFMAN_CODES_PER_META_CODE];
+} HTreeGroup;
+
+typedef struct {
+  int             color_cache_size_;   // 1 << cache bits, or 0 if no cache.
+  VP8LColorCache  color_cache_;
+
+  int             huffman_mask_;       // (1 << bits) - 1, or ~0 if bits == 0.
+  int             huffman_subsample_bits_;
+  int             huffman_xsize_;      // VP8LSubSampleSize(width, bits).
+  uint32_t       *huffman_image_;      // heap-allocated, or NULL.
+  int             num_htree_groups_;   // number of entries in htree_groups_.
+  HTreeGroup     *htree_groups_;
+} VP8LMetadata;
+
+typedef struct {
+  VP8StatusCode    status_;        // last status; preserved by VP8LClear().
+  VP8LDecodeState  action_;        // stage requested before each decode call.
+  VP8LDecodeState  state_;         // stage reached so far.
+  VP8Io           *io_;            // not owned by the decoder.
+
+  const WebPDecBuffer *output_;    // shortcut to io->opaque->output
+
+  uint32_t        *argb_;          // Internal data: always in BGRA color mode.
+  uint32_t        *argb_cache_;    // Scratch BGRA rows, inside argb_ allocation.
+
+  VP8LBitReader    br_;
+
+  int              width_;         // size of the image currently decoded.
+  int              height_;
+  int              last_row_;      // last input row decoded so far.
+  int              last_out_row_;  // last row output so far.
+
+  VP8LMetadata     hdr_;
+
+  int              next_transform_;   // number of entries used below.
+  VP8LTransform    transforms_[NUM_TRANSFORMS];
+  // or'd bitset storing the transform types seen so far (1 << type).
+  uint32_t         transforms_seen_;
+
+  uint8_t         *rescaler_memory;  // Working memory for rescaling work.
+  WebPRescaler    *rescaler;         // Common rescaler for all channels.
+} VP8LDecoder;
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// in vp8l.c
+
+// Decodes a raw image stream (without header) and stores the alpha data
+// into *output, which must be of size width x height. Returns false in case
+// of error.
+int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
+                               size_t data_size, uint8_t* const output);
+
+// Allocates and initializes a new lossless decoder instance.
+VP8LDecoder* VP8LNew(void);
+
+// Decodes the image header. Returns false in case of error.
+int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
+
+// Decodes an image. It's required to decode the lossless header before calling
+// this function. Returns false in case of error, with updated dec->status_.
+int VP8LDecodeImage(VP8LDecoder* const dec);
+
+// Resets the decoder to its initial state, reclaiming memory.
+// Preserves the dec->status_ value.
+void VP8LClear(VP8LDecoder* const dec);
+
+// Clears and deallocates a lossless decoder instance.
+void VP8LDelete(VP8LDecoder* const dec);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_VP8LI_H_ */
diff --git a/drivers/webpold/dec/webp.c b/drivers/webpold/dec/webp.c
new file mode 100644
index 0000000000..f44bc2b8ae
--- /dev/null
+++ b/drivers/webpold/dec/webp.c
@@ -0,0 +1,771 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Main decoding functions for WEBP images.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+
+#include "./vp8i.h"
+#include "./vp8li.h"
+#include "./webpi.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// RIFF layout is:
+//   Offset  tag
+//   0...3   "RIFF" 4-byte tag
+//   4...7   size of image data (including metadata) starting at offset 8
+//   8...11  "WEBP"   our form-type signature
+// The RIFF container (12 bytes) is followed by appropriate chunks:
+//   12..15  "VP8 ": 4-bytes tags, signaling the use of VP8 video format
+//   16..19  size of the raw VP8 image data, starting at offset 20
+//   20....  the VP8 bytes
+// Or,
+//   12..15  "VP8L": 4-bytes tags, signaling the use of VP8L lossless format
+//   16..19  size of the raw VP8L image data, starting at offset 20
+//   20....  the VP8L bytes
+// Or,
+//   12..15  "VP8X": 4-bytes tags, describing the extended-VP8 chunk.
+//   16..19  size of the VP8X chunk starting at offset 20.
+//   20..23  VP8X flags bit-map corresponding to the chunk-types present.
+//   24..26  Width of the Canvas Image.
+//   27..29  Height of the Canvas Image.
+// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8,
+// META  ...)
+// All sizes are in little-endian order.
+// Note: chunk data size must be padded to multiple of 2 when written.
+
+static WEBP_INLINE uint32_t get_le24(const uint8_t* const data) {
+  return ((uint32_t)data[2] << 16) | ((uint32_t)data[1] << 8) | data[0];
+}
+
+static WEBP_INLINE uint32_t get_le32(const uint8_t* const data) {
+  return get_le24(data) | ((uint32_t)data[3] << 24);  // unsigned: no UB.
+}
+
+// Validates the RIFF container (if detected) and skips over it.
+// If a RIFF container is detected,
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid header, and
+//         VP8_STATUS_OK otherwise.
+// In case there are not enough bytes (partial RIFF container), return 0 for
+// *riff_size. Else return the RIFF size extracted from the header.
+static VP8StatusCode ParseRIFF(const uint8_t** const data,
+                               size_t* const data_size,
+                               size_t* const riff_size) {
+  uint32_t payload_size;
+  assert(data != NULL);
+  assert(data_size != NULL);
+  assert(riff_size != NULL);
+  *riff_size = 0;  // Stays 0 unless a complete RIFF header is found.
+  if (*data_size < RIFF_HEADER_SIZE || memcmp(*data, "RIFF", TAG_SIZE) != 0) {
+    return VP8_STATUS_OK;   // No (or partial) container: nothing to skip.
+  }
+  if (memcmp(*data + 8, "WEBP", TAG_SIZE) != 0) {
+    return VP8_STATUS_BITSTREAM_ERROR;  // Wrong image file signature.
+  }
+  payload_size = get_le32(*data + TAG_SIZE);
+  // The payload must hold at least one chunk (i.e "WEBP" + "VP8?nnnn").
+  if (payload_size < TAG_SIZE + CHUNK_HEADER_SIZE) {
+    return VP8_STATUS_BITSTREAM_ERROR;
+  }
+  // Valid RIFF container: report its size and step over the header.
+  *riff_size = payload_size;
+  *data += RIFF_HEADER_SIZE;
+  *data_size -= RIFF_HEADER_SIZE;
+  return VP8_STATUS_OK;
+}
+
+// Validates the VP8X header and skips over it.
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid VP8X header,
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// If a VP8X chunk is found, found_vp8x is set to true and *width_ptr,
+// *height_ptr and *flags_ptr are set to the corresponding values extracted
+// from the VP8X chunk.
+static VP8StatusCode ParseVP8X(const uint8_t** const data,
+                               size_t* const data_size,
+                               int* const found_vp8x,
+                               int* const width_ptr, int* const height_ptr,
+                               uint32_t* const flags_ptr) {
+  const uint32_t full_chunk_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+  const uint8_t* chunk;
+  uint32_t payload_size, feature_flags;
+  int canvas_width, canvas_height;
+  assert(data != NULL);
+  assert(data_size != NULL);
+  assert(found_vp8x != NULL);
+  *found_vp8x = 0;
+  chunk = *data;
+  // A chunk header (tag + size) is needed to tell whether this is VP8X.
+  if (*data_size < CHUNK_HEADER_SIZE) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;
+  }
+  if (memcmp(chunk, "VP8X", TAG_SIZE) != 0) {
+    return VP8_STATUS_OK;   // Some other chunk: leave everything untouched.
+  }
+  payload_size = get_le32(chunk + TAG_SIZE);
+  if (payload_size != VP8X_CHUNK_SIZE) {
+    return VP8_STATUS_BITSTREAM_ERROR;  // Wrong chunk size.
+  }
+  // The whole chunk must be available before its fields can be read.
+  if (*data_size < full_chunk_size) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;
+  }
+  feature_flags = get_le32(chunk + 8);
+  canvas_width = 1 + get_le24(chunk + 12);    // stored as 'dimension - 1'.
+  canvas_height = 1 + get_le24(chunk + 15);
+  if (canvas_width * (uint64_t)canvas_height >= MAX_IMAGE_AREA) {
+    return VP8_STATUS_BITSTREAM_ERROR;  // image is too large
+  }
+
+  // Report what the caller asked for, then step over the VP8X chunk.
+  if (flags_ptr != NULL) *flags_ptr = feature_flags;
+  if (width_ptr != NULL) *width_ptr = canvas_width;
+  if (height_ptr != NULL) *height_ptr = canvas_height;
+  *data += full_chunk_size;
+  *data_size -= full_chunk_size;
+  *found_vp8x = 1;
+  return VP8_STATUS_OK;
+}
+
+// Skips to the next VP8/VP8L chunk header in the data given the size of the
+// RIFF chunk 'riff_size'.
+// Returns VP8_STATUS_BITSTREAM_ERROR if any invalid chunk size is encountered,
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// If an alpha chunk is found, *alpha_data and *alpha_size are set
+// appropriately.
+static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
+                                         size_t* const data_size,
+                                         size_t const riff_size,
+                                         const uint8_t** const alpha_data,
+                                         size_t* const alpha_size) {
+  const uint8_t* buf;
+  size_t buf_size;
+  uint32_t total_size = TAG_SIZE +           // "WEBP".
+                        CHUNK_HEADER_SIZE +  // "VP8Xnnnn".
+                        VP8X_CHUNK_SIZE;     // data.
+  assert(data != NULL);
+  assert(data_size != NULL);
+  buf = *data;
+  buf_size = *data_size;
+  assert(alpha_data != NULL);
+  assert(alpha_size != NULL);
+  *alpha_data = NULL;
+  *alpha_size = 0;
+
+  while (1) {
+    uint32_t chunk_size;
+    uint32_t disk_chunk_size;   // chunk_size with padding
+    *data = buf;                // on return, points at the chunk being examined.
+    *data_size = buf_size;
+
+    if (buf_size < CHUNK_HEADER_SIZE) {  // Insufficient data.
+      return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+    chunk_size = get_le32(buf + TAG_SIZE);
+    // Reject oversized payloads first: the padded size below is computed in
+    // 32 bits and could otherwise wrap to a small value that passes the checks.
+    if (chunk_size > MAX_CHUNK_PAYLOAD) return VP8_STATUS_BITSTREAM_ERROR;
+    // For odd-sized chunk-payload, there's one byte padding at the end.
+    disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
+    total_size += disk_chunk_size;
+
+    // Check that total bytes skipped so far does not exceed riff_size.
+    if (riff_size > 0 && (total_size > riff_size)) {
+      return VP8_STATUS_BITSTREAM_ERROR;          // Not a valid chunk size.
+    }
+
+    if (buf_size < disk_chunk_size) {             // Insufficient data.
+      return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+
+    if (!memcmp(buf, "ALPH", TAG_SIZE)) {         // A valid ALPH header.
+      *alpha_data = buf + CHUNK_HEADER_SIZE;
+      *alpha_size = chunk_size;
+    } else if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
+               !memcmp(buf, "VP8L", TAG_SIZE)) {  // A valid VP8/VP8L header.
+      return VP8_STATUS_OK;  // Found.
+    }
+
+    // We have a full and valid chunk; skip it.
+    buf += disk_chunk_size;
+    buf_size -= disk_chunk_size;
+  }
+}
+
+// Validates the VP8/VP8L Header ("VP8 nnnn" or "VP8L nnnn") and skips over it.
+// Returns VP8_STATUS_BITSTREAM_ERROR for invalid (chunk larger than
+//         riff_size) VP8/VP8L header,
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
+//         VP8_STATUS_OK otherwise.
+// If a VP8/VP8L chunk is found, *chunk_size is set to the total number of bytes
+// extracted from the VP8/VP8L chunk header.
+// The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data.
+static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
+                                    size_t* const data_size,
+                                    size_t riff_size,
+                                    size_t* const chunk_size,
+                                    int* const is_lossless) {
+  const uint8_t* const data = *data_ptr;
+  const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE);
+  const int is_vp8l = !memcmp(data, "VP8L", TAG_SIZE);
+  const uint32_t minimal_size =
+      TAG_SIZE + CHUNK_HEADER_SIZE;  // "WEBP" + "VP8 nnnn" OR
+                                     // "WEBP" + "VP8Lnnnn"
+  assert(data != NULL);   // NOTE: 'data' was already read by memcmp() above.
+  assert(data_size != NULL);
+  assert(chunk_size != NULL);
+  assert(is_lossless != NULL);
+
+  if (*data_size < CHUNK_HEADER_SIZE) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;  // Insufficient data.
+  }
+
+  if (is_vp8 || is_vp8l) {
+    // Bitstream contains VP8/VP8L header.
+    const uint32_t size = get_le32(data + TAG_SIZE);
+    if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // Inconsistent size information.
+    }
+    // Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header.
+    *chunk_size = size;
+    *data_ptr += CHUNK_HEADER_SIZE;
+    *data_size -= CHUNK_HEADER_SIZE;
+    *is_lossless = is_vp8l;
+  } else {
+    // Raw VP8/VP8L bitstream (no header): nothing is skipped.
+    *is_lossless = VP8LCheckSignature(data, *data_size);
+    *chunk_size = *data_size;
+  }
+
+  return VP8_STATUS_OK;
+}
+
+//------------------------------------------------------------------------------
+
+// Fetch '*width', '*height', '*has_alpha' and fill out 'headers' based on
+// 'data'. All the output parameters may be NULL. If 'headers' is NULL only the
+// minimal amount will be read to fetch the remaining parameters.
+// If 'headers' is non-NULL this function will attempt to locate both alpha
+// data (with or without a VP8X chunk) and the bitstream chunk (VP8/VP8L).
+// Note: The following chunk sequences (before the raw VP8/VP8L data) are
+// considered valid by this function:
+// RIFF + VP8(L)
+// RIFF + VP8X + (optional chunks) + VP8(L)
+// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
+// VP8(L)     <-- Not a valid WebP format: only allowed for internal purpose.
+static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
+                                          size_t data_size,
+                                          int* const width,
+                                          int* const height,
+                                          int* const has_alpha,
+                                          WebPHeaderStructure* const headers) {
+  int found_riff = 0;
+  int found_vp8x = 0;
+  VP8StatusCode status;
+  WebPHeaderStructure hdrs;   // local copy, published to 'headers' on success.
+
+  if (data == NULL || data_size < RIFF_HEADER_SIZE) {
+    return VP8_STATUS_NOT_ENOUGH_DATA;
+  }
+  memset(&hdrs, 0, sizeof(hdrs));
+  hdrs.data = data;             // start of the input, kept to compute 'offset'.
+  hdrs.data_size = data_size;
+
+  // Skip over RIFF header.
+  status = ParseRIFF(&data, &data_size, &hdrs.riff_size);
+  if (status != VP8_STATUS_OK) {
+    return status;   // Wrong RIFF header.
+  }
+  found_riff = (hdrs.riff_size > 0);   // riff_size stays 0 without container.
+
+  // Skip over VP8X.
+  {
+    uint32_t flags = 0;
+    status = ParseVP8X(&data, &data_size, &found_vp8x, width, height, &flags);
+    if (status != VP8_STATUS_OK) {
+      return status;  // Wrong VP8X / insufficient data.
+    }
+    if (!found_riff && found_vp8x) {
+      // Note: This restriction may be removed in the future, if it becomes
+      // necessary to send VP8X chunk to the decoder.
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+    if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT);
+    if (found_vp8x && headers == NULL) {
+      return VP8_STATUS_OK;  // Return features from VP8X header.
+    }
+  }
+
+  if (data_size < TAG_SIZE) return VP8_STATUS_NOT_ENOUGH_DATA;  // tag read below.
+
+  // Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
+  if ((found_riff && found_vp8x) ||
+      (!found_riff && !found_vp8x && !memcmp(data, "ALPH", TAG_SIZE))) {
+    status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size,
+                                 &hdrs.alpha_data, &hdrs.alpha_data_size);
+    if (status != VP8_STATUS_OK) {
+      return status;  // Found an invalid chunk size / insufficient data.
+    }
+  }
+
+  // Skip over VP8/VP8L header.
+  status = ParseVP8Header(&data, &data_size, hdrs.riff_size,
+                          &hdrs.compressed_size, &hdrs.is_lossless);
+  if (status != VP8_STATUS_OK) {
+    return status;  // Wrong VP8/VP8L chunk-header / insufficient data.
+  }
+  if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
+    return VP8_STATUS_BITSTREAM_ERROR;
+  }
+
+  if (!hdrs.is_lossless) {
+    if (data_size < VP8_FRAME_HEADER_SIZE) {
+      return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+    // Validates raw VP8 data (also outputs width/height).
+    if (!VP8GetInfo(data, data_size,
+                    (uint32_t)hdrs.compressed_size, width, height)) {
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+  } else {
+    if (data_size < VP8L_FRAME_HEADER_SIZE) {
+      return VP8_STATUS_NOT_ENOUGH_DATA;
+    }
+    // Validates raw VP8L data (also outputs width/height/has_alpha).
+    if (!VP8LGetInfo(data, data_size, width, height, has_alpha)) {
+      return VP8_STATUS_BITSTREAM_ERROR;
+    }
+  }
+
+  if (has_alpha != NULL) {
+    // If the data did not contain a VP8X/VP8L chunk the only definitive way
+    // to set this is by looking for alpha data (from an ALPH chunk).
+    *has_alpha |= (hdrs.alpha_data != NULL);
+  }
+  if (headers != NULL) {
+    *headers = hdrs;
+    headers->offset = data - headers->data;   // bytes consumed by the headers.
+    assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
+    assert(headers->offset == headers->data_size - data_size);
+  }
+  return VP8_STATUS_OK;  // Return features from VP8 header.
+}
+
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
+  assert(headers != NULL);
+  // Fill out 'headers' only: width, height and has_alpha are not requested.
+  return ParseHeadersInternal(headers->data, headers->data_size,
+                              NULL, NULL, NULL, headers);
+}
+
+//------------------------------------------------------------------------------
+// WebPDecParams
+
+void WebPResetDecParams(WebPDecParams* const params) {
+  if (params) {
+    memset(params, 0, sizeof(*params));
+  }
+}
+
+//------------------------------------------------------------------------------
+// "Into" decoding variants
+
+// Main flow
+static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
+                                WebPDecParams* const params) {
+  VP8StatusCode status;
+  VP8Io io;
+  WebPHeaderStructure headers;
+
+  headers.data = data;
+  headers.data_size = data_size;
+  status = WebPParseHeaders(&headers);   // Process Pre-VP8 chunks.
+  if (status != VP8_STATUS_OK) {
+    return status;
+  }
+
+  assert(params != NULL);
+  VP8InitIo(&io);
+  io.data = headers.data + headers.offset;
+  io.data_size = headers.data_size - headers.offset;
+  WebPInitCustomIo(params, &io);  // Plug the I/O functions.
+
+  if (!headers.is_lossless) {
+    VP8Decoder* const dec = VP8New();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+#ifdef WEBP_USE_THREAD
+    dec->use_threads_ = params->options && (params->options->use_threads > 0);
+#else
+    dec->use_threads_ = 0;
+#endif
+    dec->alpha_data_ = headers.alpha_data;
+    dec->alpha_data_size_ = headers.alpha_data_size;
+
+    // Decode bitstream header, update io->width/io->height.
+    if (!VP8GetHeaders(dec, &io)) {
+      status = dec->status_;   // An error occurred. Grab error status.
+    } else {
+      // Allocate/check output buffers.
+      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
+                                     params->output);
+      if (status == VP8_STATUS_OK) {  // Decode
+        if (!VP8Decode(dec, &io)) {
+          status = dec->status_;
+        }
+      }
+    }
+    VP8Delete(dec);
+  } else {
+    VP8LDecoder* const dec = VP8LNew();
+    if (dec == NULL) {
+      return VP8_STATUS_OUT_OF_MEMORY;
+    }
+    if (!VP8LDecodeHeader(dec, &io)) {
+      status = dec->status_;   // An error occurred. Grab error status.
+    } else {
+      // Allocate/check output buffers.
+      status = WebPAllocateDecBuffer(io.width, io.height, params->options,
+                                     params->output);
+      if (status == VP8_STATUS_OK) {  // Decode
+        if (!VP8LDecodeImage(dec)) {
+          status = dec->status_;
+        }
+      }
+    }
+    VP8LDelete(dec);
+  }
+
+  if (status != VP8_STATUS_OK) {
+    WebPFreeDecBuffer(params->output);
+  }
+  return status;
+}
+
+// Helpers
+static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
+                                     const uint8_t* const data,
+                                     size_t data_size,
+                                     uint8_t* const rgba,
+                                     int stride, size_t size) {
+  WebPDecParams params;
+  WebPDecBuffer buf;
+  if (rgba == NULL) {
+    return NULL;
+  }
+  WebPInitDecBuffer(&buf);
+  WebPResetDecParams(&params);
+  params.output = &buf;
+  buf.colorspace    = colorspace;
+  buf.u.RGBA.rgba   = rgba;
+  buf.u.RGBA.stride = stride;
+  buf.u.RGBA.size   = size;
+  buf.is_external_memory = 1;
+  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
+    return NULL;
+  }
+  return rgba;
+}
+
+uint8_t* WebPDecodeRGBInto(const uint8_t* data, size_t data_size,
+                           uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_RGB, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeRGBAInto(const uint8_t* data, size_t data_size,
+                            uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_RGBA, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeARGBInto(const uint8_t* data, size_t data_size,
+                            uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_ARGB, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeBGRInto(const uint8_t* data, size_t data_size,
+                           uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_BGR, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
+                            uint8_t* output, size_t size, int stride) {
+  return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
+}
+
+uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
+                           uint8_t* luma, size_t luma_size, int luma_stride,
+                           uint8_t* u, size_t u_size, int u_stride,
+                           uint8_t* v, size_t v_size, int v_stride) {
+  WebPDecParams params;
+  WebPDecBuffer output;
+  if (luma == NULL) return NULL;
+  WebPInitDecBuffer(&output);
+  WebPResetDecParams(&params);
+  params.output = &output;
+  output.colorspace      = MODE_YUV;
+  output.u.YUVA.y        = luma;
+  output.u.YUVA.y_stride = luma_stride;
+  output.u.YUVA.y_size   = luma_size;
+  output.u.YUVA.u        = u;
+  output.u.YUVA.u_stride = u_stride;
+  output.u.YUVA.u_size   = u_size;
+  output.u.YUVA.v        = v;
+  output.u.YUVA.v_stride = v_stride;
+  output.u.YUVA.v_size   = v_size;
+  output.is_external_memory = 1;
+  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
+    return NULL;
+  }
+  return luma;
+}
+
+//------------------------------------------------------------------------------
+
+static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* const data,
+                       size_t data_size, int* const width, int* const height,
+                       WebPDecBuffer* const keep_info) {
+  WebPDecParams params;
+  WebPDecBuffer output;
+
+  WebPInitDecBuffer(&output);
+  WebPResetDecParams(&params);
+  params.output = &output;
+  output.colorspace = mode;
+
+  // Retrieve (and report back) the required dimensions from bitstream.
+  if (!WebPGetInfo(data, data_size, &output.width, &output.height)) {
+    return NULL;
+  }
+  if (width != NULL) *width = output.width;
+  if (height != NULL) *height = output.height;
+
+  // Decode
+  if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
+    return NULL;
+  }
+  if (keep_info != NULL) {    // keep track of the side-info
+    WebPCopyDecBuffer(&output, keep_info);
+  }
+  // return decoded samples (don't clear 'output'!)
+  return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
+}
+
+uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
+                       int* width, int* height) {
+  return Decode(MODE_RGB, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+                        int* width, int* height) {
+  return Decode(MODE_RGBA, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
+                        int* width, int* height) {
+  return Decode(MODE_ARGB, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
+                       int* width, int* height) {
+  return Decode(MODE_BGR, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
+                        int* width, int* height) {
+  return Decode(MODE_BGRA, data, data_size, width, height, NULL);
+}
+
+uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
+                       int* width, int* height, uint8_t** u, uint8_t** v,
+                       int* stride, int* uv_stride) {
+  WebPDecBuffer output;   // only to preserve the side-infos
+  uint8_t* const out = Decode(MODE_YUV, data, data_size,
+                              width, height, &output);
+
+  if (out != NULL) {
+    const WebPYUVABuffer* const buf = &output.u.YUVA;
+    *u = buf->u;
+    *v = buf->v;
+    *stride = buf->y_stride;
+    *uv_stride = buf->u_stride;
+    assert(buf->u_stride == buf->v_stride);
+  }
+  return out;
+}
+
+static void DefaultFeatures(WebPBitstreamFeatures* const features) {
+  assert(features != NULL);
+  memset(features, 0, sizeof(*features));
+  features->bitstream_version = 0;
+}
+
+static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
+                                 WebPBitstreamFeatures* const features) {
+  if (features == NULL || data == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+  DefaultFeatures(features);
+
+  // Only parse enough of the data to retrieve width/height/has_alpha.
+  return ParseHeadersInternal(data, data_size,
+                              &features->width, &features->height,
+                              &features->has_alpha, NULL);
+}
+
+//------------------------------------------------------------------------------
+// WebPGetInfo()
+
+int WebPGetInfo(const uint8_t* data, size_t data_size,
+                int* width, int* height) {
+  WebPBitstreamFeatures features;
+
+  if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
+    return 0;
+  }
+
+  if (width != NULL) {
+    *width  = features.width;
+  }
+  if (height != NULL) {
+    *height = features.height;
+  }
+
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Advanced decoding API
+
+int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
+                                  int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+    return 0;   // version mismatch
+  }
+  if (config == NULL) {
+    return 0;
+  }
+  memset(config, 0, sizeof(*config));
+  DefaultFeatures(&config->input);
+  WebPInitDecBuffer(&config->output);
+  return 1;
+}
+
+VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
+                                      WebPBitstreamFeatures* features,
+                                      int version) {
+  VP8StatusCode status;
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
+    return VP8_STATUS_INVALID_PARAM;   // version mismatch
+  }
+  if (features == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+
+  status = GetFeatures(data, data_size, features);
+  if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+    return VP8_STATUS_BITSTREAM_ERROR;  // Not-enough-data treated as error.
+  }
+  return status;
+}
+
+VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
+                         WebPDecoderConfig* config) {
+  WebPDecParams params;
+  VP8StatusCode status;
+
+  if (config == NULL) {
+    return VP8_STATUS_INVALID_PARAM;
+  }
+
+  status = GetFeatures(data, data_size, &config->input);
+  if (status != VP8_STATUS_OK) {
+    if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
+      return VP8_STATUS_BITSTREAM_ERROR;  // Not-enough-data treated as error.
+    }
+    return status;
+  }
+
+  WebPResetDecParams(&params);
+  params.output = &config->output;
+  params.options = &config->options;
+  status = DecodeInto(data, data_size, &params);
+
+  return status;
+}
+
+//------------------------------------------------------------------------------
+// Cropping and rescaling.
+
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
+                          VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
+  const int W = io->width;
+  const int H = io->height;
+  int x = 0, y = 0, w = W, h = H;
+
+  // Cropping
+  io->use_cropping = (options != NULL) && (options->use_cropping > 0);
+  if (io->use_cropping) {
+    w = options->crop_width;
+    h = options->crop_height;
+    x = options->crop_left;
+    y = options->crop_top;
+    if (!WebPIsRGBMode(src_colorspace)) {   // only snap for YUV420 or YUV422
+      x &= ~1;
+      y &= ~1;    // TODO(later): only for YUV420, not YUV422.
+    }
+    if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
+      return 0;  // out of frame boundary error
+    }
+  }
+  io->crop_left   = x;
+  io->crop_top    = y;
+  io->crop_right  = x + w;
+  io->crop_bottom = y + h;
+  io->mb_w = w;
+  io->mb_h = h;
+
+  // Scaling
+  io->use_scaling = (options != NULL) && (options->use_scaling > 0);
+  if (io->use_scaling) {
+    if (options->scaled_width <= 0 || options->scaled_height <= 0) {
+      return 0;
+    }
+    io->scaled_width = options->scaled_width;
+    io->scaled_height = options->scaled_height;
+  }
+
+  // Filter
+  io->bypass_filtering = options && options->bypass_filtering;
+
+  // Fancy upsampler
+#ifdef FANCY_UPSAMPLING
+  io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
+#endif
+
+  if (io->use_scaling) {
+    // disable filter (only for large downscaling ratio).
+    io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
+                           (io->scaled_height < H * 3 / 4);
+    io->fancy_upsampling = 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dec/webpi.h b/drivers/webpold/dec/webpi.h
new file mode 100644
index 0000000000..44e5744411
--- /dev/null
+++ b/drivers/webpold/dec/webpi.h
@@ -0,0 +1,114 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Internal header: WebP decoding parameters and custom IO on buffer
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#ifndef WEBP_DEC_WEBPI_H_
+#define WEBP_DEC_WEBPI_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include "../utils/rescaler.h"
+#include "./decode_vp8.h"
+
+//------------------------------------------------------------------------------
+// WebPDecParams: Decoding output parameters. Transient internal object.
+
+typedef struct WebPDecParams WebPDecParams;
+typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
+typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos);
+
+struct WebPDecParams {
+  WebPDecBuffer* output;             // output buffer.
+  uint8_t* tmp_y, *tmp_u, *tmp_v;    // cache for the fancy upsampler
+                                     // or used for tmp rescaling
+
+  int last_y;                 // coordinate of the line that was last output
+  const WebPDecoderOptions* options;  // if not NULL, use alt decoding features
+  // rescalers
+  WebPRescaler scaler_y, scaler_u, scaler_v, scaler_a;
+  void* memory;                  // overall scratch memory for the output work.
+
+  OutputFunc emit;               // output RGB or YUV samples
+  OutputFunc emit_alpha;         // output alpha channel
+  OutputRowFunc emit_alpha_row;  // output one line of rescaled alpha values
+};
+
+// Should be called first, before any use of the WebPDecParams object.
+void WebPResetDecParams(WebPDecParams* const params);
+
+//------------------------------------------------------------------------------
+// Header parsing helpers
+
+// Structure storing a description of the RIFF headers.
+typedef struct {
+  const uint8_t* data;         // input buffer
+  size_t data_size;            // input buffer size
+  size_t offset;               // offset to main data chunk (VP8 or VP8L)
+  const uint8_t* alpha_data;   // points to alpha chunk (if present)
+  size_t alpha_data_size;      // alpha chunk size
+  size_t compressed_size;      // VP8/VP8L compressed data size
+  size_t riff_size;            // size of the riff payload (or 0 if absent)
+  int is_lossless;             // true if a VP8L chunk is present
+} WebPHeaderStructure;
+
+// Skips over all valid chunks prior to the first VP8/VP8L frame header.
+// Returns VP8_STATUS_OK on success,
+//         VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and
+//         VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data.
+// In 'headers', the compressed_size, offset, alpha_data, alpha_data_size and
+// is_lossless fields are updated appropriately upon success.
+VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
+
+//------------------------------------------------------------------------------
+// Misc utils
+
+// Initializes VP8Io with custom setup, io and teardown functions. The default
+// hooks will use the supplied 'params' as io->opaque handle.
+void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
+
+// Setup crop_xxx fields, mb_w and mb_h in io. 'src_colorspace' refers
+// to the *compressed* format, not the output one.
+int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
+                          VP8Io* const io, WEBP_CSP_MODE src_colorspace);
+
+//------------------------------------------------------------------------------
+// Internal functions regarding WebPDecBuffer memory (in buffer.c).
+// Don't really need to be externally visible for now.
+
+// Prepare 'buffer' with the requested initial dimensions width/height.
+// If no external storage is supplied, initializes buffer by allocating output
+// memory and setting up the stride information. Validate the parameters. Return
+// an error code in case of problem (no memory, or invalid stride / size /
+// dimension / etc.). If *options is not NULL, also verify that the options'
+// parameters are valid and apply them to the width/height dimensions of the
+// output buffer. This takes cropping / scaling / rotation into account.
+VP8StatusCode WebPAllocateDecBuffer(int width, int height,
+                                    const WebPDecoderOptions* const options,
+                                    WebPDecBuffer* const buffer);
+
+// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
+// memory (still held by 'src').
+void WebPCopyDecBuffer(const WebPDecBuffer* const src,
+                       WebPDecBuffer* const dst);
+
+// Copy and transfer ownership from src to dst (beware of parameter order!)
+void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
+
+
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DEC_WEBPI_H_ */
diff --git a/drivers/webpold/decode.h b/drivers/webpold/decode.h
new file mode 100644
index 0000000000..43b6c58f4f
--- /dev/null
+++ b/drivers/webpold/decode.h
@@ -0,0 +1,454 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//  Main decoding functions for WebP images.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_DECODE_H_
+#define WEBP_WEBP_DECODE_H_
+
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define WEBP_DECODER_ABI_VERSION 0x0200    // MAJOR(8b) + MINOR(8b)
+
+// Return the decoder's version number, packed in hexadecimal using 8bits for
+// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetDecoderVersion(void);
+
+// Retrieve basic header information: width, height.
+// This function will also validate the header and return 0 in
+// case of formatting error.
+// Pointers 'width' and 'height' can be passed NULL if deemed irrelevant.
+WEBP_EXTERN(int) WebPGetInfo(const uint8_t* data, size_t data_size,
+                             int* width, int* height);
+
+// Decodes WebP images pointed to by 'data' and returns RGBA samples, along
+// with the dimensions in *width and *height. The ordering of samples in
+// memory is R, G, B, A, R, G, B, A... in scan order (endian-independent).
+// The returned pointer should be deleted calling free().
+// Returns NULL in case of error.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBA(const uint8_t* data, size_t data_size,
+                                     int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning A, R, G, B, A, R, G, B... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeARGB(const uint8_t* data, size_t data_size,
+                                     int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning B, G, R, A, B, G, R, A... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRA(const uint8_t* data, size_t data_size,
+                                     int* width, int* height);
+
+// Same as WebPDecodeRGBA, but returning R, G, B, R, G, B... ordered data.
+// If the bitstream contains transparency, it is ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGB(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
+
+// Same as WebPDecodeRGB, but returning B, G, R, B, G, R... ordered data.
+WEBP_EXTERN(uint8_t*) WebPDecodeBGR(const uint8_t* data, size_t data_size,
+                                    int* width, int* height);
+
+
+// Decode WebP images pointed to by 'data' to Y'UV format(*). The pointer
+// returned is the Y samples buffer. Upon return, *u and *v will point to
+// the U and V chroma data. These U and V buffers need NOT be free()'d,
+// unlike the returned Y luma one. The dimension of the U and V planes
+// are both (*width + 1) / 2 and (*height + 1)/ 2.
+// Upon return, the Y buffer has a stride returned as '*stride', while U and V
+// have a common stride returned as '*uv_stride'.
+// Return NULL in case of error.
+// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
+WEBP_EXTERN(uint8_t*) WebPDecodeYUV(const uint8_t* data, size_t data_size,
+                                    int* width, int* height,
+                                    uint8_t** u, uint8_t** v,
+                                    int* stride, int* uv_stride);
+
+// These five functions are variants of the above ones, that decode the image
+// directly into a pre-allocated buffer 'output_buffer'. The maximum storage
+// available in this buffer is indicated by 'output_buffer_size'. If this
+// storage is not sufficient (or an error occurred), NULL is returned.
+// Otherwise, output_buffer is returned, for convenience.
+// The parameter 'output_stride' specifies the distance (in bytes)
+// between scanlines. Hence, output_buffer_size is expected to be at least
+// output_stride x picture-height.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBAInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeARGBInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRAInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// RGB and BGR variants. Here too the transparency information, if present,
+// will be dropped and ignored.
+WEBP_EXTERN(uint8_t*) WebPDecodeRGBInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+WEBP_EXTERN(uint8_t*) WebPDecodeBGRInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// WebPDecodeYUVInto() is a variant of WebPDecodeYUV() that operates directly
+// into pre-allocated luma/chroma plane buffers. This function requires the
+// strides to be passed: one for the luma plane and one for each of the
+// chroma ones. The size of each plane buffer is passed as 'luma_size',
+// 'u_size' and 'v_size' respectively.
+// Pointer to the luma plane ('*luma') is returned or NULL if an error occurred
+// during decoding (or because some buffers were found to be too small).
+WEBP_EXTERN(uint8_t*) WebPDecodeYUVInto(
+    const uint8_t* data, size_t data_size,
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride);
+
+//------------------------------------------------------------------------------
+// Output colorspaces and buffer
+
+// Colorspaces
+// Note: the naming describes the byte-ordering of packed samples in memory.
+// For instance, MODE_BGRA relates to samples ordered as B,G,R,A,B,G,R,A,...
+// Non-capital names (e.g.: MODE_Argb) relate to pre-multiplied RGB channels.
+// RGB-565 and RGBA-4444 are also endian-agnostic and byte-oriented.
+typedef enum { MODE_RGB = 0, MODE_RGBA = 1,
+               MODE_BGR = 2, MODE_BGRA = 3,
+               MODE_ARGB = 4, MODE_RGBA_4444 = 5,
+               MODE_RGB_565 = 6,
+               // RGB-premultiplied transparent modes (alpha value is preserved)
+               MODE_rgbA = 7,
+               MODE_bgrA = 8,
+               MODE_Argb = 9,
+               MODE_rgbA_4444 = 10,
+               // YUV modes must come after RGB ones.
+               MODE_YUV = 11, MODE_YUVA = 12,  // yuv 4:2:0
+               MODE_LAST = 13
+             } WEBP_CSP_MODE;
+
+// Some useful macros:
+static WEBP_INLINE int WebPIsPremultipliedMode(WEBP_CSP_MODE mode) {
+  return (mode == MODE_rgbA || mode == MODE_bgrA || mode == MODE_Argb ||
+          mode == MODE_rgbA_4444);
+}
+
+static WEBP_INLINE int WebPIsAlphaMode(WEBP_CSP_MODE mode) {
+  return (mode == MODE_RGBA || mode == MODE_BGRA || mode == MODE_ARGB ||
+          mode == MODE_RGBA_4444 || mode == MODE_YUVA ||
+          WebPIsPremultipliedMode(mode));
+}
+
+static WEBP_INLINE int WebPIsRGBMode(WEBP_CSP_MODE mode) {
+  return (mode < MODE_YUV);
+}
+
+//------------------------------------------------------------------------------
+// WebPDecBuffer: Generic structure for describing the output sample buffer.
+
+typedef struct {    // view as RGBA
+  uint8_t* rgba;    // pointer to RGBA samples
+  int stride;       // stride in bytes from one scanline to the next.
+  size_t size;      // total size of the *rgba buffer.
+} WebPRGBABuffer;
+
+typedef struct {              // view as YUVA
+  uint8_t* y, *u, *v, *a;     // pointer to luma, chroma U/V, alpha samples
+  int y_stride;               // luma stride
+  int u_stride, v_stride;     // chroma strides
+  int a_stride;               // alpha stride
+  size_t y_size;              // luma plane size
+  size_t u_size, v_size;      // chroma planes size
+  size_t a_size;              // alpha-plane size
+} WebPYUVABuffer;
+
+// Output buffer
+typedef struct {
+  WEBP_CSP_MODE colorspace;  // Colorspace.
+  int width, height;         // Dimensions.
+  int is_external_memory;    // If true, 'private_memory' pointer is not used.
+  union {
+    WebPRGBABuffer RGBA;
+    WebPYUVABuffer YUVA;
+  } u;                       // Nameless union of buffer parameters.
+  uint32_t       pad[4];     // padding for later use
+
+  uint8_t* private_memory;   // Internally allocated memory (only when
+                             // is_external_memory is false). Should not be used
+                             // externally, but accessed via the buffer union.
+} WebPDecBuffer;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPInitDecBufferInternal(WebPDecBuffer*, int);
+
+// Initialize the structure as empty. Must be called before any other use.
+// Returns false in case of version mismatch
+static WEBP_INLINE int WebPInitDecBuffer(WebPDecBuffer* buffer) {
+  return WebPInitDecBufferInternal(buffer, WEBP_DECODER_ABI_VERSION);
+}
+
+// Free any memory associated with the buffer. Must always be called last.
+// Note: doesn't free the 'buffer' structure itself.
+WEBP_EXTERN(void) WebPFreeDecBuffer(WebPDecBuffer* buffer);
+
+//------------------------------------------------------------------------------
+// Enumeration of the status codes
+
+typedef enum {
+  VP8_STATUS_OK = 0,
+  VP8_STATUS_OUT_OF_MEMORY,
+  VP8_STATUS_INVALID_PARAM,
+  VP8_STATUS_BITSTREAM_ERROR,
+  VP8_STATUS_UNSUPPORTED_FEATURE,
+  VP8_STATUS_SUSPENDED,
+  VP8_STATUS_USER_ABORT,
+  VP8_STATUS_NOT_ENOUGH_DATA
+} VP8StatusCode;
+
+//------------------------------------------------------------------------------
+// Incremental decoding
+//
+// This API allows streamlined decoding of partial data.
+// Picture can be incrementally decoded as data become available thanks to the
+// WebPIDecoder object. This object can be left in a SUSPENDED state if the
+// picture is only partially decoded, pending additional input.
+// Code example:
+//
+//   WebPInitDecBuffer(&buffer);
+//   buffer.colorspace = mode;
+//   ...
+//   WebPIDecoder* idec = WebPINewDecoder(&buffer);
+//   while (has_more_data) {
+//     // ... (get additional data)
+//     status = WebPIAppend(idec, new_data, new_data_size);
+//     if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) {
+//       break;    // an error occurred.
+//     }
+//
+//     // The above call decodes the current available buffer.
+//     // Part of the image can now be refreshed by calling to
+//     // WebPIDecGetRGB()/WebPIDecGetYUVA() etc.
+//   }
+//   WebPIDelete(idec);
+
+typedef struct WebPIDecoder WebPIDecoder;
+
+// Creates a new incremental decoder with the supplied buffer parameter.
+// This output_buffer can be passed NULL, in which case a default output buffer
+// is used (with MODE_RGB). Otherwise, an internal reference to 'output_buffer'
+// is kept, which means that the lifespan of 'output_buffer' must be larger than
+// that of the returned WebPIDecoder object.
+// Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewDecoder(WebPDecBuffer* output_buffer);
+
+// This function allocates and initializes an incremental-decoder object, which
+// will output the RGB/A samples specified by 'csp' into a preallocated
+// buffer 'output_buffer'. The size of this buffer is at least
+// 'output_buffer_size' and the stride (distance in bytes between two scanlines)
+// is specified by 'output_stride'. Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewRGB(
+    WEBP_CSP_MODE csp,
+    uint8_t* output_buffer, size_t output_buffer_size, int output_stride);
+
+// This function allocates and initializes an incremental-decoder object, which
+// will output the raw luma/chroma samples into preallocated planes. The luma
+// plane is specified by its pointer 'luma', its size 'luma_size' and its stride
+// 'luma_stride'. Similarly, the chroma-u plane is specified by the 'u',
+// 'u_size' and 'u_stride' parameters, and the chroma-v plane by 'v', 'v_size'
+// and 'v_stride'. And same for the alpha-plane. The 'a' pointer can be passed
+// NULL in case one is not interested in the transparency plane.
+// Returns NULL if the allocation failed.
+WEBP_EXTERN(WebPIDecoder*) WebPINewYUVA(
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride,
+    uint8_t* a, size_t a_size, int a_stride);
+
+// Deprecated version of the above, without the alpha plane.
+// Kept for backward compatibility.
+WEBP_EXTERN(WebPIDecoder*) WebPINewYUV(
+    uint8_t* luma, size_t luma_size, int luma_stride,
+    uint8_t* u, size_t u_size, int u_stride,
+    uint8_t* v, size_t v_size, int v_stride);
+
+// Deletes the WebPIDecoder object and associated memory. Must always be called
+// if WebPINewDecoder, WebPINewRGB, WebPINewYUVA or WebPINewYUV succeeded.
+WEBP_EXTERN(void) WebPIDelete(WebPIDecoder* idec);
+
+// Copies and decodes the next available data. Returns VP8_STATUS_OK when
+// the image is successfully decoded. Returns VP8_STATUS_SUSPENDED when more
+// data is expected. Returns error in other cases.
+WEBP_EXTERN(VP8StatusCode) WebPIAppend(
+    WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// A variant of the above function to be used when data buffer contains
+// partial data from the beginning. In this case data buffer is not copied
+// to the internal memory.
+// Note that the value of the 'data' pointer can change between calls to
+// WebPIUpdate, for instance when the data buffer is resized to fit larger data.
+WEBP_EXTERN(VP8StatusCode) WebPIUpdate(
+    WebPIDecoder* idec, const uint8_t* data, size_t data_size);
+
+// Returns the RGB/A image decoded so far. Returns NULL if output params
+// are not initialized yet. The RGB/A output type corresponds to the colorspace
+// specified during call to WebPINewDecoder() or WebPINewRGB().
+// *last_y is the index of last decoded row in raster scan order. Some pointers
+// (*last_y, *width etc.) can be NULL if corresponding information is not
+// needed.
+WEBP_EXTERN(uint8_t*) WebPIDecGetRGB(
+    const WebPIDecoder* idec, int* last_y,
+    int* width, int* height, int* stride);
+
+// Same as above function to get a YUVA image. Returns pointer to the luma
+// plane or NULL in case of error. If there is no alpha information
+// the alpha pointer '*a' will be returned NULL.
+WEBP_EXTERN(uint8_t*) WebPIDecGetYUVA(
+    const WebPIDecoder* idec, int* last_y,
+    uint8_t** u, uint8_t** v, uint8_t** a,
+    int* width, int* height, int* stride, int* uv_stride, int* a_stride);
+
+// Deprecated alpha-less version of WebPIDecGetYUVA(): it will ignore the
+// alpha information (if present). Kept for backward compatibility.
+static WEBP_INLINE uint8_t* WebPIDecGetYUV(
+    const WebPIDecoder* idec, int* last_y, uint8_t** u, uint8_t** v,
+    int* width, int* height, int* stride, int* uv_stride) {
+  return WebPIDecGetYUVA(idec, last_y, u, v, NULL, width, height,
+                         stride, uv_stride, NULL);
+}
+
+// Generic call to retrieve information about the displayable area.
+// If non NULL, the left/top/width/height pointers are filled with the visible
+// rectangular area so far.
+// Returns NULL in case the incremental decoder object is in an invalid state.
+// Otherwise returns the pointer to the internal representation. This structure
+// is read-only, tied to WebPIDecoder's lifespan and should not be modified.
+WEBP_EXTERN(const WebPDecBuffer*) WebPIDecodedArea(
+    const WebPIDecoder* idec, int* left, int* top, int* width, int* height);
+
+//------------------------------------------------------------------------------
+// Advanced decoding parametrization
+//
+//  Code sample for using the advanced decoding API
+/*
+     // A) Init a configuration object
+     WebPDecoderConfig config;
+     CHECK(WebPInitDecoderConfig(&config));
+
+     // B) optional: retrieve the bitstream's features.
+     CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
+
+     // C) Adjust 'config', if needed
+     config.options.no_fancy_upsampling = 1;
+     config.output.colorspace = MODE_BGRA;
+     // etc.
+
+     // Note that you can also make config.output point to an externally
+     // supplied memory buffer, provided it's big enough to store the decoded
+     // picture. Otherwise, config.output will just be used to allocate memory
+     // and store the decoded picture.
+
+     // D) Decode!
+     CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
+
+     // E) Decoded image is now in config.output (and config.output.u.RGBA)
+
+     // F) Reclaim memory allocated in config's object. It's safe to call
+     // this function even if the memory is external and wasn't allocated
+     // by WebPDecode().
+     WebPFreeDecBuffer(&config.output);
+*/
+
+// Features gathered from the bitstream
+typedef struct {
+  int width;        // Width in pixels, as read from the bitstream.
+  int height;       // Height in pixels, as read from the bitstream.
+  int has_alpha;    // True if the bitstream contains an alpha channel.
+
+  // Unused for now:
+  int bitstream_version;        // should be 0 for now. TODO(later)
+  int no_incremental_decoding;  // if true, using incremental decoding is not
+                                // recommended.
+  int rotate;                   // TODO(later)
+  int uv_sampling;              // should be 0 for now. TODO(later)
+  uint32_t pad[3];              // padding for later use
+} WebPBitstreamFeatures;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(VP8StatusCode) WebPGetFeaturesInternal(
+    const uint8_t*, size_t, WebPBitstreamFeatures*, int);
+
+// Retrieve features from the bitstream. The *features structure is filled
+// with information gathered from the bitstream.
+// Returns the VP8StatusCode reported by WebPGetFeaturesInternal(): compare it
+// against VP8_STATUS_OK to detect an error (or, presumably, an ABI mismatch).
+static WEBP_INLINE VP8StatusCode WebPGetFeatures(
+    const uint8_t* data, size_t data_size,
+    WebPBitstreamFeatures* features) {
+  return WebPGetFeaturesInternal(data, data_size, features,
+                                 WEBP_DECODER_ABI_VERSION);
+}
+
+// Decoding options
+typedef struct {
+  int bypass_filtering;               // if true, skip the in-loop filtering
+  int no_fancy_upsampling;            // if true, use faster pointwise upsampler
+  int use_cropping;                   // if true, cropping is applied _first_
+  int crop_left, crop_top;            // top-left position for cropping.
+                                      // Will be snapped to even values.
+  int crop_width, crop_height;        // dimension of the cropping area
+  int use_scaling;                    // if true, scaling is applied _afterward_
+  int scaled_width, scaled_height;    // final resolution
+  int use_threads;                    // if true, use multi-threaded decoding
+
+  // Unused for now:
+  int force_rotation;                 // forced rotation (to be applied _last_)
+  int no_enhancement;                 // if true, discard enhancement layer
+  uint32_t pad[6];                    // padding for later use
+} WebPDecoderOptions;
+
+// Main object storing the configuration for advanced decoding.
+typedef struct {
+  WebPBitstreamFeatures input;  // Immutable bitstream features (optional)
+  WebPDecBuffer output;         // Output buffer (can point to external mem)
+  WebPDecoderOptions options;   // Decoding options
+} WebPDecoderConfig;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPInitDecoderConfigInternal(WebPDecoderConfig*, int);
+
+// Initialize the configuration as empty. This function must always be
+// called first, unless WebPGetFeatures() is to be called.
+// Returns false in case of mismatched version.
+static WEBP_INLINE int WebPInitDecoderConfig(WebPDecoderConfig* config) {
+  return WebPInitDecoderConfigInternal(config, WEBP_DECODER_ABI_VERSION);
+}
+
+// Instantiate a new incremental decoder object with the requested
+// configuration. The bitstream can be passed using 'data' and 'data_size'
+// parameter, in which case the features will be parsed and stored into
+// config->input. Otherwise, 'data' can be NULL and no parsing will occur.
+// Note that 'config' can be NULL too, in which case a default configuration
+// is used.
+// The return WebPIDecoder object must always be deleted calling WebPIDelete().
+// Returns NULL in case of error. NOTE(review): WebPDecoderConfig, as declared
+// in this header, has no 'status' member to carry the error condition.
+WEBP_EXTERN(WebPIDecoder*) WebPIDecode(const uint8_t* data, size_t data_size,
+                                       WebPDecoderConfig* config);
+
+// Non-incremental version. This version decodes the full data at once, taking
+// 'config' into account. Returns decoding status (which should be VP8_STATUS_OK
+// if the decoding was successful).
+WEBP_EXTERN(VP8StatusCode) WebPDecode(const uint8_t* data, size_t data_size,
+                                      WebPDecoderConfig* config);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_DECODE_H_ */
diff --git a/drivers/webpold/dsp/cpu.c b/drivers/webpold/dsp/cpu.c
new file mode 100644
index 0000000000..0228734457
--- /dev/null
+++ b/drivers/webpold/dsp/cpu.c
@@ -0,0 +1,85 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// CPU detection
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+#if defined(__ANDROID__)
+#include <cpu-features.h>
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// SSE2 detection.
+//
+
+// apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
+#if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+  __asm__ volatile (
+    "mov %%ebx, %%edi\n"
+    "cpuid\n"
+    "xchg %%edi, %%ebx\n"
+    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#elif defined(__i386__) || defined(__x86_64__)
+static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
+  __asm__ volatile (
+    "cpuid\n"
+    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type));
+}
+#elif defined(WEBP_MSC_SSE2)
+#define GetCPUInfo __cpuid
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
+static int x86CPUInfo(CPUFeature feature) {
+  int cpu_info[4];
+  GetCPUInfo(cpu_info, 1);  // CPUID leaf 1; cpu_info[] = {eax, ebx, ecx, edx}
+  if (feature == kSSE2) {
+    return 0 != (cpu_info[3] & 0x04000000);  // edx bit 26
+  }
+  if (feature == kSSE3) {
+    return 0 != (cpu_info[2] & 0x00000001);  // ecx bit 0
+  }
+  return 0;  // any other feature is reported as unavailable
+}
+VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
+#elif defined(WEBP_ANDROID_NEON)
+static int AndroidCPUInfo(CPUFeature feature) {
+  const AndroidCpuFamily cpu_family = android_getCpuFamily();
+  const uint64_t cpu_features = android_getCpuFeatures();
+  if (feature == kNEON) {
+    return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
+            0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
+  }
+  return 0;
+}
+VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
+#elif defined(__ARM_NEON__)
+// define a dummy function to enable turning off NEON at runtime by setting
+// VP8GetCPUInfo = NULL
+static int armCPUInfo(CPUFeature feature) {
+  (void)feature;
+  return 1;
+}
+VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
+#else
+VP8CPUInfo VP8GetCPUInfo = NULL;
+#endif
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dsp/dec.c b/drivers/webpold/dsp/dec.c
new file mode 100644
index 0000000000..9ae7b6fa76
--- /dev/null
+++ b/drivers/webpold/dsp/dec.c
@@ -0,0 +1,732 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Speed-critical decoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./dsp.h"
+#include "../dec/vp8i.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// run-time tables (~4k)
+
+static uint8_t abs0[255 + 255 + 1];     // abs(i)
+static uint8_t abs1[255 + 255 + 1];     // abs(i)>>1
+static int8_t sclip1[1020 + 1020 + 1];  // clips [-1020, 1020] to [-128, 127]
+static int8_t sclip2[112 + 112 + 1];    // clips [-112, 112] to [-16, 15]
+static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
+
+// Declared 'volatile' with the intent that it is set to true _last_, once the
+// tables are filled. NOTE(review): 'volatile' is not a memory barrier in C.
+static volatile int tables_ok = 0;
+
+static void DspInitTables(void) {
+  if (!tables_ok) {
+    int i;
+    for (i = -255; i <= 255; ++i) {
+      abs0[255 + i] = (i < 0) ? -i : i;
+      abs1[255 + i] = abs0[255 + i] >> 1;
+    }
+    for (i = -1020; i <= 1020; ++i) {
+      sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
+    }
+    for (i = -112; i <= 112; ++i) {
+      sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
+    }
+    for (i = -255; i <= 255 + 255; ++i) {
+      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+    }
+    tables_ok = 1;
+  }
+}
+
+static WEBP_INLINE uint8_t clip_8b(int v) {
+  return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+#define STORE(x, y, v) \
+  dst[x + y * BPS] = clip_8b(dst[x + y * BPS] + ((v) >> 3))
+
+static const int kC1 = 20091 + (1 << 16);
+static const int kC2 = 35468;
+#define MUL(a, b) (((a) * (b)) >> 16)
+
+static void TransformOne(const int16_t* in, uint8_t* dst) {
+  int C[4 * 4], *tmp;
+  int i;
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // vertical pass
+    const int a = in[0] + in[8];    // [-4096, 4094]
+    const int b = in[0] - in[8];    // [-4095, 4095]
+    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);   // [-3783, 3783]
+    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);   // [-3785, 3781]
+    tmp[0] = a + d;   // [-7881, 7875]
+    tmp[1] = b + c;   // [-7878, 7878]
+    tmp[2] = b - c;   // [-7878, 7878]
+    tmp[3] = a - d;   // [-7877, 7879]
+    tmp += 4;
+    in++;
+  }
+  // Each pass is expanding the dynamic range by ~3.85 (upper bound).
+  // The exact value is (2. + (kC1 + kC2) / 65536).
+  // After the second pass, maximum interval is [-3794, 3794], assuming
+  // an input in [-2048, 2047] interval. We then need to add a dst value
+  // in the [0, 255] range.
+  // In the worst case scenario, the input to clip_8b() can be as large as
+  // [-60713, 60968].
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // horizontal pass
+    const int dc = tmp[0] + 4;
+    const int a =  dc +  tmp[8];
+    const int b =  dc -  tmp[8];
+    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
+    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+    STORE(0, 0, a + d);
+    STORE(1, 0, b + c);
+    STORE(2, 0, b - c);
+    STORE(3, 0, a - d);
+    tmp++;
+    dst += BPS;
+  }
+}
+#undef MUL
+
+static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
+  TransformOne(in, dst);
+  if (do_two) {
+    TransformOne(in + 16, dst + 4);
+  }
+}
+
+static void TransformUV(const int16_t* in, uint8_t* dst) {
+  VP8Transform(in + 0 * 16, dst, 1);
+  VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
+}
+
+static void TransformDC(const int16_t *in, uint8_t* dst) {
+  const int DC = in[0] + 4;
+  int i, j;
+  for (j = 0; j < 4; ++j) {
+    for (i = 0; i < 4; ++i) {
+      STORE(i, j, DC);
+    }
+  }
+}
+
+static void TransformDCUV(const int16_t* in, uint8_t* dst) {
+  if (in[0 * 16]) TransformDC(in + 0 * 16, dst);
+  if (in[1 * 16]) TransformDC(in + 1 * 16, dst + 4);
+  if (in[2 * 16]) TransformDC(in + 2 * 16, dst + 4 * BPS);
+  if (in[3 * 16]) TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
+}
+
+#undef STORE
+
+//------------------------------------------------------------------------------
+// Paragraph 14.3
+
+static void TransformWHT(const int16_t* in, int16_t* out) {
+  int tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i) {
+    const int a0 = in[0 + i] + in[12 + i];
+    const int a1 = in[4 + i] + in[ 8 + i];
+    const int a2 = in[4 + i] - in[ 8 + i];
+    const int a3 = in[0 + i] - in[12 + i];
+    tmp[0  + i] = a0 + a1;
+    tmp[8  + i] = a0 - a1;
+    tmp[4  + i] = a3 + a2;
+    tmp[12 + i] = a3 - a2;
+  }
+  for (i = 0; i < 4; ++i) {
+    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
+    const int a0 = dc             + tmp[3 + i * 4];
+    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
+    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
+    const int a3 = dc             - tmp[3 + i * 4];
+    out[ 0] = (a0 + a1) >> 3;
+    out[16] = (a3 + a2) >> 3;
+    out[32] = (a0 - a1) >> 3;
+    out[48] = (a3 - a2) >> 3;
+    out += 64;
+  }
+}
+
+void (*VP8TransformWHT)(const int16_t* in, int16_t* out) = TransformWHT;
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+#define DST(x, y) dst[(x) + (y) * BPS]
+
+static WEBP_INLINE void TrueMotion(uint8_t *dst, int size) {
+  const uint8_t* top = dst - BPS;
+  const uint8_t* const clip0 = clip1 + 255 - top[-1];
+  int y;
+  for (y = 0; y < size; ++y) {
+    const uint8_t* const clip = clip0 + dst[-1];
+    int x;
+    for (x = 0; x < size; ++x) {
+      dst[x] = clip[top[x]];
+    }
+    dst += BPS;
+  }
+}
+static void TM4(uint8_t *dst)   { TrueMotion(dst, 4); }
+static void TM8uv(uint8_t *dst) { TrueMotion(dst, 8); }
+static void TM16(uint8_t *dst)  { TrueMotion(dst, 16); }
+
+//------------------------------------------------------------------------------
+// 16x16
+
+static void VE16(uint8_t *dst) {     // vertical
+  int j;
+  for (j = 0; j < 16; ++j) {
+    memcpy(dst + j * BPS, dst - BPS, 16);
+  }
+}
+
+static void HE16(uint8_t *dst) {     // horizontal
+  int j;
+  for (j = 16; j > 0; --j) {
+    memset(dst, dst[-1], 16);
+    dst += BPS;
+  }
+}
+
+static WEBP_INLINE void Put16(int v, uint8_t* dst) {
+  int j;
+  for (j = 0; j < 16; ++j) {
+    memset(dst + j * BPS, v, 16);
+  }
+}
+
+static void DC16(uint8_t *dst) {    // DC
+  int DC = 16;
+  int j;
+  for (j = 0; j < 16; ++j) {
+    DC += dst[-1 + j * BPS] + dst[j - BPS];
+  }
+  Put16(DC >> 5, dst);
+}
+
+static void DC16NoTop(uint8_t *dst) {   // DC with top samples not available
+  int DC = 8;
+  int j;
+  for (j = 0; j < 16; ++j) {
+    DC += dst[-1 + j * BPS];
+  }
+  Put16(DC >> 4, dst);
+}
+
+static void DC16NoLeft(uint8_t *dst) {  // DC with left samples not available
+  int DC = 8;
+  int i;
+  for (i = 0; i < 16; ++i) {
+    DC += dst[i - BPS];
+  }
+  Put16(DC >> 4, dst);
+}
+
+static void DC16NoTopLeft(uint8_t *dst) {  // DC with no top and left samples
+  Put16(0x80, dst);
+}
+
+//------------------------------------------------------------------------------
+// 4x4
+
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+static void VE4(uint8_t *dst) {    // vertical
+  const uint8_t* top = dst - BPS;
+  const uint8_t vals[4] = {
+    AVG3(top[-1], top[0], top[1]),
+    AVG3(top[ 0], top[1], top[2]),
+    AVG3(top[ 1], top[2], top[3]),
+    AVG3(top[ 2], top[3], top[4])
+  };
+  int i;
+  for (i = 0; i < 4; ++i) {
+    memcpy(dst + i * BPS, vals, sizeof(vals));
+  }
+}
+
+static void HE4(uint8_t *dst) {    // horizontal
+  const int A = dst[-1 - BPS];
+  const int B = dst[-1];
+  const int C = dst[-1 + BPS];
+  const int D = dst[-1 + 2 * BPS];
+  const int E = dst[-1 + 3 * BPS];
+  *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(A, B, C);
+  *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(B, C, D);
+  *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(C, D, E);
+  *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(D, E, E);
+}
+
+static void DC4(uint8_t *dst) {   // DC
+  uint32_t dc = 4;
+  int i;
+  for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
+  dc >>= 3;
+  for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
+}
+
+static void RD4(uint8_t *dst) {   // Down-right
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int L = dst[-1 + 3 * BPS];
+  const int X = dst[-1 - BPS];
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  DST(0, 3)                                     = AVG3(J, K, L);
+  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
+  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
+  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
+  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
+  DST(3, 0)                                     = AVG3(D, C, B);
+}
+
+static void LD4(uint8_t *dst) {   // Down-Left
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  const int E = dst[4 - BPS];
+  const int F = dst[5 - BPS];
+  const int G = dst[6 - BPS];
+  const int H = dst[7 - BPS];
+  DST(0, 0)                                     = AVG3(A, B, C);
+  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
+  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
+  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
+  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
+  DST(3, 3)                                     = AVG3(G, H, H);
+}
+
+static void VR4(uint8_t *dst) {   // Vertical-Right
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int X = dst[-1 - BPS];
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  DST(0, 0) = DST(1, 2) = AVG2(X, A);
+  DST(1, 0) = DST(2, 2) = AVG2(A, B);
+  DST(2, 0) = DST(3, 2) = AVG2(B, C);
+  DST(3, 0)             = AVG2(C, D);
+
+  DST(0, 3) =             AVG3(K, J, I);
+  DST(0, 2) =             AVG3(J, I, X);
+  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+  DST(3, 1) =             AVG3(B, C, D);
+}
+
+static void VL4(uint8_t *dst) {   // Vertical-Left
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+  const int D = dst[3 - BPS];
+  const int E = dst[4 - BPS];
+  const int F = dst[5 - BPS];
+  const int G = dst[6 - BPS];
+  const int H = dst[7 - BPS];
+  DST(0, 0) =             AVG2(A, B);
+  DST(1, 0) = DST(0, 2) = AVG2(B, C);
+  DST(2, 0) = DST(1, 2) = AVG2(C, D);
+  DST(3, 0) = DST(2, 2) = AVG2(D, E);
+
+  DST(0, 1) =             AVG3(A, B, C);
+  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+              DST(3, 2) = AVG3(E, F, G);
+              DST(3, 3) = AVG3(F, G, H);
+}
+
+static void HU4(uint8_t *dst) {   // Horizontal-Up
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int L = dst[-1 + 3 * BPS];
+  DST(0, 0) =             AVG2(I, J);
+  DST(2, 0) = DST(0, 1) = AVG2(J, K);
+  DST(2, 1) = DST(0, 2) = AVG2(K, L);
+  DST(1, 0) =             AVG3(I, J, K);
+  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+  DST(3, 2) = DST(2, 2) =
+    DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
+static void HD4(uint8_t *dst) {  // Horizontal-Down
+  const int I = dst[-1 + 0 * BPS];
+  const int J = dst[-1 + 1 * BPS];
+  const int K = dst[-1 + 2 * BPS];
+  const int L = dst[-1 + 3 * BPS];
+  const int X = dst[-1 - BPS];
+  const int A = dst[0 - BPS];
+  const int B = dst[1 - BPS];
+  const int C = dst[2 - BPS];
+
+  DST(0, 0) = DST(2, 1) = AVG2(I, X);
+  DST(0, 1) = DST(2, 2) = AVG2(J, I);
+  DST(0, 2) = DST(2, 3) = AVG2(K, J);
+  DST(0, 3)             = AVG2(L, K);
+
+  DST(3, 0)             = AVG3(A, B, C);
+  DST(2, 0)             = AVG3(X, A, B);
+  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+  DST(1, 3)             = AVG3(L, K, J);
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+//------------------------------------------------------------------------------
+// Chroma
+
+static void VE8uv(uint8_t *dst) {    // vertical
+  int j;
+  for (j = 0; j < 8; ++j) {
+    memcpy(dst + j * BPS, dst - BPS, 8);
+  }
+}
+
+static void HE8uv(uint8_t *dst) {    // horizontal
+  int j;
+  for (j = 0; j < 8; ++j) {
+    memset(dst, dst[-1], 8);
+    dst += BPS;
+  }
+}
+
+// Helper for chroma-DC predictions: stores 'v' (8 bytes) on each of 8 rows.
+static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
+  int j;
+  for (j = 0; j < 8; ++j) {
+    *(uint64_t*)(dst + j * BPS) = v;  // NOTE(review): assumes aligned dst rows
+  }
+}
+
+static void DC8uv(uint8_t *dst) {     // DC
+  int dc0 = 8;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[i - BPS] + dst[-1 + i * BPS];
+  }
+  Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst);
+}
+
+static void DC8uvNoLeft(uint8_t *dst) {   // DC with no left samples
+  int dc0 = 4;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[i - BPS];
+  }
+  Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
+}
+
+static void DC8uvNoTop(uint8_t *dst) {  // DC with no top samples
+  int dc0 = 4;
+  int i;
+  for (i = 0; i < 8; ++i) {
+    dc0 += dst[-1 + i * BPS];
+  }
+  Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
+}
+
+static void DC8uvNoTopLeft(uint8_t *dst) {    // DC with nothing
+  Put8x8uv(0x8080808080808080ULL, dst);
+}
+
+//------------------------------------------------------------------------------
+// default C implementations
+
+const VP8PredFunc VP8PredLuma4[NUM_BMODES] = {
+  DC4, TM4, VE4, HE4, RD4, VR4, LD4, VL4, HD4, HU4
+};
+
+const VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES] = {
+  DC16, TM16, VE16, HE16,
+  DC16NoTop, DC16NoLeft, DC16NoTopLeft
+};
+
+const VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES] = {
+  DC8uv, TM8uv, VE8uv, HE8uv,
+  DC8uvNoTop, DC8uvNoLeft, DC8uvNoTopLeft
+};
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// 4 pixels in, 2 pixels out
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0) + sclip1[1020 + p1 - q1];
+  const int a1 = sclip2[112 + ((a + 4) >> 3)];
+  const int a2 = sclip2[112 + ((a + 3) >> 3)];
+  p[-step] = clip1[255 + p0 + a2];
+  p[    0] = clip1[255 + q0 - a1];
+}
+
+// 4 pixels in, 4 pixels out
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int a = 3 * (q0 - p0);
+  const int a1 = sclip2[112 + ((a + 4) >> 3)];
+  const int a2 = sclip2[112 + ((a + 3) >> 3)];
+  const int a3 = (a1 + 1) >> 1;
+  p[-2*step] = clip1[255 + p1 + a3];
+  p[-  step] = clip1[255 + p0 + a2];
+  p[      0] = clip1[255 + q0 - a1];
+  p[   step] = clip1[255 + q1 - a3];
+}
+
+// 6 pixels in, 6 pixels out
+static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
+  const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2*step];
+  const int a = sclip1[1020 + 3 * (q0 - p0) + sclip1[1020 + p1 - q1]];
+  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
+  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
+  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
+  p[-3*step] = clip1[255 + p2 + a3];
+  p[-2*step] = clip1[255 + p1 + a2];
+  p[-  step] = clip1[255 + p0 + a1];
+  p[      0] = clip1[255 + q0 - a1];
+  p[   step] = clip1[255 + q1 - a2];
+  p[ 2*step] = clip1[255 + q2 - a3];
+}
+
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return (abs0[255 + p1 - p0] > thresh) || (abs0[255 + q1 - q0] > thresh);
+}
+
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
+  const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  return (2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) <= thresh;
+}
+
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+                                     int step, int t, int it) {
+  const int p3 = p[-4*step], p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
+  const int q0 = p[0], q1 = p[step], q2 = p[2*step], q3 = p[3*step];
+  if ((2 * abs0[255 + p0 - q0] + abs1[255 + p1 - q1]) > t)
+    return 0;
+  return abs0[255 + p3 - p2] <= it && abs0[255 + p2 - p1] <= it &&
+         abs0[255 + p1 - p0] <= it && abs0[255 + q3 - q2] <= it &&
+         abs0[255 + q2 - q1] <= it && abs0[255 + q1 - q0] <= it;
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  int i;
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i, stride, thresh)) {
+      do_filter2(p + i, stride);
+    }
+  }
+}
+
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  int i;
+  for (i = 0; i < 16; ++i) {
+    if (needs_filter(p + i * stride, 1, thresh)) {
+      do_filter2(p + i * stride, 1);
+    }
+  }
+}
+
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    SimpleVFilter16(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    SimpleHFilter16(p, stride, thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+static WEBP_INLINE void FilterLoop26(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
+      } else {
+        do_filter6(p, hstride);
+      }
+    }
+    p += vstride;
+  }
+}
+
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  while (size-- > 0) {
+    if (needs_filter2(p, hstride, thresh, ithresh)) {
+      if (hev(p, hstride, hev_thresh)) {
+        do_filter2(p, hstride);
+      } else {
+        do_filter4(p, hstride);
+      }
+    }
+    p += vstride;
+  }
+}
+
+// on macroblock edges
+static void VFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter16(uint8_t* p, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+}
+
+// on three inner edges
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8(uint8_t* u, uint8_t* v, int stride,
+                     int thresh, int ithresh, int hev_thresh) {
+  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+//------------------------------------------------------------------------------
+
+VP8DecIdct2 VP8Transform;
+VP8DecIdct VP8TransformUV;
+VP8DecIdct VP8TransformDC;
+VP8DecIdct VP8TransformDCUV;
+
+VP8LumaFilterFunc VP8VFilter16;
+VP8LumaFilterFunc VP8HFilter16;
+VP8ChromaFilterFunc VP8VFilter8;
+VP8ChromaFilterFunc VP8HFilter8;
+VP8LumaFilterFunc VP8VFilter16i;
+VP8LumaFilterFunc VP8HFilter16i;
+VP8ChromaFilterFunc VP8VFilter8i;
+VP8ChromaFilterFunc VP8HFilter8i;
+VP8SimpleFilterFunc VP8SimpleVFilter16;
+VP8SimpleFilterFunc VP8SimpleHFilter16;
+VP8SimpleFilterFunc VP8SimpleVFilter16i;
+VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+extern void VP8DspInitSSE2(void);
+extern void VP8DspInitNEON(void);
+
+void VP8DspInit(void) {
+  DspInitTables();
+
+  VP8Transform = TransformTwo;
+  VP8TransformUV = TransformUV;
+  VP8TransformDC = TransformDC;
+  VP8TransformDCUV = TransformDCUV;
+
+  VP8VFilter16 = VFilter16;
+  VP8HFilter16 = HFilter16;
+  VP8VFilter8 = VFilter8;
+  VP8HFilter8 = HFilter8;
+  VP8VFilter16i = VFilter16i;
+  VP8HFilter16i = HFilter16i;
+  VP8VFilter8i = VFilter8i;
+  VP8HFilter8i = HFilter8i;
+  VP8SimpleVFilter16 = SimpleVFilter16;
+  VP8SimpleHFilter16 = SimpleHFilter16;
+  VP8SimpleVFilter16i = SimpleVFilter16i;
+  VP8SimpleHFilter16i = SimpleHFilter16i;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8DspInitSSE2();
+    }
+#elif defined(WEBP_USE_NEON)
+    if (VP8GetCPUInfo(kNEON)) {
+      VP8DspInitNEON();
+    }
+#endif
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dsp/dec_neon.c b/drivers/webpold/dsp/dec_neon.c
new file mode 100644
index 0000000000..ec824b790b
--- /dev/null
+++ b/drivers/webpold/dsp/dec_neon.c
@@ -0,0 +1,329 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// ARM NEON version of dsp functions and loop filtering.
+//
+// Authors: Somnath Banerjee (somnath@google.com)
+//          Johann Koenig (johannkoenig@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_NEON)
+
+#include "../dec/vp8i.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+// Clobber list naming all sixteen NEON q registers, used by the asm below.
+#define QRegs "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",                  \
+              "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
+// XORs 'a' and 'b' in place with 's' (DO_FILTER2 passes 0x80 in every byte).
+#define FLIP_SIGN_BIT2(a, b, s)                                                \
+  "veor     " #a "," #a "," #s "               \n"                             \
+  "veor     " #b "," #b "," #s "               \n"                             \
+
+#define FLIP_SIGN_BIT4(a, b, c, d, s)                                          \
+  FLIP_SIGN_BIT2(a, b, s)                                                      \
+  FLIP_SIGN_BIT2(c, d, s)                                                      \
+
+#define NEEDS_FILTER(p1, p0, q0, q1, thresh, mask)                             \
+  "vabd.u8    q15," #p0 "," #q0 "         \n"  /* abs(p0 - q0) */              \
+  "vabd.u8    q14," #p1 "," #q1 "         \n"  /* abs(p1 - q1) */              \
+  "vqadd.u8   q15, q15, q15               \n"  /* abs(p0 - q0) * 2 */          \
+  "vshr.u8    q14, q14, #1                \n"  /* abs(p1 - q1) / 2 */          \
+  "vqadd.u8   q15, q15, q14     \n"  /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
+  "vdup.8     q14, " #thresh "            \n"                                  \
+  "vcge.u8   " #mask ", q14, q15          \n"  /* mask = (sum <= thresh) */
+// o = (p1 - q1) + 3 * (q0 - p0) with signed saturation; q15 is scratch.
+#define GET_BASE_DELTA(p1, p0, q0, q1, o)                                      \
+  "vqsub.s8   q15," #q0 "," #p0 "         \n"  /* (q0 - p0) */                 \
+  "vqsub.s8  " #o "," #p1 "," #q1 "       \n"  /* (p1 - q1) */                 \
+  "vqadd.s8  " #o "," #o ", q15           \n"  /* (p1 - q1) + 1 * (q0 - p0) */ \
+  "vqadd.s8  " #o "," #o ", q15           \n"  /* (p1 - q1) + 2 * (q0 - p0) */ \
+  "vqadd.s8  " #o "," #o ", q15           \n"  /* (p1 - q1) + 3 * (q0 - p0) */
+// p0 += (fl + 3) >> 3 and q0 -= (fl + 4) >> 3, saturating; q15 is scratch.
+#define DO_SIMPLE_FILTER(p0, q0, fl)                                           \
+  "vmov.i8    q15, #0x03                  \n"                                  \
+  "vqadd.s8   q15, q15, " #fl "           \n"  /* filter1 = filter + 3 */      \
+  "vshr.s8    q15, q15, #3                \n"  /* filter1 >> 3 */              \
+  "vqadd.s8  " #p0 "," #p0 ", q15         \n"  /* p0 += filter1 */             \
+                                                                               \
+  "vmov.i8    q15, #0x04                  \n"                                  \
+  "vqadd.s8   q15, q15, " #fl "           \n"  /* filter2 = filter + 4 */      \
+  "vshr.s8    q15, q15, #3                \n"  /* filter2 >> 3 */              \
+  "vqsub.s8  " #q0 "," #q0 ", q15         \n"  /* q0 -= filter2 */
+
+// Applies filter on 2 pixels (p0 and q0); q9-q11, q14 and q15 are scratch.
+#define DO_FILTER2(p1, p0, q0, q1, thresh)                                     \
+  NEEDS_FILTER(p1, p0, q0, q1, thresh, q9)     /* filter mask in q9 */         \
+  "vmov.i8    q10, #0x80                  \n"  /* sign bit */                  \
+  FLIP_SIGN_BIT4(p1, p0, q0, q1, q10)          /* convert to signed value */   \
+  GET_BASE_DELTA(p1, p0, q0, q1, q11)          /* get filter level  */         \
+  "vand       q9, q9, q11                 \n"  /* apply filter mask */         \
+  DO_SIMPLE_FILTER(p0, q0, q9)                 /* apply filter */              \
+  FLIP_SIGN_BIT2(p0, q0, q10)
+
+// Load/Store vertical edge
+#define LOAD8x4(c1, c2, c3, c4, b1, b2, stride)                                \
+  "vld4.8   {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \
+  "vld4.8   {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \
+  "vld4.8   {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \
+  "vld4.8   {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
+  "vld4.8   {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
+
+#define STORE8x2(c1, c2, p,stride)                                             \
+  "vst2.8   {" #c1"[0], " #c2"[0]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[1], " #c2"[1]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[2], " #c2"[2]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[3], " #c2"[3]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[4], " #c2"[4]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[5], " #c2"[5]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[6], " #c2"[6]}," #p "," #stride " \n"                      \
+  "vst2.8   {" #c1"[7], " #c2"[7]}," #p "," #stride " \n"
+
+//-----------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+// Filters p0/q0 for 16 adjacent bytes; the lines are 'stride' bytes apart.
+static void SimpleVFilter16NEON(uint8_t* p, int stride, int thresh) {
+  __asm__ volatile (
+    "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
+    // Load 16 bytes from each of the four lines p - 2 * stride .. p + stride.
+    "vld1.u8    {q1}, [%[p]], %[stride]        \n"  // p1
+    "vld1.u8    {q2}, [%[p]], %[stride]        \n"  // p0
+    "vld1.u8    {q3}, [%[p]], %[stride]        \n"  // q0
+    "vld1.u8    {q4}, [%[p]]                   \n"  // q1
+
+    DO_FILTER2(q1, q2, q3, q4, %[thresh])
+    // The loads left 'p' on the q1 line; step back to the p0 line.
+    "sub        %[p], %[p], %[stride], lsl #1  \n"  // p -= 2 * stride
+
+    "vst1.u8    {q2}, [%[p]], %[stride]        \n"  // store op0
+    "vst1.u8    {q3}, [%[p]]                   \n"  // store oq0
+    : [p] "+r"(p)
+    : [stride] "r"(stride), [thresh] "r"(thresh)
+    : "memory", QRegs
+  );
+}
+// Same filter over 16 lines: p1 p0 q0 q1 are the bytes p[-2..1] of each line.
+static void SimpleHFilter16NEON(uint8_t* p, int stride, int thresh) {
+  __asm__ volatile (
+    "sub        r4, %[p], #2                   \n"  // base1 = p - 2
+    "lsl        r6, %[stride], #1              \n"  // r6 = 2 * stride
+    "add        r5, r4, %[stride]              \n"  // base2 = base1 + stride
+
+    LOAD8x4(d2, d3, d4, d5, [r4], [r5], r6)
+    LOAD8x4(d6, d7, d8, d9, [r4], [r5], r6)
+    "vswp       d3, d6                         \n"  // p1:q1 p0:q3
+    "vswp       d5, d8                         \n"  // q0:q2 q1:q4
+    "vswp       q2, q3                         \n"  // p1:q1 p0:q2 q0:q3 q1:q4
+
+    DO_FILTER2(q1, q2, q3, q4, %[thresh])
+    // Only p0 and q0 changed: write them back as byte pairs starting at p - 1.
+    "sub        %[p], %[p], #1                 \n"  // p - 1
+
+    "vswp        d5, d6                        \n"
+    STORE8x2(d4, d5, [%[p]], %[stride])
+    STORE8x2(d6, d7, [%[p]], %[stride])
+
+    : [p] "+r"(p)
+    : [stride] "r"(stride), [thresh] "r"(thresh)
+    : "memory", "r4", "r5", "r6", QRegs
+  );
+}
+
+static void SimpleVFilter16iNEON(uint8_t* p, int stride, int thresh) {
+  // Runs the NEON simple filter on the three edges located at 4 * stride,
+  // 8 * stride and 12 * stride bytes past 'p'.
+  int edge;
+  for (edge = 4; edge <= 12; edge += 4)
+    SimpleVFilter16NEON(p + edge * stride, stride, thresh);
+}
+
+static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
+  // Runs the NEON simple filter on the three edges located 4, 8 and 12
+  // bytes past 'p'.
+  int offset;
+  for (offset = 4; offset <= 12; offset += 4)
+    SimpleHFilter16NEON(p + offset, stride, thresh);
+}
+
+static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
+  const int kBPS = BPS;  // byte distance between consecutive rows of 'dst'
+  const int16_t constants[] = {20091, 17734, 0, 0};
+  /* kC1, kC2. Padded because vld1.16 loads 8 bytes
+   * Technically these are unsigned but vqdmulh is only available in signed.
+   * vqdmulh returns high half (effectively >> 16) but also doubles the value,
+   * changing the >> 16 to >> 15 and requiring an additional >> 1.
+   * We use this to our advantage with kC2. The canonical value is 35468.
+   * However, the high bit is set so treating it as signed will give incorrect
+   * results. We avoid this by down shifting by 1 here to clear the highest bit.
+   * Combined with the doubling effect of vqdmulh we get >> 16.
+   * This can not be applied to kC1 because the lowest bit is set. Down shifting
+   * the constant would reduce precision.
+   */
+  /* 4x4 inverse transform of 'in', added to 'dst' with saturation to 8 bits. */
+  /* libwebp uses a trick to avoid some extra addition that libvpx does.
+   * Instead of:
+   * temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16);
+   * libwebp adds 1 << 16 to cospi8sqrt2minus1 (kC1). However, this causes the
+   * same issue with kC1 and vqdmulh that we work around by down shifting kC2
+   */
+
+  /* Adapted from libvpx: vp8/common/arm/neon/shortidct4x4llm_neon.asm */
+  __asm__ volatile (
+    "vld1.16         {q1, q2}, [%[in]]           \n"
+    "vld1.16         {d0}, [%[constants]]        \n"
+
+    /* d2: in[0]
+     * d3: in[8]
+     * d4: in[4]
+     * d5: in[12]
+     */
+    "vswp            d3, d4                      \n"
+
+    /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
+     * q9 = {in[4], in[12]} * kC2 >> 16
+     */
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    /* d22 = a = in[0] + in[8]
+     * d23 = b = in[0] - in[8]
+     */
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    /* The multiplication should be x * kC1 >> 16
+     * However, with vqdmulh we get x * kC1 * 2 >> 16
+     * (multiply, double, return high half)
+     * We avoided this in kC2 by pre-shifting the constant.
+     * q8 = in[4]/[12] * kC1 >> 16
+     */
+    "vshr.s16        q8, q8, #1                  \n"
+
+    /* Add {in[4], in[12]} back after the multiplication. This is handled by
+     * adding 1 << 16 to kC1 in the libwebp C code.
+     */
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    /* d20 = c = in[4]*kC2 - in[12]*kC1
+     * d21 = d = in[4]*kC1 + in[12]*kC2
+     */
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    /* d2 = tmp[0] = a + d
+     * d3 = tmp[1] = b + c
+     * d4 = tmp[2] = b - c
+     * d5 = tmp[3] = a - d
+     */
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    "vswp            d3, d4                      \n"
+
+    /* q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
+     * q9 = {tmp[4], tmp[12]} * kC2 >> 16
+     */
+    "vqdmulh.s16     q8, q2, d0[0]               \n"
+    "vqdmulh.s16     q9, q2, d0[1]               \n"
+
+    /* d22 = a = tmp[0] + tmp[8]
+     * d23 = b = tmp[0] - tmp[8]
+     */
+    "vqadd.s16       d22, d2, d3                 \n"
+    "vqsub.s16       d23, d2, d3                 \n"
+
+    /* See long winded explanations prior */
+    "vshr.s16        q8, q8, #1                  \n"
+    "vqadd.s16       q8, q2, q8                  \n"
+
+    /* d20 = c = tmp[4]*kC2 - tmp[12]*kC1
+     * d21 = d = tmp[4]*kC1 + tmp[12]*kC2
+     */
+    "vqsub.s16       d20, d18, d17               \n"
+    "vqadd.s16       d21, d19, d16               \n"
+
+    /* d2 = tmp[0] = a + d
+     * d3 = tmp[1] = b + c
+     * d4 = tmp[2] = b - c
+     * d5 = tmp[3] = a - d
+     */
+    "vqadd.s16       d2, d22, d21                \n"
+    "vqadd.s16       d3, d23, d20                \n"
+    "vqsub.s16       d4, d23, d20                \n"
+    "vqsub.s16       d5, d22, d21                \n"
+    /* Load the 4x4 destination bytes into q3 (d6, d7), four bytes per row. */
+    "vld1.32         d6[0], [%[dst]], %[kBPS]    \n"
+    "vld1.32         d6[1], [%[dst]], %[kBPS]    \n"
+    "vld1.32         d7[0], [%[dst]], %[kBPS]    \n"
+    "vld1.32         d7[1], [%[dst]], %[kBPS]    \n"
+
+    "sub         %[dst], %[dst], %[kBPS], lsl #2 \n"
+
+    /* (val) + 4 >> 3 */
+    "vrshr.s16       d2, d2, #3                  \n"
+    "vrshr.s16       d3, d3, #3                  \n"
+    "vrshr.s16       d4, d4, #3                  \n"
+    "vrshr.s16       d5, d5, #3                  \n"
+
+    "vzip.16         q1, q2                      \n"
+    "vzip.16         q1, q2                      \n"
+
+    /* Must accumulate before saturating */
+    "vmovl.u8        q8, d6                      \n"
+    "vmovl.u8        q9, d7                      \n"
+
+    "vqadd.s16       q1, q1, q8                  \n"
+    "vqadd.s16       q2, q2, q9                  \n"
+
+    "vqmovun.s16     d0, q1                      \n"
+    "vqmovun.s16     d1, q2                      \n"
+
+    "vst1.32         d0[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d0[1], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[0], [%[dst]], %[kBPS]    \n"
+    "vst1.32         d1[1], [%[dst]]             \n"
+
+    : [in] "+r"(in), [dst] "+r"(dst)  /* modified registers */
+    : [kBPS] "r"(kBPS), [constants] "r"(constants)  /* constants */
+    : "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"  /* clobbered */
+  );
+}
+
+static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
+  // The first block is always transformed; the second one (next 16
+  // coefficients, 4 bytes further in 'dst') only when 'do_two' is non-zero.
+  TransformOneNEON(in, dst);
+  if (do_two != 0) TransformOneNEON(in + 16, dst + 4);
+}
+
+extern void VP8DspInitNEON(void);
+
+void VP8DspInitNEON(void) {
+  // Point the simple loop filters and the transform at the NEON versions.
+  VP8SimpleHFilter16i = SimpleHFilter16iNEON;
+  VP8SimpleVFilter16i = SimpleVFilter16iNEON;
+  VP8SimpleHFilter16 = SimpleHFilter16NEON;
+  VP8SimpleVFilter16 = SimpleVFilter16NEON;
+  VP8Transform = TransformTwoNEON;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif   // WEBP_USE_NEON
diff --git a/drivers/webpold/dsp/dec_sse2.c b/drivers/webpold/dsp/dec_sse2.c
new file mode 100644
index 0000000000..472b68ecb8
--- /dev/null
+++ b/drivers/webpold/dsp/dec_sse2.c
@@ -0,0 +1,903 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of some decoding functions (idct, loop filtering).
+//
+// Author: somnath@google.com (Somnath Banerjee)
+//         cduvivier@google.com (Christian Duvivier)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <emmintrin.h>
+#include "../dec/vp8i.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+// Inverse-transforms one 4x4 block (two if 'do_two') and adds it to 'dst'.
+static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
+  // This implementation makes use of 16-bit fixed point versions of two
+  // multiply constants:
+  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
+  //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
+  //
+  // To be able to use signed 16-bit integers, we use the following trick to
+  // have constants within range:
+  // - Associated constants are obtained by subtracting the 16-bit fixed point
+  //   version of one:
+  //      k = K - (1 << 16)  =>  K = k + (1 << 16)
+  //      K1 = 85627  =>  k1 =  20091
+  //      K2 = 35468  =>  k2 = -30068
+  // - The multiplication of a variable by a constant become the sum of the
+  //   variable and the multiplication of that variable by the associated
+  //   constant:
+  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
+  const __m128i k1 = _mm_set1_epi16(20091);   // K1 - (1 << 16)
+  const __m128i k2 = _mm_set1_epi16(-30068);  // K2 - (1 << 16)
+  __m128i T0, T1, T2, T3;  // carry the rows from one pass to the next
+
+  // Load and concatenate the transform coefficients (we'll do two transforms
+  // in parallel). In the case of only one transform, the second half of the
+  // vectors will just contain random value we'll never use nor store.
+  __m128i in0, in1, in2, in3;
+  {
+    in0 = _mm_loadl_epi64((__m128i*)&in[0]);
+    in1 = _mm_loadl_epi64((__m128i*)&in[4]);
+    in2 = _mm_loadl_epi64((__m128i*)&in[8]);
+    in3 = _mm_loadl_epi64((__m128i*)&in[12]);
+    // a00 a10 a20 a30   x x x x
+    // a01 a11 a21 a31   x x x x
+    // a02 a12 a22 a32   x x x x
+    // a03 a13 a23 a33   x x x x
+    if (do_two) {
+      const __m128i inB0 = _mm_loadl_epi64((__m128i*)&in[16]);
+      const __m128i inB1 = _mm_loadl_epi64((__m128i*)&in[20]);
+      const __m128i inB2 = _mm_loadl_epi64((__m128i*)&in[24]);
+      const __m128i inB3 = _mm_loadl_epi64((__m128i*)&in[28]);
+      in0 = _mm_unpacklo_epi64(in0, inB0);
+      in1 = _mm_unpacklo_epi64(in1, inB1);
+      in2 = _mm_unpacklo_epi64(in2, inB2);
+      in3 = _mm_unpacklo_epi64(in3, inB3);
+      // a00 a10 a20 a30   b00 b10 b20 b30
+      // a01 a11 a21 a31   b01 b11 b21 b31
+      // a02 a12 a22 a32   b02 b12 b22 b32
+      // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+  }
+
+  // Vertical pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i a = _mm_add_epi16(in0, in2);
+    const __m128i b = _mm_sub_epi16(in0, in2);
+    // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
+    const __m128i c1 = _mm_mulhi_epi16(in1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(in3, k1);
+    const __m128i c3 = _mm_sub_epi16(in1, in3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
+    const __m128i d1 = _mm_mulhi_epi16(in1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(in3, k2);
+    const __m128i d3 = _mm_add_epi16(in1, in3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 b22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i four = _mm_set1_epi16(4);  // rounding bias for the >> 3 below
+    const __m128i dc = _mm_add_epi16(T0, four);
+    const __m128i a =  _mm_add_epi16(dc, T2);
+    const __m128i b =  _mm_sub_epi16(dc, T2);
+    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
+    const __m128i c1 = _mm_mulhi_epi16(T1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(T3, k1);
+    const __m128i c3 = _mm_sub_epi16(T1, T3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
+    const __m128i d1 = _mm_mulhi_epi16(T1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(T3, k2);
+    const __m128i d3 = _mm_add_epi16(T1, T3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+    const __m128i shifted0 = _mm_srai_epi16(tmp0, 3);
+    const __m128i shifted1 = _mm_srai_epi16(tmp1, 3);
+    const __m128i shifted2 = _mm_srai_epi16(tmp2, 3);
+    const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 b22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Add inverse transform to 'dst' and store.
+  {
+    const __m128i zero = _mm_set1_epi16(0);
+    // Load the reference(s).
+    __m128i dst0, dst1, dst2, dst3;
+    if (do_two) {
+      // Load eight bytes/pixels per line.
+      dst0 = _mm_loadl_epi64((__m128i*)&dst[0 * BPS]);
+      dst1 = _mm_loadl_epi64((__m128i*)&dst[1 * BPS]);
+      dst2 = _mm_loadl_epi64((__m128i*)&dst[2 * BPS]);
+      dst3 = _mm_loadl_epi64((__m128i*)&dst[3 * BPS]);
+    } else {
+      // Load four bytes/pixels per line.
+      dst0 = _mm_cvtsi32_si128(*(int*)&dst[0 * BPS]);
+      dst1 = _mm_cvtsi32_si128(*(int*)&dst[1 * BPS]);
+      dst2 = _mm_cvtsi32_si128(*(int*)&dst[2 * BPS]);
+      dst3 = _mm_cvtsi32_si128(*(int*)&dst[3 * BPS]);
+    }
+    // Convert to 16b.
+    dst0 = _mm_unpacklo_epi8(dst0, zero);
+    dst1 = _mm_unpacklo_epi8(dst1, zero);
+    dst2 = _mm_unpacklo_epi8(dst2, zero);
+    dst3 = _mm_unpacklo_epi8(dst3, zero);
+    // Add the inverse transform(s).
+    dst0 = _mm_add_epi16(dst0, T0);
+    dst1 = _mm_add_epi16(dst1, T1);
+    dst2 = _mm_add_epi16(dst2, T2);
+    dst3 = _mm_add_epi16(dst3, T3);
+    // Unsigned saturate to 8b.
+    dst0 = _mm_packus_epi16(dst0, dst0);
+    dst1 = _mm_packus_epi16(dst1, dst1);
+    dst2 = _mm_packus_epi16(dst2, dst2);
+    dst3 = _mm_packus_epi16(dst3, dst3);
+    // Store the results.
+    if (do_two) {
+      // Store eight bytes/pixels per line.
+      _mm_storel_epi64((__m128i*)&dst[0 * BPS], dst0);
+      _mm_storel_epi64((__m128i*)&dst[1 * BPS], dst1);
+      _mm_storel_epi64((__m128i*)&dst[2 * BPS], dst2);
+      _mm_storel_epi64((__m128i*)&dst[3 * BPS], dst3);
+    } else {
+      // Store four bytes/pixels per line.
+      *((int32_t *)&dst[0 * BPS]) = _mm_cvtsi128_si32(dst0);
+      *((int32_t *)&dst[1 * BPS]) = _mm_cvtsi128_si32(dst1);
+      *((int32_t *)&dst[2 * BPS]) = _mm_cvtsi128_si32(dst2);
+      *((int32_t *)&dst[3 * BPS]) = _mm_cvtsi128_si32(dst3);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Loop Filter (Paragraph 15)
+
+// Compute abs(p - q) = subs(p - q) OR subs(q - p)
+#define MM_ABS(p, q)  _mm_or_si128(                                            \
+    _mm_subs_epu8((q), (p)),                                                   \
+    _mm_subs_epu8((p), (q)))
+
+// Shift each byte of "a" right by N bits while preserving the sign bit.
+//
+// It first shifts the lower bytes of the words and then the upper bytes and
+// then merges the results together.
+#define SIGNED_SHIFT_N(a, N) {                                                 \
+  __m128i t = a;                                                               \
+  t = _mm_slli_epi16(t, 8);                                                    \
+  t = _mm_srai_epi16(t, N);                                                    \
+  t = _mm_srli_epi16(t, 8);                                                    \
+                                                                               \
+  a = _mm_srai_epi16(a, N + 8);                                                \
+  a = _mm_slli_epi16(a, 8);                                                    \
+                                                                               \
+  a = _mm_or_si128(t, a);                                                      \
+}
+// Note: the two macros below expect 'sign_bit' to exist in the caller's scope.
+#define FLIP_SIGN_BIT2(a, b) {                                                 \
+  a = _mm_xor_si128(a, sign_bit);                                              \
+  b = _mm_xor_si128(b, sign_bit);                                              \
+}
+
+#define FLIP_SIGN_BIT4(a, b, c, d) {                                           \
+  FLIP_SIGN_BIT2(a, b);                                                        \
+  FLIP_SIGN_BIT2(c, d);                                                        \
+}
+// not_hev byte = 0xFF where |p1 - p0| and |q1 - q0| are both <= hev_thresh.
+#define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) {                      \
+  const __m128i zero = _mm_setzero_si128();                                    \
+  const __m128i t1 = MM_ABS(p1, p0);                                           \
+  const __m128i t2 = MM_ABS(q1, q0);                                           \
+                                                                               \
+  const __m128i h = _mm_set1_epi8(hev_thresh);                                 \
+  const __m128i t3 = _mm_subs_epu8(t1, h);  /* abs(p1 - p0) - hev_thresh */    \
+  const __m128i t4 = _mm_subs_epu8(t2, h);  /* abs(q1 - q0) - hev_thresh */    \
+                                                                               \
+  not_hev = _mm_or_si128(t3, t4);                                              \
+  not_hev = _mm_cmpeq_epi8(not_hev, zero); /* t1 <= h && t2 <= h */            \
+}
+// o = (p1 - q1) + 3 * (q0 - p0), with signed saturation at every step.
+#define GET_BASE_DELTA(p1, p0, q0, q1, o) {                                    \
+  const __m128i qp0 = _mm_subs_epi8(q0, p0);  /* q0 - p0 */                    \
+  o = _mm_subs_epi8(p1, q1);            /* p1 - q1 */                          \
+  o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 1 * (q0 - p0) */          \
+  o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 2 * (q0 - p0) */          \
+  o = _mm_adds_epi8(o, qp0);            /* p1 - q1 + 3 * (q0 - p0) */          \
+}
+// q0 -= (fl + 4) >> 3 and p0 += (fl + 3) >> 3, on signed bytes (saturating).
+#define DO_SIMPLE_FILTER(p0, q0, fl) {                                         \
+  const __m128i three = _mm_set1_epi8(3);                                      \
+  const __m128i four = _mm_set1_epi8(4);                                       \
+  __m128i v3 = _mm_adds_epi8(fl, three);                                       \
+  __m128i v4 = _mm_adds_epi8(fl, four);                                        \
+                                                                               \
+  /* Do +4 side */                                                             \
+  SIGNED_SHIFT_N(v4, 3);                /* v4 >> 3  */                         \
+  q0 = _mm_subs_epi8(q0, v4);           /* q0 -= v4 */                         \
+                                                                               \
+  /* Now do +3 side */                                                         \
+  SIGNED_SHIFT_N(v3, 3);                /* v3 >> 3  */                         \
+  p0 = _mm_adds_epi8(p0, v3);           /* p0 += v3 */                         \
+}
+
+// Updates values of 2 pixels at MB edge during complex filtering.
+// Update operations:
+// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)]
+#define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) {                                   \
+  const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7);                               \
+  const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7);                               \
+  const __m128i a = _mm_packs_epi16(a_lo7, a_hi7);                             \
+  pi = _mm_adds_epi8(pi, a);                                                   \
+  qi = _mm_subs_epi8(qi, a);                                                   \
+}
+// Sets '*mask' bytes to 0xFF where 2 * |p0 - q0| + |p1 - q1| / 2 <= thresh.
+static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,
+                        const __m128i* q1, int thresh, __m128i *mask) {
+  __m128i t1 = MM_ABS(*p1, *q1);        // abs(p1 - q1)
+  *mask = _mm_set1_epi8(0xFE);          // '*mask' doubles as scratch here
+  t1 = _mm_and_si128(t1, *mask);        // set lsb of each byte to zero
+  t1 = _mm_srli_epi16(t1, 1);           // abs(p1 - q1) / 2
+
+  *mask = MM_ABS(*p0, *q0);             // abs(p0 - q0)
+  *mask = _mm_adds_epu8(*mask, *mask);  // abs(p0 - q0) * 2
+  *mask = _mm_adds_epu8(*mask, t1);     // abs(p0 - q0) * 2 + abs(p1 - q1) / 2
+
+  t1 = _mm_set1_epi8(thresh);
+  *mask = _mm_subs_epu8(*mask, t1);     // zero exactly where sum <= thresh
+  *mask = _mm_cmpeq_epi8(*mask, _mm_setzero_si128());
+}
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// Applies filter on 2 pixels (p0 and q0), in place; p1 and q1 are only read.
+static WEBP_INLINE void DoFilter2(const __m128i* p1, __m128i* p0, __m128i* q0,
+                                  const __m128i* q1, int thresh) {
+  __m128i a, mask;
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+  const __m128i p1s = _mm_xor_si128(*p1, sign_bit);  // signed copy of p1
+  const __m128i q1s = _mm_xor_si128(*q1, sign_bit);  // signed copy of q1
+  // The mask is computed on the pixel values before their sign bit is flipped.
+  NeedsFilter(p1, p0, q0, q1, thresh, &mask);
+
+  // convert to signed values
+  FLIP_SIGN_BIT2(*p0, *q0);
+
+  GET_BASE_DELTA(p1s, *p0, *q0, q1s, a);
+  a = _mm_and_si128(a, mask);     // mask filter values we don't care about
+  DO_SIMPLE_FILTER(*p0, *q0, a);
+
+  // unoffset
+  FLIP_SIGN_BIT2(*p0, *q0);
+}
+
+// Applies filter on 4 pixels (p1, p0, q0 and q1), all updated in place.
+static WEBP_INLINE void DoFilter4(__m128i* p1, __m128i *p0,
+                                  __m128i* q0, __m128i* q1,
+                                  const __m128i* mask, int hev_thresh) {
+  __m128i not_hev;
+  __m128i t1, t2, t3;
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+
+  // compute hev mask
+  GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
+
+  // convert to signed values
+  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+
+  t1 = _mm_subs_epi8(*p1, *q1);        // p1 - q1
+  t1 = _mm_andnot_si128(not_hev, t1);  // hev(p1 - q1)
+  t2 = _mm_subs_epi8(*q0, *p0);        // q0 - p0
+  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 1 * (q0 - p0)
+  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 2 * (q0 - p0)
+  t1 = _mm_adds_epi8(t1, t2);          // hev(p1 - q1) + 3 * (q0 - p0)
+  t1 = _mm_and_si128(t1, *mask);       // mask filter values we don't care about
+
+  // Do +4 side
+  t2 = _mm_set1_epi8(4);
+  t2 = _mm_adds_epi8(t1, t2);        // 3 * (q0 - p0) + hev(p1 - q1) + 4
+  SIGNED_SHIFT_N(t2, 3);             // (3 * (q0 - p0) + hev(p1 - q1) + 4) >> 3
+  t3 = t2;                           // save t2
+  *q0 = _mm_subs_epi8(*q0, t2);      // q0 -= t2
+
+  // Now do +3 side
+  t2 = _mm_set1_epi8(3);
+  t2 = _mm_adds_epi8(t1, t2);        // +3 instead of +4
+  SIGNED_SHIFT_N(t2, 3);             // (3 * (q0 - p0) + hev(p1 - q1) + 3) >> 3
+  *p0 = _mm_adds_epi8(*p0, t2);      // p0 += t2
+
+  t2 = _mm_set1_epi8(1);
+  t3 = _mm_adds_epi8(t3, t2);        // saved +4 side value, plus 1
+  SIGNED_SHIFT_N(t3, 1);             // (saved +4 side value + 1) >> 1
+
+  t3 = _mm_and_si128(not_hev, t3);   // if !hev
+  *q1 = _mm_subs_epi8(*q1, t3);      // q1 -= t3
+  *p1 = _mm_adds_epi8(*p1, t3);      // p1 += t3
+
+  // unoffset
+  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+}
+
+// Applies filter on 6 pixels (p2, p1, p0, q0, q1 and q2)
+static WEBP_INLINE void DoFilter6(__m128i *p2, __m128i* p1, __m128i *p0,
+                                  __m128i* q0, __m128i* q1, __m128i *q2,
+                                  const __m128i* mask, int hev_thresh) {
+  __m128i a, not_hev;
+  const __m128i sign_bit = _mm_set1_epi8(0x80);
+
+  // compute hev mask
+  GET_NOTHEV(*p1, *p0, *q0, *q1, hev_thresh, not_hev);
+
+  // convert to signed values
+  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+  FLIP_SIGN_BIT2(*p2, *q2);
+
+  GET_BASE_DELTA(*p1, *p0, *q0, *q1, a);
+
+  { // do simple filter on pixels with hev
+    const __m128i m = _mm_andnot_si128(not_hev, *mask);
+    const __m128i f = _mm_and_si128(a, m);
+    DO_SIMPLE_FILTER(*p0, *q0, f);
+  }
+  { // do strong filter on pixels with not hev
+    const __m128i zero = _mm_setzero_si128();
+    const __m128i nine = _mm_set1_epi16(0x0900);
+    const __m128i sixty_three = _mm_set1_epi16(63);
+
+    const __m128i m = _mm_and_si128(not_hev, *mask);
+    const __m128i f = _mm_and_si128(a, m);
+    const __m128i f_lo = _mm_unpacklo_epi8(zero, f);
+    const __m128i f_hi = _mm_unpackhi_epi8(zero, f);
+
+    const __m128i f9_lo = _mm_mulhi_epi16(f_lo, nine);   // Filter (lo) * 9
+    const __m128i f9_hi = _mm_mulhi_epi16(f_hi, nine);   // Filter (hi) * 9
+    const __m128i f18_lo = _mm_add_epi16(f9_lo, f9_lo);  // Filter (lo) * 18
+    const __m128i f18_hi = _mm_add_epi16(f9_hi, f9_hi);  // Filter (hi) * 18
+
+    const __m128i a2_lo = _mm_add_epi16(f9_lo, sixty_three);  // Filter * 9 + 63
+    const __m128i a2_hi = _mm_add_epi16(f9_hi, sixty_three);  // Filter * 9 + 63
+
+    const __m128i a1_lo = _mm_add_epi16(f18_lo, sixty_three);  // F... * 18 + 63
+    const __m128i a1_hi = _mm_add_epi16(f18_hi, sixty_three);  // F... * 18 + 63
+
+    const __m128i a0_lo = _mm_add_epi16(f18_lo, a2_lo);  // Filter * 27 + 63
+    const __m128i a0_hi = _mm_add_epi16(f18_hi, a2_hi);  // Filter * 27 + 63
+
+    UPDATE_2PIXELS(*p2, *q2, a2_lo, a2_hi);
+    UPDATE_2PIXELS(*p1, *q1, a1_lo, a1_hi);
+    UPDATE_2PIXELS(*p0, *q0, a0_lo, a0_hi);
+  }
+
+  // unoffset
+  FLIP_SIGN_BIT4(*p1, *p0, *q0, *q1);
+  FLIP_SIGN_BIT2(*p2, *q2);
+}
+
+// reads 8 rows across a vertical edge.
+//
+// TODO(somnath): Investigate _mm_shuffle* also see if it can be broken into
+// two Load4x4() to avoid code duplication.
+static WEBP_INLINE void Load8x4(const uint8_t* b, int stride,
+                                __m128i* p, __m128i* q) {
+  __m128i t1, t2;
+
+  // Load 0th, 1st, 4th and 5th rows
+  __m128i r0 =  _mm_cvtsi32_si128(*((int*)&b[0 * stride]));  // 03 02 01 00
+  __m128i r1 =  _mm_cvtsi32_si128(*((int*)&b[1 * stride]));  // 13 12 11 10
+  __m128i r4 =  _mm_cvtsi32_si128(*((int*)&b[4 * stride]));  // 43 42 41 40
+  __m128i r5 =  _mm_cvtsi32_si128(*((int*)&b[5 * stride]));  // 53 52 51 50
+
+  r0 = _mm_unpacklo_epi32(r0, r4);               // 43 42 41 40 03 02 01 00
+  r1 = _mm_unpacklo_epi32(r1, r5);               // 53 52 51 50 13 12 11 10
+
+  // t1 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00
+  t1 = _mm_unpacklo_epi8(r0, r1);
+
+  // Load 2nd, 3rd, 6th and 7th rows
+  r0 =  _mm_cvtsi32_si128(*((int*)&b[2 * stride]));          // 23 22 21 20
+  r1 =  _mm_cvtsi32_si128(*((int*)&b[3 * stride]));          // 33 32 31 30
+  r4 =  _mm_cvtsi32_si128(*((int*)&b[6 * stride]));          // 63 62 61 60
+  r5 =  _mm_cvtsi32_si128(*((int*)&b[7 * stride]));          // 73 72 71 70
+
+  r0 = _mm_unpacklo_epi32(r0, r4);               // 63 62 61 60 23 22 21 20
+  r1 = _mm_unpacklo_epi32(r1, r5);               // 73 72 71 70 33 32 31 30
+
+  // t2 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20
+  t2 = _mm_unpacklo_epi8(r0, r1);
+
+  // t1 = 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+  // t2 = 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+  r0 = t1;
+  t1 = _mm_unpacklo_epi16(t1, t2);
+  t2 = _mm_unpackhi_epi16(r0, t2);
+
+  // *p = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+  // *q = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+  *p = _mm_unpacklo_epi32(t1, t2);
+  *q = _mm_unpackhi_epi32(t1, t2);
+}
+
+static WEBP_INLINE void Load16x4(const uint8_t* r0, const uint8_t* r8,
+                                 int stride,
+                                 __m128i* p1, __m128i* p0,
+                                 __m128i* q0, __m128i* q1) {
+  __m128i t1, t2;
+  // Assume the pixels around the edge (|) are numbered as follows
+  //                00 01 | 02 03
+  //                10 11 | 12 13
+  //                 ...  |  ...
+  //                e0 e1 | e2 e3
+  //                f0 f1 | f2 f3
+  //
+  // r0 is pointing to the 0th row (00)
+  // r8 is pointing to the 8th row (80)
+
+  // Load
+  // p1 = 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+  // q0 = 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+  // p0 = f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+  // q1 = f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+  Load8x4(r0, stride, p1, q0);
+  Load8x4(r8, stride, p0, q1);
+
+  t1 = *p1;
+  t2 = *q0;
+  // p1 = f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+  // p0 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+  // q0 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+  // q1 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+  *p1 = _mm_unpacklo_epi64(t1, *p0);
+  *p0 = _mm_unpackhi_epi64(t1, *p0);
+  *q0 = _mm_unpacklo_epi64(t2, *q1);
+  *q1 = _mm_unpackhi_epi64(t2, *q1);
+}
+
+static WEBP_INLINE void Store4x4(__m128i* x, uint8_t* dst, int stride) {
+  int i;
+  for (i = 0; i < 4; ++i, dst += stride) {
+    *((int32_t*)dst) = _mm_cvtsi128_si32(*x);
+    *x = _mm_srli_si128(*x, 4);
+  }
+}
+
+// Transpose back and store
+static WEBP_INLINE void Store16x4(uint8_t* r0, uint8_t* r8, int stride,
+                                  __m128i* p1, __m128i* p0,
+                                  __m128i* q0, __m128i* q1) {
+  __m128i t1;
+
+  // p0 = 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+  // p1 = f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+  t1 = *p0;
+  *p0 = _mm_unpacklo_epi8(*p1, t1);
+  *p1 = _mm_unpackhi_epi8(*p1, t1);
+
+  // q0 = 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+  // q1 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+  t1 = *q0;
+  *q0 = _mm_unpacklo_epi8(t1, *q1);
+  *q1 = _mm_unpackhi_epi8(t1, *q1);
+
+  // p0 = 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+  // q0 = 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+  t1 = *p0;
+  *p0 = _mm_unpacklo_epi16(t1, *q0);
+  *q0 = _mm_unpackhi_epi16(t1, *q0);
+
+  // p1 = b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+  // q1 = f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+  t1 = *p1;
+  *p1 = _mm_unpacklo_epi16(t1, *q1);
+  *q1 = _mm_unpackhi_epi16(t1, *q1);
+
+  Store4x4(p0, r0, stride);
+  r0 += 4 * stride;
+  Store4x4(q0, r0, stride);
+
+  Store4x4(p1, r8, stride);
+  r8 += 4 * stride;
+  Store4x4(q1, r8, stride);
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+static void SimpleVFilter16SSE2(uint8_t* p, int stride, int thresh) {
+  // Load
+  __m128i p1 = _mm_loadu_si128((__m128i*)&p[-2 * stride]);
+  __m128i p0 = _mm_loadu_si128((__m128i*)&p[-stride]);
+  __m128i q0 = _mm_loadu_si128((__m128i*)&p[0]);
+  __m128i q1 = _mm_loadu_si128((__m128i*)&p[stride]);
+
+  DoFilter2(&p1, &p0, &q0, &q1, thresh);
+
+  // Store
+  _mm_storeu_si128((__m128i*)&p[-stride], p0);
+  _mm_storeu_si128((__m128i*)p, q0);
+}
+
+static void SimpleHFilter16SSE2(uint8_t* p, int stride, int thresh) {
+  __m128i p1, p0, q0, q1;
+
+  p -= 2;  // beginning of p1
+
+  Load16x4(p, p + 8 * stride,  stride, &p1, &p0, &q0, &q1);
+  DoFilter2(&p1, &p0, &q0, &q1, thresh);
+  Store16x4(p, p + 8 * stride, stride, &p1, &p0, &q0, &q1);
+}
+
+static void SimpleVFilter16iSSE2(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4 * stride;
+    SimpleVFilter16SSE2(p, stride, thresh);
+  }
+}
+
+static void SimpleHFilter16iSSE2(uint8_t* p, int stride, int thresh) {
+  int k;
+  for (k = 3; k > 0; --k) {
+    p += 4;
+    SimpleHFilter16SSE2(p, stride, thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+#define MAX_DIFF1(p3, p2, p1, p0, m) {                                         \
+  m = MM_ABS(p3, p2);                                                          \
+  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
+}
+
+#define MAX_DIFF2(p3, p2, p1, p0, m) {                                         \
+  m = _mm_max_epu8(m, MM_ABS(p3, p2));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p2, p1));                                         \
+  m = _mm_max_epu8(m, MM_ABS(p1, p0));                                         \
+}
+
+#define LOAD_H_EDGES4(p, stride, e1, e2, e3, e4) {                             \
+  e1 = _mm_loadu_si128((__m128i*)&(p)[0 * stride]);                            \
+  e2 = _mm_loadu_si128((__m128i*)&(p)[1 * stride]);                            \
+  e3 = _mm_loadu_si128((__m128i*)&(p)[2 * stride]);                            \
+  e4 = _mm_loadu_si128((__m128i*)&(p)[3 * stride]);                            \
+}
+
+#define LOADUV_H_EDGE(p, u, v, stride) {                                       \
+  p = _mm_loadl_epi64((__m128i*)&(u)[(stride)]);                               \
+  p = _mm_unpacklo_epi64(p, _mm_loadl_epi64((__m128i*)&(v)[(stride)]));        \
+}
+
+#define LOADUV_H_EDGES4(u, v, stride, e1, e2, e3, e4) {                        \
+  LOADUV_H_EDGE(e1, u, v, 0 * stride);                                         \
+  LOADUV_H_EDGE(e2, u, v, 1 * stride);                                         \
+  LOADUV_H_EDGE(e3, u, v, 2 * stride);                                         \
+  LOADUV_H_EDGE(e4, u, v, 3 * stride);                                         \
+}
+
+#define STOREUV(p, u, v, stride) {                                             \
+  _mm_storel_epi64((__m128i*)&u[(stride)], p);                                 \
+  p = _mm_srli_si128(p, 8);                                                    \
+  _mm_storel_epi64((__m128i*)&v[(stride)], p);                                 \
+}
+
+#define COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask) {               \
+  __m128i fl_yes;                                                              \
+  const __m128i it = _mm_set1_epi8(ithresh);                                   \
+  mask = _mm_subs_epu8(mask, it);                                              \
+  mask = _mm_cmpeq_epi8(mask, _mm_setzero_si128());                            \
+  NeedsFilter(&p1, &p0, &q0, &q1, thresh, &fl_yes);                            \
+  mask = _mm_and_si128(mask, fl_yes);                                          \
+}
+
+// on macroblock edges
+static void VFilter16SSE2(uint8_t* p, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
+  __m128i t1;
+  __m128i mask;
+  __m128i p2, p1, p0, q0, q1, q2;
+
+  // Load p3, p2, p1, p0
+  LOAD_H_EDGES4(p - 4 * stride, stride, t1, p2, p1, p0);
+  MAX_DIFF1(t1, p2, p1, p0, mask);
+
+  // Load q0, q1, q2, q3
+  LOAD_H_EDGES4(p, stride, q0, q1, q2, t1);
+  MAX_DIFF2(t1, q2, q1, q0, mask);
+
+  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  // Store
+  _mm_storeu_si128((__m128i*)&p[-3 * stride], p2);
+  _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
+  _mm_storeu_si128((__m128i*)&p[-1 * stride], p0);
+  _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
+  _mm_storeu_si128((__m128i*)&p[1 * stride], q1);
+  _mm_storeu_si128((__m128i*)&p[2 * stride], q2);
+}
+
+static void HFilter16SSE2(uint8_t* p, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+
+  uint8_t* const b = p - 4;
+  Load16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
+  MAX_DIFF1(p3, p2, p1, p0, mask);
+
+  Load16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);  // q0, q1, q2, q3
+  MAX_DIFF2(q3, q2, q1, q0, mask);
+
+  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  Store16x4(b, b + 8 * stride, stride, &p3, &p2, &p1, &p0);
+  Store16x4(p, p + 8 * stride, stride, &q0, &q1, &q2, &q3);
+}
+
+// on three inner edges
+static void VFilter16iSSE2(uint8_t* p, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
+  int k;
+  __m128i mask;
+  __m128i t1, t2, p1, p0, q0, q1;
+
+  for (k = 3; k > 0; --k) {
+    // Load p3, p2, p1, p0
+    LOAD_H_EDGES4(p, stride, t2, t1, p1, p0);
+    MAX_DIFF1(t2, t1, p1, p0, mask);
+
+    p += 4 * stride;
+
+    // Load q0, q1, q2, q3
+    LOAD_H_EDGES4(p, stride, q0, q1, t1, t2);
+    MAX_DIFF2(t2, t1, q1, q0, mask);
+
+    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+    // Store
+    _mm_storeu_si128((__m128i*)&p[-2 * stride], p1);
+    _mm_storeu_si128((__m128i*)&p[-1 * stride], p0);
+    _mm_storeu_si128((__m128i*)&p[0 * stride], q0);
+    _mm_storeu_si128((__m128i*)&p[1 * stride], q1);
+  }
+}
+
+static void HFilter16iSSE2(uint8_t* p, int stride,
+                           int thresh, int ithresh, int hev_thresh) {
+  int k;
+  uint8_t* b;
+  __m128i mask;
+  __m128i t1, t2, p1, p0, q0, q1;
+
+  for (k = 3; k > 0; --k) {
+    b = p;
+    Load16x4(b, b + 8 * stride, stride, &t2, &t1, &p1, &p0);  // p3, p2, p1, p0
+    MAX_DIFF1(t2, t1, p1, p0, mask);
+
+    b += 4;  // beginning of q0
+    Load16x4(b, b + 8 * stride, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
+    MAX_DIFF2(t2, t1, q1, q0, mask);
+
+    COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+    DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+    b -= 2;  // beginning of p1
+    Store16x4(b, b + 8 * stride, stride, &p1, &p0, &q0, &q1);
+
+    p += 4;
+  }
+}
+
+// 8-pixels wide variant, for chroma filtering
+static void VFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
+                         int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i t1, p2, p1, p0, q0, q1, q2;
+
+  // Load p3, p2, p1, p0
+  LOADUV_H_EDGES4(u - 4 * stride, v - 4 * stride, stride, t1, p2, p1, p0);
+  MAX_DIFF1(t1, p2, p1, p0, mask);
+
+  // Load q0, q1, q2, q3
+  LOADUV_H_EDGES4(u, v, stride, q0, q1, q2, t1);
+  MAX_DIFF2(t1, q2, q1, q0, mask);
+
+  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  // Store
+  STOREUV(p2, u, v, -3 * stride);
+  STOREUV(p1, u, v, -2 * stride);
+  STOREUV(p0, u, v, -1 * stride);
+  STOREUV(q0, u, v, 0 * stride);
+  STOREUV(q1, u, v, 1 * stride);
+  STOREUV(q2, u, v, 2 * stride);
+}
+
+static void HFilter8SSE2(uint8_t* u, uint8_t* v, int stride,
+                         int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i p3, p2, p1, p0, q0, q1, q2, q3;
+
+  uint8_t* const tu = u - 4;
+  uint8_t* const tv = v - 4;
+  Load16x4(tu, tv, stride, &p3, &p2, &p1, &p0);  // p3, p2, p1, p0
+  MAX_DIFF1(p3, p2, p1, p0, mask);
+
+  Load16x4(u, v, stride, &q0, &q1, &q2, &q3);    // q0, q1, q2, q3
+  MAX_DIFF2(q3, q2, q1, q0, mask);
+
+  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  DoFilter6(&p2, &p1, &p0, &q0, &q1, &q2, &mask, hev_thresh);
+
+  Store16x4(tu, tv, stride, &p3, &p2, &p1, &p0);
+  Store16x4(u, v, stride, &q0, &q1, &q2, &q3);
+}
+
+static void VFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i t1, t2, p1, p0, q0, q1;
+
+  // Load p3, p2, p1, p0
+  LOADUV_H_EDGES4(u, v, stride, t2, t1, p1, p0);
+  MAX_DIFF1(t2, t1, p1, p0, mask);
+
+  u += 4 * stride;
+  v += 4 * stride;
+
+  // Load q0, q1, q2, q3
+  LOADUV_H_EDGES4(u, v, stride, q0, q1, t1, t2);
+  MAX_DIFF2(t2, t1, q1, q0, mask);
+
+  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+  // Store
+  STOREUV(p1, u, v, -2 * stride);
+  STOREUV(p0, u, v, -1 * stride);
+  STOREUV(q0, u, v, 0 * stride);
+  STOREUV(q1, u, v, 1 * stride);
+}
+
+static void HFilter8iSSE2(uint8_t* u, uint8_t* v, int stride,
+                          int thresh, int ithresh, int hev_thresh) {
+  __m128i mask;
+  __m128i t1, t2, p1, p0, q0, q1;
+  Load16x4(u, v, stride, &t2, &t1, &p1, &p0);   // p3, p2, p1, p0
+  MAX_DIFF1(t2, t1, p1, p0, mask);
+
+  u += 4;  // beginning of q0
+  v += 4;
+  Load16x4(u, v, stride, &q0, &q1, &t1, &t2);  // q0, q1, q2, q3
+  MAX_DIFF2(t2, t1, q1, q0, mask);
+
+  COMPLEX_FL_MASK(p1, p0, q0, q1, thresh, ithresh, mask);
+  DoFilter4(&p1, &p0, &q0, &q1, &mask, hev_thresh);
+
+  u -= 2;  // beginning of p1
+  v -= 2;
+  Store16x4(u, v, stride, &p1, &p0, &q0, &q1);
+}
+
+extern void VP8DspInitSSE2(void);
+
+void VP8DspInitSSE2(void) {
+  VP8Transform = TransformSSE2;
+
+  VP8VFilter16 = VFilter16SSE2;
+  VP8HFilter16 = HFilter16SSE2;
+  VP8VFilter8 = VFilter8SSE2;
+  VP8HFilter8 = HFilter8SSE2;
+  VP8VFilter16i = VFilter16iSSE2;
+  VP8HFilter16i = HFilter16iSSE2;
+  VP8VFilter8i = VFilter8iSSE2;
+  VP8HFilter8i = HFilter8iSSE2;
+
+  VP8SimpleVFilter16 = SimpleVFilter16SSE2;
+  VP8SimpleHFilter16 = SimpleHFilter16SSE2;
+  VP8SimpleVFilter16i = SimpleVFilter16iSSE2;
+  VP8SimpleHFilter16i = SimpleHFilter16iSSE2;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif   // WEBP_USE_SSE2
diff --git a/drivers/webpold/dsp/dsp.h b/drivers/webpold/dsp/dsp.h
new file mode 100644
index 0000000000..fd686a8532
--- /dev/null
+++ b/drivers/webpold/dsp/dsp.h
@@ -0,0 +1,210 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//   Speed-critical functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_DSP_H_
+#define WEBP_DSP_DSP_H_
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// CPU detection
+
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#define WEBP_MSC_SSE2  // Visual C++ SSE2 targets
+#endif
+
+#if defined(__SSE2__) || defined(WEBP_MSC_SSE2)
+#define WEBP_USE_SSE2
+#endif
+
+#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && defined(__ARM_NEON__)
+#define WEBP_ANDROID_NEON  // Android targets that might support NEON
+#endif
+
+#if ( (defined(__ARM_NEON__) && !defined(__aarch64__)) || defined(WEBP_ANDROID_NEON)) && !defined(PSP2_ENABLED)
+#define WEBP_USE_NEON
+#endif
+
+typedef enum {
+  kSSE2,
+  kSSE3,
+  kNEON
+} CPUFeature;
+// returns true if the CPU supports the feature.
+typedef int (*VP8CPUInfo)(CPUFeature feature);
+extern VP8CPUInfo VP8GetCPUInfo;
+
+//------------------------------------------------------------------------------
+// Encoding
+
+int VP8GetAlpha(const int histo[]);
+
+// Transforms
+// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
+//          will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
+typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                        int do_two);
+typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
+typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
+extern VP8Idct VP8ITransform;
+extern VP8Fdct VP8FTransform;
+extern VP8WHT VP8ITransformWHT;
+extern VP8WHT VP8FTransformWHT;
+// Predictions
+// *dst is the destination block. *top and *left can be NULL.
+typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
+                              const uint8_t* top);
+typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
+extern VP8Intra4Preds VP8EncPredLuma4;
+extern VP8IntraPreds VP8EncPredLuma16;
+extern VP8IntraPreds VP8EncPredChroma8;
+
+typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
+extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
+typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
+                          const uint16_t* const weights);
+extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
+
+typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
+extern VP8BlockCopy VP8Copy4x4;
+// Quantization
+struct VP8Matrix;   // forward declaration
+typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
+                                int n, const struct VP8Matrix* const mtx);
+extern VP8QuantizeBlock VP8EncQuantizeBlock;
+
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
+                         int start_block, int end_block);
+extern const int VP8DspScan[16 + 4 + 4];
+extern VP8CHisto VP8CollectHistogram;
+
+void VP8EncDspInit(void);   // must be called before using any of the above
+
+//------------------------------------------------------------------------------
+// Decoding
+
+typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
+// when doing two transforms, coeffs is actually int16_t[2][16].
+typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
+extern VP8DecIdct2 VP8Transform;
+extern VP8DecIdct VP8TransformUV;
+extern VP8DecIdct VP8TransformDC;
+extern VP8DecIdct VP8TransformDCUV;
+extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
+
+// *dst is the destination block, with stride BPS. Boundary samples are
+// assumed accessible when needed.
+typedef void (*VP8PredFunc)(uint8_t* dst);
+extern const VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
+extern const VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
+extern const VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
+
+// simple filter (only for luma)
+typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
+extern VP8SimpleFilterFunc VP8SimpleVFilter16;
+extern VP8SimpleFilterFunc VP8SimpleHFilter16;
+extern VP8SimpleFilterFunc VP8SimpleVFilter16i;  // filter 3 inner edges
+extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
+
+// regular filter (on both macroblock edges and inner edges)
+typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
+                                  int thresh, int ithresh, int hev_t);
+typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
+                                    int thresh, int ithresh, int hev_t);
+// on outer edge
+extern VP8LumaFilterFunc VP8VFilter16;
+extern VP8LumaFilterFunc VP8HFilter16;
+extern VP8ChromaFilterFunc VP8VFilter8;
+extern VP8ChromaFilterFunc VP8HFilter8;
+
+// on inner edge
+extern VP8LumaFilterFunc VP8VFilter16i;   // filtering 3 inner edges altogether
+extern VP8LumaFilterFunc VP8HFilter16i;
+extern VP8ChromaFilterFunc VP8VFilter8i;  // filtering u and v altogether
+extern VP8ChromaFilterFunc VP8HFilter8i;
+
+// must be called before anything using the above
+void VP8DspInit(void);
+
+//------------------------------------------------------------------------------
+// WebP I/O
+
+#define FANCY_UPSAMPLING   // undefined to remove fancy upsampling support
+
+typedef void (*WebPUpsampleLinePairFunc)(
+    const uint8_t* top_y, const uint8_t* bottom_y,
+    const uint8_t* top_u, const uint8_t* top_v,
+    const uint8_t* cur_u, const uint8_t* cur_v,
+    uint8_t* top_dst, uint8_t* bottom_dst, int len);
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB(A) modes
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+// Initializes SSE2 version of the fancy upsamplers.
+void WebPInitUpsamplersSSE2(void);
+
+#endif    // FANCY_UPSAMPLING
+
+// Point-sampling methods.
+typedef void (*WebPSampleLinePairFunc)(
+    const uint8_t* top_y, const uint8_t* bottom_y,
+    const uint8_t* u, const uint8_t* v,
+    uint8_t* top_dst, uint8_t* bottom_dst, int len);
+
+extern const WebPSampleLinePairFunc WebPSamplers[/* MODE_LAST */];
+
+// General function for converting two lines of ARGB or RGBA.
+// 'alpha_is_last' should be true if 0xff000000 is stored in memory
+// as 0x00, 0x00, 0x00, 0xff (little endian).
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
+
+// YUV444->RGB converters
+typedef void (*WebPYUV444Converter)(const uint8_t* y,
+                                    const uint8_t* u, const uint8_t* v,
+                                    uint8_t* dst, int len);
+
+extern const WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
+
+// Main function to be called
+void WebPInitUpsamplers(void);
+
+//------------------------------------------------------------------------------
+// Pre-multiply planes with alpha values
+
+// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
+// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
+extern void (*WebPApplyAlphaMultiply)(
+    uint8_t* rgba, int alpha_first, int w, int h, int stride);
+
+// Same, but specifically for RGBA4444 format
+extern void (*WebPApplyAlphaMultiply4444)(
+    uint8_t* rgba4444, int w, int h, int stride);
+
+// To be called first before using the above.
+void WebPInitPremultiply(void);
+
+void WebPInitPremultiplySSE2(void);   // should not be called directly.
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DSP_DSP_H_ */
diff --git a/drivers/webpold/dsp/enc.c b/drivers/webpold/dsp/enc.c
new file mode 100644
index 0000000000..02234564be
--- /dev/null
+++ b/drivers/webpold/dsp/enc.c
@@ -0,0 +1,743 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Speed-critical encoding functions.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>  // for abs()
+#include "./dsp.h"
+#include "../enc/vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+static int ClipAlpha(int alpha) {  // clamps 'alpha' to the [0, 255] range
+  return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
+}
+
+int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {  // histo[0] unused
+  int num = 0, den = 0, val = 0;
+  int k;
+  int alpha;
+  // note: changing this loop to avoid the numerous "k + 1" slows things down.
+  for (k = 0; k < MAX_COEFF_THRESH; ++k) {
+    if (histo[k + 1]) {  // empty bins contribute to neither num nor den
+      val += histo[k + 1];  // running total of the non-empty bins seen so far
+      num += val * (k + 1);
+      den += (k + 1) * (k + 1);
+    }
+  }
+  // we scale the value to a usable [0..255] range
+  alpha = den ? 10 * num / den - 5 : 0;  // den == 0: bins 1.. are all empty
+  return ClipAlpha(alpha);
+}
+
+const int VP8DspScan[16 + 4 + 4] = {  // entries are x + y * BPS byte offsets
+  // Luma (16 entries, x and y stepping by 4)
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+
+  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
+  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
+};
+
+static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
+                            int start_block, int end_block) {  // [start, end)
+  int histo[MAX_COEFF_THRESH + 1] = { 0 };
+  int16_t out[16];
+  int j, k;
+  for (j = start_block; j < end_block; ++j) {  // j indexes VP8DspScan[]
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin (within out[]).
+    for (k = 0; k < 16; ++k) {
+      const int v = abs(out[k]) >> 2;
+      out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;  // saturate bin
+    }
+
+    // Use bin to update histogram.
+    for (k = 0; k < 16; ++k) {
+      histo[out[k]]++;
+    }
+  }
+
+  return VP8GetAlpha(histo);  // histogram -> score clipped to [0, 255]
+}
+
+//------------------------------------------------------------------------------
+// run-time tables (~4k)
+
+static uint8_t clip1[255 + 510 + 1];    // clip1[255 + i] clips i in [-255,510] to [0,255]
+
+// We declare this variable 'volatile' to prevent instruction reordering
+// and make sure it's set to true _last_ (so as to be thread-safe)
+static volatile int tables_ok = 0;  // NOTE(review): 'volatile' alone is not a memory barrier -- confirm
+
+static void InitTables(void) {  // fills clip1[] on first call, no-op afterwards
+  if (!tables_ok) {
+    int i;
+    for (i = -255; i <= 255 + 255; ++i) {
+      clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
+    }
+    tables_ok = 1;  // set only after the table is completely written
+  }
+}
+
+static WEBP_INLINE uint8_t clip_8b(int v) {  // clamps v to [0, 255]
+  return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;  // first test: v already in range
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+#define STORE(x, y, v) \
+  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
+
+static const int kC1 = 20091 + (1 << 16);  // = 85627, scaled by 2^16 (see MUL)
+static const int kC2 = 35468;  // scaled by 2^16 (see MUL)
+#define MUL(a, b) (((a) * (b)) >> 16)  // product with the 2^16 scale removed
+
+static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in,
+                                      uint8_t* dst) {  // dst = clip(ref + inverse(in)), one 4x4
+  int C[4 * 4], *tmp;  // C[] holds the intermediate result between passes
+  int i;
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // vertical pass
+    const int a = in[0] + in[8];
+    const int b = in[0] - in[8];
+    const int c = MUL(in[4], kC2) - MUL(in[12], kC1);
+    const int d = MUL(in[4], kC1) + MUL(in[12], kC2);
+    tmp[0] = a + d;
+    tmp[1] = b + c;
+    tmp[2] = b - c;
+    tmp[3] = a - d;
+    tmp += 4;  // results are written transposed: 4 consecutive ints per input column
+    in++;
+  }
+
+  tmp = C;
+  for (i = 0; i < 4; ++i) {    // horizontal pass
+    const int dc = tmp[0] + 4;  // +4 is the rounder for the '>> 3' done in STORE
+    const int a =  dc +  tmp[8];
+    const int b =  dc -  tmp[8];
+    const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1);
+    const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2);
+    STORE(0, i, a + d);  // row i of dst, columns 0..3
+    STORE(1, i, b + c);
+    STORE(2, i, b - c);
+    STORE(3, i, a - d);
+    tmp++;
+  }
+}
+
+static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                       int do_two) {  // do_two != 0: also process a second block
+  ITransformOne(ref, in, dst);
+  if (do_two) {
+    ITransformOne(ref + 4, in + 16, dst + 4);  // 4 pixels right, next 16 coeffs
+  }
+}
+
+static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {  // transforms the 4x4 difference src - ref
+  int i;
+  int tmp[16];
+  for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {  // first pass: one row per iteration
+    const int d0 = src[0] - ref[0];
+    const int d1 = src[1] - ref[1];
+    const int d2 = src[2] - ref[2];
+    const int d3 = src[3] - ref[3];
+    const int a0 = (d0 + d3) << 3;
+    const int a1 = (d1 + d2) << 3;
+    const int a2 = (d1 - d2) << 3;
+    const int a3 = (d0 - d3) << 3;
+    tmp[0 + i * 4] = (a0 + a1);
+    tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;
+    tmp[2 + i * 4] = (a0 - a1);
+    tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 +  7500) >> 12;
+  }
+  for (i = 0; i < 4; ++i) {  // second pass: one column of tmp[] per iteration
+    const int a0 = (tmp[0 + i] + tmp[12 + i]);
+    const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
+    const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
+    const int a3 = (tmp[0 + i] - tmp[12 + i]);
+    out[0 + i] = (a0 + a1 + 7) >> 4;
+    out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
+    out[8 + i] = (a0 - a1 + 7) >> 4;
+    out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
+  }
+}
+
+static void ITransformWHT(const int16_t* in, int16_t* out) {  // reads in[0..15], writes 16 values spaced 16 apart in out[]
+  int tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i) {  // first pass: one column of in[] per iteration
+    const int a0 = in[0 + i] + in[12 + i];
+    const int a1 = in[4 + i] + in[ 8 + i];
+    const int a2 = in[4 + i] - in[ 8 + i];
+    const int a3 = in[0 + i] - in[12 + i];
+    tmp[0  + i] = a0 + a1;
+    tmp[8  + i] = a0 - a1;
+    tmp[4  + i] = a3 + a2;
+    tmp[12 + i] = a3 - a2;
+  }
+  for (i = 0; i < 4; ++i) {  // second pass: one row of tmp[] per iteration
+    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
+    const int a0 = dc             + tmp[3 + i * 4];
+    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
+    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
+    const int a3 = dc             - tmp[3 + i * 4];
+    out[ 0] = (a0 + a1) >> 3;
+    out[16] = (a3 + a2) >> 3;
+    out[32] = (a0 - a1) >> 3;
+    out[48] = (a3 - a2) >> 3;
+    out += 64;  // presumably one slot per 16-coeff block -- TODO confirm with caller
+  }
+}
+
+static void FTransformWHT(const int16_t* in, int16_t* out) {  // reads 16 values spaced 16 apart in in[], writes out[0..15]
+  int tmp[16];
+  int i;
+  for (i = 0; i < 4; ++i, in += 64) {  // first pass: 4 inputs (16 apart) per iteration
+    const int a0 = (in[0 * 16] + in[2 * 16]) << 2;
+    const int a1 = (in[1 * 16] + in[3 * 16]) << 2;
+    const int a2 = (in[1 * 16] - in[3 * 16]) << 2;
+    const int a3 = (in[0 * 16] - in[2 * 16]) << 2;
+    tmp[0 + i * 4] = (a0 + a1) + (a0 != 0);
+    tmp[1 + i * 4] = a3 + a2;
+    tmp[2 + i * 4] = a3 - a2;
+    tmp[3 + i * 4] = a0 - a1;
+  }
+  for (i = 0; i < 4; ++i) {  // second pass: one column of tmp[] per iteration
+    const int a0 = (tmp[0 + i] + tmp[8 + i]);
+    const int a1 = (tmp[4 + i] + tmp[12+ i]);
+    const int a2 = (tmp[4 + i] - tmp[12+ i]);
+    const int a3 = (tmp[0 + i] - tmp[8 + i]);
+    const int b0 = a0 + a1;
+    const int b1 = a3 + a2;
+    const int b2 = a3 - a2;
+    const int b3 = a0 - a1;
+    out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3;  // extra +1 is applied to positive values only
+    out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3;
+    out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3;
+    out[12 + i] = (b3 + (b3 > 0) + 3) >> 3;
+  }
+}
+
+#undef MUL
+#undef STORE
+
+//------------------------------------------------------------------------------
+// Intra predictions
+
+#define DST(x, y) dst[(x) + (y) * BPS]
+
+static WEBP_INLINE void Fill(uint8_t* dst, int value, int size) {  // sets a size x size square to 'value'
+  int j;
+  for (j = 0; j < size; ++j) {
+    memset(dst + j * BPS, value, size);  // rows are BPS bytes apart
+  }
+}
+
+static WEBP_INLINE void VerticalPred(uint8_t* dst,
+                                     const uint8_t* top, int size) {  // 'top' may be NULL
+  int j;
+  if (top) {
+    for (j = 0; j < size; ++j) memcpy(dst + j * BPS, top, size);  // every row = top
+  } else {
+    Fill(dst, 127, size);  // default value when there is no top row
+  }
+}
+
+static WEBP_INLINE void HorizontalPred(uint8_t* dst,
+                                       const uint8_t* left, int size) {  // 'left' may be NULL
+  if (left) {
+    int j;
+    for (j = 0; j < size; ++j) {
+      memset(dst + j * BPS, left[j], size);  // row j is filled with left[j]
+    }
+  } else {
+    Fill(dst, 129, size);  // default value when there is no left column
+  }
+}
+
+static WEBP_INLINE void TrueMotion(uint8_t* dst, const uint8_t* left,
+                                   const uint8_t* top, int size) {  // 'left' and 'top' may be NULL
+  int y;
+  if (left) {
+    if (top) {
+      const uint8_t* const clip = clip1 + 255 - left[-1];  // left[-1]: presumably the top-left sample
+      for (y = 0; y < size; ++y) {
+        const uint8_t* const clip_table = clip + left[y];
+        int x;
+        for (x = 0; x < size; ++x) {
+          dst[x] = clip_table[top[x]];  // = clip(top[x] + left[y] - left[-1]) via clip1[]
+        }
+        dst += BPS;
+      }
+    } else {
+      HorizontalPred(dst, left, size);  // left only: fall back to horizontal prediction
+    }
+  } else {
+    // true motion without left samples (hence: with default 129 value)
+    // is equivalent to VE prediction where you just copy the top samples.
+    // Note that if top samples are not available, the default value is
+    // then 129, and not 127 as in the VerticalPred case.
+    if (top) {
+      VerticalPred(dst, top, size);
+    } else {
+      Fill(dst, 129, size);
+    }
+  }
+}
+
+static WEBP_INLINE void DCMode(uint8_t* dst, const uint8_t* left,
+                               const uint8_t* top,
+                               int size, int round, int shift) {  // fills dst with (sum + round) >> shift
+  int DC = 0;
+  int j;
+  if (top) {
+    for (j = 0; j < size; ++j) DC += top[j];
+    if (left) {   // top and left present
+      for (j = 0; j < size; ++j) DC += left[j];
+    } else {      // top, but no left
+      DC += DC;  // count the single available edge twice
+    }
+    DC = (DC + round) >> shift;
+  } else if (left) {   // left but no top
+    for (j = 0; j < size; ++j) DC += left[j];
+    DC += DC;  // count the single available edge twice
+    DC = (DC + round) >> shift;
+  } else {   // no top, no left, nothing.
+    DC = 0x80;  // fixed default, 'round' and 'shift' are not applied
+  }
+  Fill(dst, DC, size);
+}
+
+//------------------------------------------------------------------------------
+// Chroma 8x8 prediction (paragraph 12.2)
+
+static void IntraChromaPreds(uint8_t* dst, const uint8_t* left,
+                             const uint8_t* top) {  // writes 4 predictions each for U and V
+  // U block
+  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
+  VerticalPred(C8VE8 + dst, top, 8);
+  HorizontalPred(C8HE8 + dst, left, 8);
+  TrueMotion(C8TM8 + dst, left, top, 8);
+  // V block
+  dst += 8;  // V output starts 8 bytes after U output
+  if (top) top += 8;  // V's top samples follow U's 8 top samples
+  if (left) left += 16;  // V's left samples start 16 bytes after U's
+  DCMode(C8DC8 + dst, left, top, 8, 8, 4);
+  VerticalPred(C8VE8 + dst, top, 8);
+  HorizontalPred(C8HE8 + dst, left, 8);
+  TrueMotion(C8TM8 + dst, left, top, 8);
+}
+
+//------------------------------------------------------------------------------
+// luma 16x16 prediction (paragraph 12.3)
+
+static void Intra16Preds(uint8_t* dst,
+                         const uint8_t* left, const uint8_t* top) {  // writes 4 16x16 predictions
+  DCMode(I16DC16 + dst, left, top, 16, 16, 5);  // round = 16, shift = 5
+  VerticalPred(I16VE16 + dst, top, 16);
+  HorizontalPred(I16HE16 + dst, left, 16);
+  TrueMotion(I16TM16 + dst, left, top, 16);
+}
+
+//------------------------------------------------------------------------------
+// luma 4x4 prediction
+
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
+static void VE4(uint8_t* dst, const uint8_t* top) {    // vertical
+  const uint8_t vals[4] = {  // each value: (left + 2 * center + right + 2) >> 2
+    AVG3(top[-1], top[0], top[1]),
+    AVG3(top[ 0], top[1], top[2]),
+    AVG3(top[ 1], top[2], top[3]),
+    AVG3(top[ 2], top[3], top[4])
+  };
+  int i;
+  for (i = 0; i < 4; ++i) {
+    memcpy(dst + i * BPS, vals, 4);  // all four rows get the same smoothed row
+  }
+}
+
+static void HE4(uint8_t* dst, const uint8_t* top) {    // horizontal
+  const int X = top[-1];  // top-left sample
+  const int I = top[-2];  // I, J, K, L: left samples
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  *(uint32_t*)(dst + 0 * BPS) = 0x01010101U * AVG3(X, I, J);  // same byte x4
+  *(uint32_t*)(dst + 1 * BPS) = 0x01010101U * AVG3(I, J, K);
+  *(uint32_t*)(dst + 2 * BPS) = 0x01010101U * AVG3(J, K, L);
+  *(uint32_t*)(dst + 3 * BPS) = 0x01010101U * AVG3(K, L, L);  // L repeated at the edge
+}
+
+static void DC4(uint8_t* dst, const uint8_t* top) {  // fills the 4x4 with one rounded mean
+  uint32_t dc = 4;  // rounder for the '>> 3' below
+  int i;
+  for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];  // top[0..3] and top[-5..-2]
+  Fill(dst, dc >> 3, 4);
+}
+
+static void RD4(uint8_t* dst, const uint8_t* top) {  // presumably the 'down-right' mode (name-based)
+  const int X = top[-1];  // top-left sample
+  const int I = top[-2];  // I, J, K, L: left samples
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  const int A = top[0];  // A, B, C, D: top samples
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  DST(0, 3)                                     = AVG3(J, K, L);
+  DST(0, 2) = DST(1, 3)                         = AVG3(I, J, K);
+  DST(0, 1) = DST(1, 2) = DST(2, 3)             = AVG3(X, I, J);
+  DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A, X, I);
+  DST(1, 0) = DST(2, 1) = DST(3, 2)             = AVG3(B, A, X);
+  DST(2, 0) = DST(3, 1)                         = AVG3(C, B, A);
+  DST(3, 0)                                     = AVG3(D, C, B);
+}
+
+static void LD4(uint8_t* dst, const uint8_t* top) {  // presumably the 'down-left' mode (name-based)
+  const int A = top[0];  // A, B, C, D: top samples
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  const int E = top[4];  // E, F, G, H: top-right samples
+  const int F = top[5];
+  const int G = top[6];
+  const int H = top[7];
+  DST(0, 0)                                     = AVG3(A, B, C);
+  DST(1, 0) = DST(0, 1)                         = AVG3(B, C, D);
+  DST(2, 0) = DST(1, 1) = DST(0, 2)             = AVG3(C, D, E);
+  DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+  DST(3, 1) = DST(2, 2) = DST(1, 3)             = AVG3(E, F, G);
+  DST(3, 2) = DST(2, 3)                         = AVG3(F, G, H);
+  DST(3, 3)                                     = AVG3(G, H, H);
+}
+
+static void VR4(uint8_t* dst, const uint8_t* top) {  // presumably the 'vertical-right' mode (name-based)
+  const int X = top[-1];  // top-left sample
+  const int I = top[-2];  // I, J, K: left samples (top[-5] is not used here)
+  const int J = top[-3];
+  const int K = top[-4];
+  const int A = top[0];  // A, B, C, D: top samples
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  DST(0, 0) = DST(1, 2) = AVG2(X, A);
+  DST(1, 0) = DST(2, 2) = AVG2(A, B);
+  DST(2, 0) = DST(3, 2) = AVG2(B, C);
+  DST(3, 0)             = AVG2(C, D);
+
+  DST(0, 3) =             AVG3(K, J, I);
+  DST(0, 2) =             AVG3(J, I, X);
+  DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+  DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+  DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+  DST(3, 1) =             AVG3(B, C, D);
+}
+
+static void VL4(uint8_t* dst, const uint8_t* top) {  // presumably the 'vertical-left' mode (name-based)
+  const int A = top[0];  // A, B, C, D: top samples
+  const int B = top[1];
+  const int C = top[2];
+  const int D = top[3];
+  const int E = top[4];  // E, F, G, H: top-right samples
+  const int F = top[5];
+  const int G = top[6];
+  const int H = top[7];
+  DST(0, 0) =             AVG2(A, B);
+  DST(1, 0) = DST(0, 2) = AVG2(B, C);
+  DST(2, 0) = DST(1, 2) = AVG2(C, D);
+  DST(3, 0) = DST(2, 2) = AVG2(D, E);
+
+  DST(0, 1) =             AVG3(A, B, C);
+  DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+  DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+  DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+              DST(3, 2) = AVG3(E, F, G);
+              DST(3, 3) = AVG3(F, G, H);
+}
+
+static void HU4(uint8_t* dst, const uint8_t* top) {  // presumably the 'horizontal-up' mode (name-based)
+  const int I = top[-2];  // I, J, K, L: left samples (the only inputs used)
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  DST(0, 0) =             AVG2(I, J);
+  DST(2, 0) = DST(0, 1) = AVG2(J, K);
+  DST(2, 1) = DST(0, 2) = AVG2(K, L);
+  DST(1, 0) =             AVG3(I, J, K);
+  DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+  DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+  DST(3, 2) = DST(2, 2) =
+  DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;  // remaining six pixels are plain L
+}
+
+static void HD4(uint8_t* dst, const uint8_t* top) {  // presumably the 'horizontal-down' mode (name-based)
+  const int X = top[-1];  // top-left sample
+  const int I = top[-2];  // I, J, K, L: left samples
+  const int J = top[-3];
+  const int K = top[-4];
+  const int L = top[-5];
+  const int A = top[0];  // A, B, C: top samples (top[3] is not used here)
+  const int B = top[1];
+  const int C = top[2];
+
+  DST(0, 0) = DST(2, 1) = AVG2(I, X);
+  DST(0, 1) = DST(2, 2) = AVG2(J, I);
+  DST(0, 2) = DST(2, 3) = AVG2(K, J);
+  DST(0, 3)             = AVG2(L, K);
+
+  DST(3, 0)             = AVG3(A, B, C);
+  DST(2, 0)             = AVG3(X, A, B);
+  DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+  DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+  DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+  DST(1, 3)             = AVG3(L, K, J);
+}
+
+static void TM4(uint8_t* dst, const uint8_t* top) {  // 4x4 variant of TrueMotion()
+  int x, y;
+  const uint8_t* const clip = clip1 + 255 - top[-1];  // top[-1]: top-left sample
+  for (y = 0; y < 4; ++y) {
+    const uint8_t* const clip_table = clip + top[-2 - y];  // left sample of row y
+    for (x = 0; x < 4; ++x) {
+      dst[x] = clip_table[top[x]];  // = clip(top[x] + left - top_left) via clip1[]
+    }
+    dst += BPS;
+  }
+}
+
+#undef DST
+#undef AVG3
+#undef AVG2
+
+// Left samples are top[-5 .. -2], top_left is top[-1], top are
+// located at top[0..3], and top right is top[4..7]
+static void Intra4Preds(uint8_t* dst, const uint8_t* top) {  // writes ten 4x4 predictions into dst
+  DC4(I4DC4 + dst, top);  // each I4xxx constant offsets dst to that mode's area
+  TM4(I4TM4 + dst, top);
+  VE4(I4VE4 + dst, top);
+  HE4(I4HE4 + dst, top);
+  RD4(I4RD4 + dst, top);
+  VR4(I4VR4 + dst, top);
+  LD4(I4LD4 + dst, top);
+  VL4(I4VL4 + dst, top);
+  HD4(I4HD4 + dst, top);
+  HU4(I4HU4 + dst, top);
+}
+
+//------------------------------------------------------------------------------
+// Metric
+
+static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b,
+                              int w, int h) {  // sum of squared differences over w x h
+  int count = 0;
+  int y, x;
+  for (y = 0; y < h; ++y) {
+    for (x = 0; x < w; ++x) {
+      const int diff = (int)a[x] - b[x];
+      count += diff * diff;
+    }
+    a += BPS;  // both inputs use a row stride of BPS bytes
+    b += BPS;
+  }
+  return count;
+}
+
+static int SSE16x16(const uint8_t* a, const uint8_t* b) {  // GetSSE over 16 columns x 16 rows
+  return GetSSE(a, b, 16, 16);
+}
+static int SSE16x8(const uint8_t* a, const uint8_t* b) {  // GetSSE over 16 columns x 8 rows
+  return GetSSE(a, b, 16, 8);
+}
+static int SSE8x8(const uint8_t* a, const uint8_t* b) {  // GetSSE over 8 columns x 8 rows
+  return GetSSE(a, b, 8, 8);
+}
+static int SSE4x4(const uint8_t* a, const uint8_t* b) {  // GetSSE over 4 columns x 4 rows
+  return GetSSE(a, b, 4, 4);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform
+// Returns the weighted sum of the absolute value of transformed coefficients.
+static int TTransform(const uint8_t* in, const uint16_t* w) {  // in: 4x4 pixels (stride BPS), w: 16 weights
+  int sum = 0;
+  int tmp[16];
+  int i;
+  // horizontal pass
+  for (i = 0; i < 4; ++i, in += BPS) {
+    const int a0 = (in[0] + in[2]) << 2;
+    const int a1 = (in[1] + in[3]) << 2;
+    const int a2 = (in[1] - in[3]) << 2;
+    const int a3 = (in[0] - in[2]) << 2;
+    tmp[0 + i * 4] = a0 + a1 + (a0 != 0);
+    tmp[1 + i * 4] = a3 + a2;
+    tmp[2 + i * 4] = a3 - a2;
+    tmp[3 + i * 4] = a0 - a1;
+  }
+  // vertical pass
+  for (i = 0; i < 4; ++i, ++w) {  // w advances with the column, so w[4 * k] is row k
+    const int a0 = (tmp[0 + i] + tmp[8 + i]);
+    const int a1 = (tmp[4 + i] + tmp[12+ i]);
+    const int a2 = (tmp[4 + i] - tmp[12+ i]);
+    const int a3 = (tmp[0 + i] - tmp[8 + i]);
+    const int b0 = a0 + a1;
+    const int b1 = a3 + a2;
+    const int b2 = a3 - a2;
+    const int b3 = a0 - a1;
+    // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
+    sum += w[ 0] * ((abs(b0) + 3) >> 3);
+    sum += w[ 4] * ((abs(b1) + 3) >> 3);
+    sum += w[ 8] * ((abs(b2) + 3) >> 3);
+    sum += w[12] * ((abs(b3) + 3) >> 3);
+  }
+  return sum;
+}
+
+static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
+                    const uint16_t* const w) {  // same weights 'w' for both inputs
+  const int sum1 = TTransform(a, w);
+  const int sum2 = TTransform(b, w);
+  return (abs(sum2 - sum1) + 8) >> 4;  // rounded division of the difference by 16
+}
+
+static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
+                      const uint16_t* const w) {  // sum of Disto4x4 over the 16 sub-blocks
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {  // y is a byte offset: 4 rows per step
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+
+static const uint8_t kZigzag[16] = {  // scan position n -> coefficient index j
+  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+// Simple quantization
+static int QuantizeBlock(int16_t in[16], int16_t out[16],
+                         int n, const VP8Matrix* const mtx) {  // n: first scan position to process
+  int last = -1;  // scan position of the last non-zero output, -1 if none
+  for (; n < 16; ++n) {
+    const int j = kZigzag[n];  // in[] is indexed by j, out[] by scan position n
+    const int sign = (in[j] < 0);
+    int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];  // magnitude + sharpen term
+    if (coeff > 2047) coeff = 2047;
+    if (coeff > mtx->zthresh_[j]) {
+      const int Q = mtx->q_[j];
+      const int iQ = mtx->iq_[j];
+      const int B = mtx->bias_[j];
+      out[n] = QUANTDIV(coeff, iQ, B);
+      if (sign) out[n] = -out[n];  // restore the sign removed above
+      in[j] = out[n] * Q;  // in[] is overwritten with the level times Q
+      if (out[n]) last = n;
+    } else {
+      out[n] = 0;  // at or below the threshold: coefficient is zeroed in both arrays
+      in[j] = 0;
+    }
+  }
+  return (last >= 0);  // 1 if at least one output coefficient is non-zero
+}
+
+//------------------------------------------------------------------------------
+// Block copy
+
+static WEBP_INLINE void Copy(const uint8_t* src, uint8_t* dst, int size) {  // copies a size x size square
+  int y;
+  for (y = 0; y < size; ++y) {
+    memcpy(dst, src, size);
+    src += BPS;  // source and destination both use a row stride of BPS
+    dst += BPS;
+  }
+}
+
+static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }  // 4x4 case of Copy()
+
+//------------------------------------------------------------------------------
+// Initialization
+
+// Speed-critical function pointers. We have to initialize them to the default
+// implementations within VP8EncDspInit().
+VP8CHisto VP8CollectHistogram;  // all of these are assigned in VP8EncDspInit()
+VP8Idct VP8ITransform;
+VP8Fdct VP8FTransform;
+VP8WHT VP8ITransformWHT;
+VP8WHT VP8FTransformWHT;
+VP8Intra4Preds VP8EncPredLuma4;
+VP8IntraPreds VP8EncPredLuma16;
+VP8IntraPreds VP8EncPredChroma8;
+VP8Metric VP8SSE16x16;
+VP8Metric VP8SSE8x8;
+VP8Metric VP8SSE16x8;
+VP8Metric VP8SSE4x4;
+VP8WMetric VP8TDisto4x4;
+VP8WMetric VP8TDisto16x16;
+VP8QuantizeBlock VP8EncQuantizeBlock;
+VP8BlockCopy VP8Copy4x4;
+
+extern void VP8EncDspInitSSE2(void);  // presumably defined in enc_sse2.c -- not visible here
+
+void VP8EncDspInit(void) {  // must run before any of the VP8* pointers above is used
+  InitTables();  // clip1[] is needed by the TrueMotion/TM4 predictors
+
+  // default C implementations
+  VP8CollectHistogram = CollectHistogram;
+  VP8ITransform = ITransform;
+  VP8FTransform = FTransform;
+  VP8ITransformWHT = ITransformWHT;
+  VP8FTransformWHT = FTransformWHT;
+  VP8EncPredLuma4 = Intra4Preds;
+  VP8EncPredLuma16 = Intra16Preds;
+  VP8EncPredChroma8 = IntraChromaPreds;
+  VP8SSE16x16 = SSE16x16;
+  VP8SSE8x8 = SSE8x8;
+  VP8SSE16x8 = SSE16x8;
+  VP8SSE4x4 = SSE4x4;
+  VP8TDisto4x4 = Disto4x4;
+  VP8TDisto16x16 = Disto16x16;
+  VP8EncQuantizeBlock = QuantizeBlock;
+  VP8Copy4x4 = Copy4x4;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo) {  // skipped entirely when the pointer is NULL
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      VP8EncDspInitSSE2();  // runs after the C defaults above have been assigned
+    }
+#endif
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dsp/enc_sse2.c b/drivers/webpold/dsp/enc_sse2.c
new file mode 100644
index 0000000000..b046761dc1
--- /dev/null
+++ b/drivers/webpold/dsp/enc_sse2.c
@@ -0,0 +1,837 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of speed-critical encoding functions.
+//
+// Author: Christian Duvivier (cduvivier@google.com)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+#include <stdlib.h>  // for abs()
+#include <emmintrin.h>
+
+#include "../enc/vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Compute susceptibility based on DCT-coeff histograms:
+// the higher, the "easier" the macroblock is to compress.
+
+static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
+                                int start_block, int end_block) {  // [start, end)
+  int histo[MAX_COEFF_THRESH + 1] = { 0 };
+  int16_t out[16];
+  int j, k;
+  const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
+  for (j = start_block; j < end_block; ++j) {  // j indexes VP8DspScan[]
+    VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
+
+    // Convert coefficients to bin (within out[]).
+    {
+      // Load.
+      const __m128i out0 = _mm_loadu_si128((__m128i*)&out[0]);  // coeffs 0..7
+      const __m128i out1 = _mm_loadu_si128((__m128i*)&out[8]);  // coeffs 8..15
+      // sign(out) = out >> 15  (0x0000 if positive, 0xffff if negative)
+      const __m128i sign0 = _mm_srai_epi16(out0, 15);
+      const __m128i sign1 = _mm_srai_epi16(out1, 15);
+      // abs(out) = (out ^ sign) - sign
+      const __m128i xor0 = _mm_xor_si128(out0, sign0);
+      const __m128i xor1 = _mm_xor_si128(out1, sign1);
+      const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
+      const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
+      // v = abs(out) >> 2
+      const __m128i v0 = _mm_srai_epi16(abs0, 2);
+      const __m128i v1 = _mm_srai_epi16(abs1, 2);
+      // bin = min(v, MAX_COEFF_THRESH)
+      const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
+      const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
+      // Store.
+      _mm_storeu_si128((__m128i*)&out[0], bin0);
+      _mm_storeu_si128((__m128i*)&out[8], bin1);
+    }
+
+    // Use bin to update histogram.
+    for (k = 0; k < 16; ++k) {  // the histogram update itself stays scalar
+      histo[out[k]]++;
+    }
+  }
+
+  return VP8GetAlpha(histo);
+}
+
+//------------------------------------------------------------------------------
+// Transforms (Paragraph 14.4)
+
+// Does one or two inverse transforms.
+static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
+                           int do_two) {
+  // This implementation makes use of 16-bit fixed point versions of two
+  // multiply constants:
+  //    K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16
+  //    K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16
+  //
+  // To be able to use signed 16-bit integers, we use the following trick to
+  // have constants within range:
+  // - Associated constants are obtained by subtracting the 16-bit fixed point
+  //   version of one:
+  //      k = K - (1 << 16)  =>  K = k + (1 << 16)
+  //      K1 = 85627  =>  k1 =  20091
+  //      K2 = 35468  =>  k2 = -30068
+  // - The multiplication of a variable by a constant become the sum of the
+  //   variable and the multiplication of that variable by the associated
+  //   constant:
+  //      (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x
+  const __m128i k1 = _mm_set1_epi16(20091);
+  const __m128i k2 = _mm_set1_epi16(-30068);
+  __m128i T0, T1, T2, T3;
+
+  // Load and concatenate the transform coefficients (we'll do two inverse
+  // transforms in parallel). In the case of only one inverse transform, the
+  // second half of the vectors will just contain random value we'll never
+  // use nor store.
+  __m128i in0, in1, in2, in3;
+  {
+    in0 = _mm_loadl_epi64((__m128i*)&in[0]);
+    in1 = _mm_loadl_epi64((__m128i*)&in[4]);
+    in2 = _mm_loadl_epi64((__m128i*)&in[8]);
+    in3 = _mm_loadl_epi64((__m128i*)&in[12]);
+    // a00 a10 a20 a30   x x x x
+    // a01 a11 a21 a31   x x x x
+    // a02 a12 a22 a32   x x x x
+    // a03 a13 a23 a33   x x x x
+    if (do_two) {
+      const __m128i inB0 = _mm_loadl_epi64((__m128i*)&in[16]);
+      const __m128i inB1 = _mm_loadl_epi64((__m128i*)&in[20]);
+      const __m128i inB2 = _mm_loadl_epi64((__m128i*)&in[24]);
+      const __m128i inB3 = _mm_loadl_epi64((__m128i*)&in[28]);
+      in0 = _mm_unpacklo_epi64(in0, inB0);
+      in1 = _mm_unpacklo_epi64(in1, inB1);
+      in2 = _mm_unpacklo_epi64(in2, inB2);
+      in3 = _mm_unpacklo_epi64(in3, inB3);
+      // a00 a10 a20 a30   b00 b10 b20 b30
+      // a01 a11 a21 a31   b01 b11 b21 b31
+      // a02 a12 a22 a32   b02 b12 b22 b32
+      // a03 a13 a23 a33   b03 b13 b23 b33
+    }
+  }
+
+  // Vertical pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i a = _mm_add_epi16(in0, in2);
+    const __m128i b = _mm_sub_epi16(in0, in2);
+    // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3
+    const __m128i c1 = _mm_mulhi_epi16(in1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(in3, k1);
+    const __m128i c3 = _mm_sub_epi16(in1, in3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3
+    const __m128i d1 = _mm_mulhi_epi16(in1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(in3, k2);
+    const __m128i d3 = _mm_add_epi16(in1, in3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(tmp0, tmp1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(tmp2, tmp3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(tmp0, tmp1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(tmp2, tmp3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 b22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // First pass, c and d calculations are longer because of the "trick"
+    // multiplications.
+    const __m128i four = _mm_set1_epi16(4);
+    const __m128i dc = _mm_add_epi16(T0, four);
+    const __m128i a =  _mm_add_epi16(dc, T2);
+    const __m128i b =  _mm_sub_epi16(dc, T2);
+    // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3
+    const __m128i c1 = _mm_mulhi_epi16(T1, k2);
+    const __m128i c2 = _mm_mulhi_epi16(T3, k1);
+    const __m128i c3 = _mm_sub_epi16(T1, T3);
+    const __m128i c4 = _mm_sub_epi16(c1, c2);
+    const __m128i c = _mm_add_epi16(c3, c4);
+    // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3
+    const __m128i d1 = _mm_mulhi_epi16(T1, k1);
+    const __m128i d2 = _mm_mulhi_epi16(T3, k2);
+    const __m128i d3 = _mm_add_epi16(T1, T3);
+    const __m128i d4 = _mm_add_epi16(d1, d2);
+    const __m128i d = _mm_add_epi16(d3, d4);
+
+    // Second pass.
+    const __m128i tmp0 = _mm_add_epi16(a, d);
+    const __m128i tmp1 = _mm_add_epi16(b, c);
+    const __m128i tmp2 = _mm_sub_epi16(b, c);
+    const __m128i tmp3 = _mm_sub_epi16(a, d);
+    const __m128i shifted0 = _mm_srai_epi16(tmp0, 3);
+    const __m128i shifted1 = _mm_srai_epi16(tmp1, 3);
+    const __m128i shifted2 = _mm_srai_epi16(tmp2, 3);
+    const __m128i shifted3 = _mm_srai_epi16(tmp3, 3);
+
+    // Transpose the two 4x4.
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(shifted0, shifted1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(shifted2, shifted3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(shifted0, shifted1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(shifted2, shifted3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 b22 b32 b03 b13 b23 b33
+    T0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    T1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    T2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    T3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Add inverse transform to 'ref' and store.
+  {
+    const __m128i zero = _mm_set1_epi16(0);
+    // Load the reference(s).
+    __m128i ref0, ref1, ref2, ref3;
+    if (do_two) {
+      // Load eight bytes/pixels per line.
+      ref0 = _mm_loadl_epi64((__m128i*)&ref[0 * BPS]);
+      ref1 = _mm_loadl_epi64((__m128i*)&ref[1 * BPS]);
+      ref2 = _mm_loadl_epi64((__m128i*)&ref[2 * BPS]);
+      ref3 = _mm_loadl_epi64((__m128i*)&ref[3 * BPS]);
+    } else {
+      // Load four bytes/pixels per line.
+      ref0 = _mm_cvtsi32_si128(*(int*)&ref[0 * BPS]);
+      ref1 = _mm_cvtsi32_si128(*(int*)&ref[1 * BPS]);
+      ref2 = _mm_cvtsi32_si128(*(int*)&ref[2 * BPS]);
+      ref3 = _mm_cvtsi32_si128(*(int*)&ref[3 * BPS]);
+    }
+    // Convert to 16b.
+    ref0 = _mm_unpacklo_epi8(ref0, zero);
+    ref1 = _mm_unpacklo_epi8(ref1, zero);
+    ref2 = _mm_unpacklo_epi8(ref2, zero);
+    ref3 = _mm_unpacklo_epi8(ref3, zero);
+    // Add the inverse transform(s).
+    ref0 = _mm_add_epi16(ref0, T0);
+    ref1 = _mm_add_epi16(ref1, T1);
+    ref2 = _mm_add_epi16(ref2, T2);
+    ref3 = _mm_add_epi16(ref3, T3);
+    // Unsigned saturate to 8b.
+    ref0 = _mm_packus_epi16(ref0, ref0);
+    ref1 = _mm_packus_epi16(ref1, ref1);
+    ref2 = _mm_packus_epi16(ref2, ref2);
+    ref3 = _mm_packus_epi16(ref3, ref3);
+    // Store the results.
+    if (do_two) {
+      // Store eight bytes/pixels per line.
+      _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0);
+      _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1);
+      _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2);
+      _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3);
+    } else {
+      // Store four bytes/pixels per line.
+      *((int32_t *)&dst[0 * BPS]) = _mm_cvtsi128_si32(ref0);
+      *((int32_t *)&dst[1 * BPS]) = _mm_cvtsi128_si32(ref1);
+      *((int32_t *)&dst[2 * BPS]) = _mm_cvtsi128_si32(ref2);
+      *((int32_t *)&dst[3 * BPS]) = _mm_cvtsi128_si32(ref3);
+    }
+  }
+}
+// Applies a two-pass 4x4 transform to (src - ref); writes 16 values to 'out'.
+static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
+                           int16_t* out) {
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i seven = _mm_set1_epi16(7);  // rounding term before '>> 4'
+  const __m128i k7500 = _mm_set1_epi32(7500);
+  const __m128i k14500 = _mm_set1_epi32(14500);
+  const __m128i k51000 = _mm_set1_epi32(51000);
+  const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));  // see g1
+  const __m128i k5352_2217 = _mm_set_epi16(5352,  2217, 5352,  2217,
+                                           5352,  2217, 5352,  2217);
+  const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
+                                           2217, -5352, 2217, -5352);
+
+  __m128i v01, v32;
+
+  // Difference between src and ref and initial transpose.
+  {
+    // Load src (8 bytes per row) and convert to 16b.
+    const __m128i src0 = _mm_loadl_epi64((__m128i*)&src[0 * BPS]);
+    const __m128i src1 = _mm_loadl_epi64((__m128i*)&src[1 * BPS]);
+    const __m128i src2 = _mm_loadl_epi64((__m128i*)&src[2 * BPS]);
+    const __m128i src3 = _mm_loadl_epi64((__m128i*)&src[3 * BPS]);
+    const __m128i src_0 = _mm_unpacklo_epi8(src0, zero);
+    const __m128i src_1 = _mm_unpacklo_epi8(src1, zero);
+    const __m128i src_2 = _mm_unpacklo_epi8(src2, zero);
+    const __m128i src_3 = _mm_unpacklo_epi8(src3, zero);
+    // Load ref (8 bytes per row) and convert to 16b.
+    const __m128i ref0 = _mm_loadl_epi64((__m128i*)&ref[0 * BPS]);
+    const __m128i ref1 = _mm_loadl_epi64((__m128i*)&ref[1 * BPS]);
+    const __m128i ref2 = _mm_loadl_epi64((__m128i*)&ref[2 * BPS]);
+    const __m128i ref3 = _mm_loadl_epi64((__m128i*)&ref[3 * BPS]);
+    const __m128i ref_0 = _mm_unpacklo_epi8(ref0, zero);
+    const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
+    const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
+    const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
+    // Compute difference.
+    const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
+    const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
+    const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
+    const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
+
+    // Transpose (x = unused lane, dropped by the unpacklo below).
+    // 00 01 02 03   x x x x
+    // 10 11 12 13   x x x x
+    // 20 21 22 23   x x x x
+    // 30 31 32 33   x x x x
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(diff0, diff1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(diff2, diff3);
+    // 00 10 01 11   02 12 03 13
+    // 20 30 21 31   22 32 23 33
+    const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
+    // v23: a02 a12 a22 a32   a03 a13 a23 a33
+    // v01: a00 a10 a20 a30   a01 a11 a21 a31
+    // v32: a03 a13 a23 a33   a02 a12 a22 a32
+  }
+
+  // First pass and subsequent transpose.
+  {
+    // Same operations are done on the (0,3) and (1,2) pairs.
+    // b0 = (a0 + a3) << 3
+    // b1 = (a1 + a2) << 3
+    // b3 = (a0 - a3) << 3
+    // b2 = (a1 - a2) << 3
+    const __m128i a01 = _mm_add_epi16(v01, v32);
+    const __m128i a32 = _mm_sub_epi16(v01, v32);
+    const __m128i b01 = _mm_slli_epi16(a01, 3);
+    const __m128i b32 = _mm_slli_epi16(a32, 3);
+    const __m128i b11 = _mm_unpackhi_epi64(b01, b01);
+    const __m128i b22 = _mm_unpackhi_epi64(b32, b32);
+
+    // e0 = b0 + b1
+    // e2 = b0 - b1
+    const __m128i e0 = _mm_add_epi16(b01, b11);
+    const __m128i e2 = _mm_sub_epi16(b01, b11);
+    const __m128i e02 = _mm_unpacklo_epi64(e0, e2);
+
+    // e1 = (b3 * 5352 + b2 * 2217 + 14500) >> 12
+    // e3 = (b3 * 2217 - b2 * 5352 +  7500) >> 12
+    const __m128i b23 = _mm_unpacklo_epi16(b22, b32);  // b2,b3 interleaved
+    const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
+    const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
+    const __m128i d1 = _mm_add_epi32(c1, k14500);
+    const __m128i d3 = _mm_add_epi32(c3, k7500);
+    const __m128i e1 = _mm_srai_epi32(d1, 12);
+    const __m128i e3 = _mm_srai_epi32(d3, 12);
+    const __m128i e13 = _mm_packs_epi32(e1, e3);
+
+    // Transpose.
+    // 00 01 02 03  20 21 22 23
+    // 10 11 12 13  30 31 32 33
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(e02, e13);
+    const __m128i transpose0_1 = _mm_unpackhi_epi16(e02, e13);
+    // 00 10 01 11   02 12 03 13
+    // 20 30 21 31   22 32 23 33
+    const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
+    // v23: 02 12 22 32   03 13 23 33
+    // v01: 00 10 20 30   01 11 21 31
+    // v32: 03 13 23 33   02 12 22 32
+  }
+
+  // Second pass
+  {
+    // Same operations are done on the (0,3) and (1,2) pairs.
+    // a0 = v0 + v3
+    // a1 = v1 + v2
+    // a3 = v0 - v3
+    // a2 = v1 - v2
+    const __m128i a01 = _mm_add_epi16(v01, v32);
+    const __m128i a32 = _mm_sub_epi16(v01, v32);
+    const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
+    const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
+
+    // d0 = (a0 + a1 + 7) >> 4;
+    // d2 = (a0 - a1 + 7) >> 4;
+    const __m128i b0 = _mm_add_epi16(a01, a11);
+    const __m128i b2 = _mm_sub_epi16(a01, a11);
+    const __m128i c0 = _mm_add_epi16(b0, seven);
+    const __m128i c2 = _mm_add_epi16(b2, seven);
+    const __m128i d0 = _mm_srai_epi16(c0, 4);
+    const __m128i d2 = _mm_srai_epi16(c2, 4);
+
+    // f1 = ((a3 * 5352 + a2 * 2217 + 12000) >> 16)
+    // f3 = ((a3 * 2217 - a2 * 5352 + 51000) >> 16)
+    const __m128i b23 = _mm_unpacklo_epi16(a22, a32);  // a2,a3 interleaved
+    const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
+    const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
+    const __m128i d1 = _mm_add_epi32(c1, k12000_plus_one);
+    const __m128i d3 = _mm_add_epi32(c3, k51000);
+    const __m128i e1 = _mm_srai_epi32(d1, 16);
+    const __m128i e3 = _mm_srai_epi32(d3, 16);
+    const __m128i f1 = _mm_packs_epi32(e1, e1);
+    const __m128i f3 = _mm_packs_epi32(e3, e3);
+    // g1 = f1 + (a3 != 0);
+    // The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
+    // desired (0, 1), we add one earlier through k12000_plus_one.
+    const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
+
+    _mm_storel_epi64((__m128i*)&out[ 0], d0);
+    _mm_storel_epi64((__m128i*)&out[ 4], g1);
+    _mm_storel_epi64((__m128i*)&out[ 8], d2);
+    _mm_storel_epi64((__m128i*)&out[12], f3);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Metric
+// Returns the sum of squared differences between the 4x4 blocks 'a' and 'b'.
+static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
+  const __m128i zero = _mm_set1_epi16(0);
+
+  // Load values (8 bytes per row; only the low 4 bytes are used below).
+  const __m128i a0 = _mm_loadl_epi64((__m128i*)&a[BPS * 0]);
+  const __m128i a1 = _mm_loadl_epi64((__m128i*)&a[BPS * 1]);
+  const __m128i a2 = _mm_loadl_epi64((__m128i*)&a[BPS * 2]);
+  const __m128i a3 = _mm_loadl_epi64((__m128i*)&a[BPS * 3]);
+  const __m128i b0 = _mm_loadl_epi64((__m128i*)&b[BPS * 0]);
+  const __m128i b1 = _mm_loadl_epi64((__m128i*)&b[BPS * 1]);
+  const __m128i b2 = _mm_loadl_epi64((__m128i*)&b[BPS * 2]);
+  const __m128i b3 = _mm_loadl_epi64((__m128i*)&b[BPS * 3]);
+
+  // Combine pair of lines and convert to 16b.
+  const __m128i a01 = _mm_unpacklo_epi32(a0, a1);
+  const __m128i a23 = _mm_unpacklo_epi32(a2, a3);
+  const __m128i b01 = _mm_unpacklo_epi32(b0, b1);
+  const __m128i b23 = _mm_unpacklo_epi32(b2, b3);
+  const __m128i a01s = _mm_unpacklo_epi8(a01, zero);
+  const __m128i a23s = _mm_unpacklo_epi8(a23, zero);
+  const __m128i b01s = _mm_unpacklo_epi8(b01, zero);
+  const __m128i b23s = _mm_unpacklo_epi8(b23, zero);
+
+  // Compute differences; (a-b)^2 = (abs(a-b))^2 = (sat8(a-b) + sat8(b-a))^2
+  // TODO(cduvivier): Disassemble and figure out why this is fastest. We don't
+  //                  need absolute values, there is no need to do calculation
+  //                  in 8bit as we are already in 16bit, ... Yet this is what
+  //                  benchmarks the fastest!
+  const __m128i d0 = _mm_subs_epu8(a01s, b01s);
+  const __m128i d1 = _mm_subs_epu8(b01s, a01s);
+  const __m128i d2 = _mm_subs_epu8(a23s, b23s);
+  const __m128i d3 = _mm_subs_epu8(b23s, a23s);
+
+  // Square and add them all together.
+  const __m128i madd0 = _mm_madd_epi16(d0, d0);
+  const __m128i madd1 = _mm_madd_epi16(d1, d1);
+  const __m128i madd2 = _mm_madd_epi16(d2, d2);
+  const __m128i madd3 = _mm_madd_epi16(d3, d3);
+  const __m128i sum0 = _mm_add_epi32(madd0, madd1);
+  const __m128i sum1 = _mm_add_epi32(madd2, madd3);
+  const __m128i sum2 = _mm_add_epi32(sum0, sum1);
+  int32_t tmp[4];
+  _mm_storeu_si128((__m128i*)tmp, sum2);  // spill to add the four 32b lanes
+  return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
+}
+
+//------------------------------------------------------------------------------
+// Texture distortion
+//
+// We try to match the spectral content (weighted) between source and
+// reconstructed samples.
+
+// Hadamard transform of the 4x4 blocks inA and inB, computed in parallel.
+// Returns sum(w * |TA|) - sum(w * |TB|), where TA and TB are the scaled
+// transformed coefficients of inA and inB; 16 weights are read from 'w'.
+static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
+                          const uint16_t* const w) {
+  int32_t sum[4];
+  __m128i tmp_0, tmp_1, tmp_2, tmp_3;
+  const __m128i zero = _mm_setzero_si128();
+  const __m128i one = _mm_set1_epi16(1);
+  const __m128i three = _mm_set1_epi16(3);
+
+  // Load, combine and transpose inputs.
+  {
+    const __m128i inA_0 = _mm_loadl_epi64((__m128i*)&inA[BPS * 0]);
+    const __m128i inA_1 = _mm_loadl_epi64((__m128i*)&inA[BPS * 1]);
+    const __m128i inA_2 = _mm_loadl_epi64((__m128i*)&inA[BPS * 2]);
+    const __m128i inA_3 = _mm_loadl_epi64((__m128i*)&inA[BPS * 3]);
+    const __m128i inB_0 = _mm_loadl_epi64((__m128i*)&inB[BPS * 0]);
+    const __m128i inB_1 = _mm_loadl_epi64((__m128i*)&inB[BPS * 1]);
+    const __m128i inB_2 = _mm_loadl_epi64((__m128i*)&inB[BPS * 2]);
+    const __m128i inB_3 = _mm_loadl_epi64((__m128i*)&inB[BPS * 3]);
+
+    // Combine inA and inB (we'll do two transforms in parallel).
+    const __m128i inAB_0 = _mm_unpacklo_epi8(inA_0, inB_0);
+    const __m128i inAB_1 = _mm_unpacklo_epi8(inA_1, inB_1);
+    const __m128i inAB_2 = _mm_unpacklo_epi8(inA_2, inB_2);
+    const __m128i inAB_3 = _mm_unpacklo_epi8(inA_3, inB_3);
+    // a00 b00 a01 b01 a02 b02 a03 b03   0 0 0 0 0 0 0 0
+    // a10 b10 a11 b11 a12 b12 a13 b13   0 0 0 0 0 0 0 0
+    // a20 b20 a21 b21 a22 b22 a23 b23   0 0 0 0 0 0 0 0
+    // a30 b30 a31 b31 a32 b32 a33 b33   0 0 0 0 0 0 0 0
+
+    // Transpose the two 4x4, discarding the filling zeroes.
+    const __m128i transpose0_0 = _mm_unpacklo_epi8(inAB_0, inAB_2);
+    const __m128i transpose0_1 = _mm_unpacklo_epi8(inAB_1, inAB_3);
+    // a00 a20  b00 b20  a01 a21  b01 b21  a02 a22  b02 b22  a03 a23  b03 b23
+    // a10 a30  b10 b30  a11 a31  b11 b31  a12 a32  b12 b32  a13 a33  b13 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi8(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpackhi_epi8(transpose0_0, transpose0_1);
+    // a00 a10 a20 a30  b00 b10 b20 b30  a01 a11 a21 a31  b01 b11 b21 b31
+    // a02 a12 a22 a32  b02 b12 b22 b32  a03 a13 a23 a33  b03 b13 b23 b33
+
+    // Convert to 16b.
+    tmp_0 = _mm_unpacklo_epi8(transpose1_0, zero);
+    tmp_1 = _mm_unpackhi_epi8(transpose1_0, zero);
+    tmp_2 = _mm_unpacklo_epi8(transpose1_1, zero);
+    tmp_3 = _mm_unpackhi_epi8(transpose1_1, zero);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Horizontal pass and subsequent transpose.
+  {
+    // Calculate a and b (two 4x4 at once).
+    const __m128i a0 = _mm_slli_epi16(_mm_add_epi16(tmp_0, tmp_2), 2);
+    const __m128i a1 = _mm_slli_epi16(_mm_add_epi16(tmp_1, tmp_3), 2);
+    const __m128i a2 = _mm_slli_epi16(_mm_sub_epi16(tmp_1, tmp_3), 2);
+    const __m128i a3 = _mm_slli_epi16(_mm_sub_epi16(tmp_0, tmp_2), 2);
+    // b0_extra = (a0 != 0);
+    const __m128i b0_extra = _mm_andnot_si128(_mm_cmpeq_epi16 (a0, zero), one);
+    const __m128i b0_base = _mm_add_epi16(a0, a1);
+    const __m128i b1 = _mm_add_epi16(a3, a2);
+    const __m128i b2 = _mm_sub_epi16(a3, a2);
+    const __m128i b3 = _mm_sub_epi16(a0, a1);
+    const __m128i b0 = _mm_add_epi16(b0_base, b0_extra);
+    // a00 a01 a02 a03   b00 b01 b02 b03
+    // a10 a11 a12 a13   b10 b11 b12 b13
+    // a20 a21 a22 a23   b20 b21 b22 b23
+    // a30 a31 a32 a33   b30 b31 b32 b33
+
+    // Transpose the two 4x4.
+    const __m128i transpose0_0 = _mm_unpacklo_epi16(b0, b1);
+    const __m128i transpose0_1 = _mm_unpacklo_epi16(b2, b3);
+    const __m128i transpose0_2 = _mm_unpackhi_epi16(b0, b1);
+    const __m128i transpose0_3 = _mm_unpackhi_epi16(b2, b3);
+    // a00 a10 a01 a11   a02 a12 a03 a13
+    // a20 a30 a21 a31   a22 a32 a23 a33
+    // b00 b10 b01 b11   b02 b12 b03 b13
+    // b20 b30 b21 b31   b22 b32 b23 b33
+    const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
+    const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
+    const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
+    // a00 a10 a20 a30 a01 a11 a21 a31
+    // b00 b10 b20 b30 b01 b11 b21 b31
+    // a02 a12 a22 a32 a03 a13 a23 a33
+    // b02 b12 b22 b32 b03 b13 b23 b33
+    tmp_0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
+    tmp_1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
+    tmp_2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
+    tmp_3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
+    // a00 a10 a20 a30   b00 b10 b20 b30
+    // a01 a11 a21 a31   b01 b11 b21 b31
+    // a02 a12 a22 a32   b02 b12 b22 b32
+    // a03 a13 a23 a33   b03 b13 b23 b33
+  }
+
+  // Vertical pass and difference of weighted sums.
+  {
+    // Load all inputs.
+    // TODO(cduvivier): Make variable declarations and allocations aligned so
+    //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
+    const __m128i w_0 = _mm_loadu_si128((__m128i*)&w[0]);
+    const __m128i w_8 = _mm_loadu_si128((__m128i*)&w[8]);
+
+    // Calculate a and b (two 4x4 at once).
+    const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
+    const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
+    const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
+    const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
+    const __m128i b0 = _mm_add_epi16(a0, a1);
+    const __m128i b1 = _mm_add_epi16(a3, a2);
+    const __m128i b2 = _mm_sub_epi16(a3, a2);
+    const __m128i b3 = _mm_sub_epi16(a0, a1);
+
+    // Separate the transforms of inA (low halves) and inB (high halves).
+    __m128i A_b0 = _mm_unpacklo_epi64(b0, b1);
+    __m128i A_b2 = _mm_unpacklo_epi64(b2, b3);
+    __m128i B_b0 = _mm_unpackhi_epi64(b0, b1);
+    __m128i B_b2 = _mm_unpackhi_epi64(b2, b3);
+
+    {
+      // sign(b) = b >> 15  (0x0000 if positive, 0xffff if negative)
+      const __m128i sign_A_b0 = _mm_srai_epi16(A_b0, 15);
+      const __m128i sign_A_b2 = _mm_srai_epi16(A_b2, 15);
+      const __m128i sign_B_b0 = _mm_srai_epi16(B_b0, 15);
+      const __m128i sign_B_b2 = _mm_srai_epi16(B_b2, 15);
+
+      // b = abs(b) = (b ^ sign) - sign
+      A_b0 = _mm_xor_si128(A_b0, sign_A_b0);
+      A_b2 = _mm_xor_si128(A_b2, sign_A_b2);
+      B_b0 = _mm_xor_si128(B_b0, sign_B_b0);
+      B_b2 = _mm_xor_si128(B_b2, sign_B_b2);
+      A_b0 = _mm_sub_epi16(A_b0, sign_A_b0);
+      A_b2 = _mm_sub_epi16(A_b2, sign_A_b2);
+      B_b0 = _mm_sub_epi16(B_b0, sign_B_b0);
+      B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
+    }
+
+    // b = abs(b) + 3
+    A_b0 = _mm_add_epi16(A_b0, three);
+    A_b2 = _mm_add_epi16(A_b2, three);
+    B_b0 = _mm_add_epi16(B_b0, three);
+    B_b2 = _mm_add_epi16(B_b2, three);
+
+    // abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
+    // b = (abs(b) + 3) >> 3
+    A_b0 = _mm_srai_epi16(A_b0, 3);
+    A_b2 = _mm_srai_epi16(A_b2, 3);
+    B_b0 = _mm_srai_epi16(B_b0, 3);
+    B_b2 = _mm_srai_epi16(B_b2, 3);
+
+    // weighted sums
+    A_b0 = _mm_madd_epi16(A_b0, w_0);
+    A_b2 = _mm_madd_epi16(A_b2, w_8);
+    B_b0 = _mm_madd_epi16(B_b0, w_0);
+    B_b2 = _mm_madd_epi16(B_b2, w_8);
+    A_b0 = _mm_add_epi32(A_b0, A_b2);
+    B_b0 = _mm_add_epi32(B_b0, B_b2);
+
+    // difference of weighted sums
+    A_b0 = _mm_sub_epi32(A_b0, B_b0);
+    _mm_storeu_si128((__m128i*)&sum[0], A_b0);
+  }
+  return sum[0] + sum[1] + sum[2] + sum[3];  // horizontal add of the 4 lanes
+}
+// Returns (|d| + 8) >> 4, where d is the result of TTransformSSE2(a, b, w).
+static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
+                        const uint16_t* const w) {
+  const int diff_sum = TTransformSSE2(a, b, w);
+  return (abs(diff_sum) + 8) >> 4;  // '+ 8' rounds before dividing by 16
+}
+// Sums Disto4x4SSE2() over the sixteen 4x4 sub-blocks of a 16x16 area.
+static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
+                          const uint16_t* const w) {
+  int D = 0;
+  int x, y;
+  for (y = 0; y < 16 * BPS; y += 4 * BPS) {  // y is a byte offset: 4 rows/step
+    for (x = 0; x < 16; x += 4) {
+      D += Disto4x4SSE2(a + x + y, b + x + y, w);
+    }
+  }
+  return D;
+}
+
+
+//------------------------------------------------------------------------------
+// Quantization
+//
+// Simple quantization: stores zigzagged levels in out[], and out * q in in[].
+// Returns non-zero if any level is non-zero (the first is ignored if n != 0).
+static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
+                             int n, const VP8Matrix* const mtx) {
+  const __m128i max_coeff_2047 = _mm_set1_epi16(2047);
+  const __m128i zero = _mm_set1_epi16(0);
+  __m128i sign0, sign8;
+  __m128i coeff0, coeff8;
+  __m128i out0, out8;
+  __m128i packed_out;
+
+  // Load all inputs.
+  // TODO(cduvivier): Make variable declarations and allocations aligned so that
+  //                  we can use _mm_load_si128 instead of _mm_loadu_si128.
+  __m128i in0 = _mm_loadu_si128((__m128i*)&in[0]);
+  __m128i in8 = _mm_loadu_si128((__m128i*)&in[8]);
+  const __m128i sharpen0 = _mm_loadu_si128((__m128i*)&mtx->sharpen_[0]);
+  const __m128i sharpen8 = _mm_loadu_si128((__m128i*)&mtx->sharpen_[8]);
+  const __m128i iq0 = _mm_loadu_si128((__m128i*)&mtx->iq_[0]);
+  const __m128i iq8 = _mm_loadu_si128((__m128i*)&mtx->iq_[8]);
+  const __m128i bias0 = _mm_loadu_si128((__m128i*)&mtx->bias_[0]);
+  const __m128i bias8 = _mm_loadu_si128((__m128i*)&mtx->bias_[8]);
+  const __m128i q0 = _mm_loadu_si128((__m128i*)&mtx->q_[0]);
+  const __m128i q8 = _mm_loadu_si128((__m128i*)&mtx->q_[8]);
+  const __m128i zthresh0 = _mm_loadu_si128((__m128i*)&mtx->zthresh_[0]);
+  const __m128i zthresh8 = _mm_loadu_si128((__m128i*)&mtx->zthresh_[8]);
+
+  // sign(in) = in >> 15  (0x0000 if positive, 0xffff if negative)
+  sign0 = _mm_srai_epi16(in0, 15);
+  sign8 = _mm_srai_epi16(in8, 15);
+
+  // coeff = abs(in) = (in ^ sign) - sign
+  coeff0 = _mm_xor_si128(in0, sign0);
+  coeff8 = _mm_xor_si128(in8, sign8);
+  coeff0 = _mm_sub_epi16(coeff0, sign0);
+  coeff8 = _mm_sub_epi16(coeff8, sign8);
+
+  // coeff = abs(in) + sharpen
+  coeff0 = _mm_add_epi16(coeff0, sharpen0);
+  coeff8 = _mm_add_epi16(coeff8, sharpen8);
+
+  // if (coeff > 2047) coeff = 2047
+  coeff0 = _mm_min_epi16(coeff0, max_coeff_2047);
+  coeff8 = _mm_min_epi16(coeff8, max_coeff_2047);
+
+  // out = (coeff * iQ + B) >> QFIX;
+  {
+    // doing calculations with 32b precision (QFIX=17)
+    // out = (coeff * iQ): the 16b high and low halves are interleaved into 32b
+    __m128i coeff_iQ0H = _mm_mulhi_epu16(coeff0, iq0);
+    __m128i coeff_iQ0L = _mm_mullo_epi16(coeff0, iq0);
+    __m128i coeff_iQ8H = _mm_mulhi_epu16(coeff8, iq8);
+    __m128i coeff_iQ8L = _mm_mullo_epi16(coeff8, iq8);
+    __m128i out_00 = _mm_unpacklo_epi16(coeff_iQ0L, coeff_iQ0H);
+    __m128i out_04 = _mm_unpackhi_epi16(coeff_iQ0L, coeff_iQ0H);
+    __m128i out_08 = _mm_unpacklo_epi16(coeff_iQ8L, coeff_iQ8H);
+    __m128i out_12 = _mm_unpackhi_epi16(coeff_iQ8L, coeff_iQ8H);
+    // expand bias from 16b to 32b (zero-extended)
+    __m128i bias_00 = _mm_unpacklo_epi16(bias0, zero);
+    __m128i bias_04 = _mm_unpackhi_epi16(bias0, zero);
+    __m128i bias_08 = _mm_unpacklo_epi16(bias8, zero);
+    __m128i bias_12 = _mm_unpackhi_epi16(bias8, zero);
+    // out = (coeff * iQ + B)
+    out_00 = _mm_add_epi32(out_00, bias_00);
+    out_04 = _mm_add_epi32(out_04, bias_04);
+    out_08 = _mm_add_epi32(out_08, bias_08);
+    out_12 = _mm_add_epi32(out_12, bias_12);
+    // out = (coeff * iQ + B) >> QFIX;
+    out_00 = _mm_srai_epi32(out_00, QFIX);
+    out_04 = _mm_srai_epi32(out_04, QFIX);
+    out_08 = _mm_srai_epi32(out_08, QFIX);
+    out_12 = _mm_srai_epi32(out_12, QFIX);
+    // pack result as 16b
+    out0 = _mm_packs_epi32(out_00, out_04);
+    out8 = _mm_packs_epi32(out_08, out_12);
+  }
+
+  // get sign back (if (sign[j]) out_n = -out_n)
+  out0 = _mm_xor_si128(out0, sign0);
+  out8 = _mm_xor_si128(out8, sign8);
+  out0 = _mm_sub_epi16(out0, sign0);
+  out8 = _mm_sub_epi16(out8, sign8);
+
+  // in = out * Q
+  in0 = _mm_mullo_epi16(out0, q0);
+  in8 = _mm_mullo_epi16(out8, q8);
+
+  // if (coeff <= mtx->zthresh_) {in=0; out=0;}
+  {
+    __m128i cmp0 = _mm_cmpgt_epi16(coeff0, zthresh0);
+    __m128i cmp8 = _mm_cmpgt_epi16(coeff8, zthresh8);
+    in0 = _mm_and_si128(in0, cmp0);
+    in8 = _mm_and_si128(in8, cmp8);
+    _mm_storeu_si128((__m128i*)&in[0], in0);
+    _mm_storeu_si128((__m128i*)&in[8], in8);
+    out0 = _mm_and_si128(out0, cmp0);
+    out8 = _mm_and_si128(out8, cmp8);
+  }
+
+  // zigzag the output before storing it.
+  //
+  // The zigzag pattern can almost be reproduced with a small sequence of
+  // shuffles. After it, we only need to swap the 7th (ending up in third
+  // position instead of twelfth) and 8th values.
+  {
+    __m128i outZ0, outZ8;
+    outZ0 = _mm_shufflehi_epi16(out0,  _MM_SHUFFLE(2, 1, 3, 0));
+    outZ0 = _mm_shuffle_epi32  (outZ0, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ0 = _mm_shufflehi_epi16(outZ0, _MM_SHUFFLE(3, 1, 0, 2));
+    outZ8 = _mm_shufflelo_epi16(out8,  _MM_SHUFFLE(3, 0, 2, 1));
+    outZ8 = _mm_shuffle_epi32  (outZ8, _MM_SHUFFLE(3, 1, 2, 0));
+    outZ8 = _mm_shufflelo_epi16(outZ8, _MM_SHUFFLE(1, 3, 2, 0));
+    _mm_storeu_si128((__m128i*)&out[0], outZ0);
+    _mm_storeu_si128((__m128i*)&out[8], outZ8);
+    packed_out = _mm_packs_epi16(outZ0, outZ8);  // 16 x 8b, saturated
+  }
+  {
+    // Finish the zigzag in memory: exchange out[3] and out[12].
+    const int16_t outZ_12 = out[12];
+    const int16_t outZ_3 = out[3];
+    out[3] = outZ_12;
+    out[12] = outZ_3;
+  }
+
+  // detect if all 'out' values are zeroes or not
+  {
+    int32_t tmp[4];
+    _mm_storeu_si128((__m128i*)tmp, packed_out);
+    if (n) {
+      tmp[0] &= ~0xff;  // clear the lowest byte: the first packed value
+    }
+    return (tmp[3] || tmp[2] || tmp[1] || tmp[0]);
+  }
+}
+// Points the encoder DSP function pointers at this file's SSE2 versions.
+extern void VP8EncDspInitSSE2(void);
+void VP8EncDspInitSSE2(void) {
+  VP8CollectHistogram = CollectHistogramSSE2;
+  VP8EncQuantizeBlock = QuantizeBlockSSE2;
+  VP8ITransform = ITransformSSE2;
+  VP8FTransform = FTransformSSE2;
+  VP8SSE4x4 = SSE4x4SSE2;
+  VP8TDisto4x4 = Disto4x4SSE2;
+  VP8TDisto16x16 = Disto16x16SSE2;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif   // WEBP_USE_SSE2
diff --git a/drivers/webpold/dsp/lossless.c b/drivers/webpold/dsp/lossless.c
new file mode 100644
index 0000000000..62a6b7b15a
--- /dev/null
+++ b/drivers/webpold/dsp/lossless.c
@@ -0,0 +1,1138 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include <math.h>
+#include <stdlib.h>
+#include "./lossless.h"
+#include "../dec/vp8li.h"
+#include "../dsp/yuv.h"
+#include "../dsp/dsp.h"
+#include "../enc/histogram.h"
+
+#define MAX_DIFF_COST (1e30f)
+
+// Lookup table for small values of log2(int): kLog2Table[i] == log2(i), with entry 0 stored as 0.
+#define APPROX_LOG_MAX  4096  // below this, VP8LFastLog2() uses the table plus a shift count
+#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086  // 1 / ln(2)
+#define LOG_LOOKUP_IDX_MAX 256  // number of entries in kLog2Table
+static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
+  0.0000000000000000f, 0.0000000000000000f,
+  1.0000000000000000f, 1.5849625007211560f,
+  2.0000000000000000f, 2.3219280948873621f,
+  2.5849625007211560f, 2.8073549220576041f,
+  3.0000000000000000f, 3.1699250014423121f,
+  3.3219280948873621f, 3.4594316186372973f,
+  3.5849625007211560f, 3.7004397181410921f,
+  3.8073549220576041f, 3.9068905956085187f,
+  4.0000000000000000f, 4.0874628412503390f,
+  4.1699250014423121f, 4.2479275134435852f,
+  4.3219280948873626f, 4.3923174227787606f,
+  4.4594316186372973f, 4.5235619560570130f,
+  4.5849625007211560f, 4.6438561897747243f,
+  4.7004397181410917f, 4.7548875021634682f,
+  4.8073549220576037f, 4.8579809951275718f,
+  4.9068905956085187f, 4.9541963103868749f,
+  5.0000000000000000f, 5.0443941193584533f,
+  5.0874628412503390f, 5.1292830169449663f,
+  5.1699250014423121f, 5.2094533656289501f,
+  5.2479275134435852f, 5.2854022188622487f,
+  5.3219280948873626f, 5.3575520046180837f,
+  5.3923174227787606f, 5.4262647547020979f,
+  5.4594316186372973f, 5.4918530963296747f,
+  5.5235619560570130f, 5.5545888516776376f,
+  5.5849625007211560f, 5.6147098441152083f,
+  5.6438561897747243f, 5.6724253419714951f,
+  5.7004397181410917f, 5.7279204545631987f,
+  5.7548875021634682f, 5.7813597135246599f,
+  5.8073549220576037f, 5.8328900141647412f,
+  5.8579809951275718f, 5.8826430493618415f,
+  5.9068905956085187f, 5.9307373375628866f,
+  5.9541963103868749f, 5.9772799234999167f,
+  6.0000000000000000f, 6.0223678130284543f,
+  6.0443941193584533f, 6.0660891904577720f,
+  6.0874628412503390f, 6.1085244567781691f,
+  6.1292830169449663f, 6.1497471195046822f,
+  6.1699250014423121f, 6.1898245588800175f,
+  6.2094533656289501f, 6.2288186904958804f,
+  6.2479275134435852f, 6.2667865406949010f,
+  6.2854022188622487f, 6.3037807481771030f,
+  6.3219280948873626f, 6.3398500028846243f,
+  6.3575520046180837f, 6.3750394313469245f,
+  6.3923174227787606f, 6.4093909361377017f,
+  6.4262647547020979f, 6.4429434958487279f,
+  6.4594316186372973f, 6.4757334309663976f,
+  6.4918530963296747f, 6.5077946401986963f,
+  6.5235619560570130f, 6.5391588111080309f,
+  6.5545888516776376f, 6.5698556083309478f,
+  6.5849625007211560f, 6.5999128421871278f,
+  6.6147098441152083f, 6.6293566200796094f,
+  6.6438561897747243f, 6.6582114827517946f,
+  6.6724253419714951f, 6.6865005271832185f,
+  6.7004397181410917f, 6.7142455176661224f,
+  6.7279204545631987f, 6.7414669864011464f,
+  6.7548875021634682f, 6.7681843247769259f,
+  6.7813597135246599f, 6.7944158663501061f,
+  6.8073549220576037f, 6.8201789624151878f,
+  6.8328900141647412f, 6.8454900509443747f,
+  6.8579809951275718f, 6.8703647195834047f,
+  6.8826430493618415f, 6.8948177633079437f,
+  6.9068905956085187f, 6.9188632372745946f,
+  6.9307373375628866f, 6.9425145053392398f,
+  6.9541963103868749f, 6.9657842846620869f,
+  6.9772799234999167f, 6.9886846867721654f,
+  7.0000000000000000f, 7.0112272554232539f,
+  7.0223678130284543f, 7.0334230015374501f,
+  7.0443941193584533f, 7.0552824355011898f,
+  7.0660891904577720f, 7.0768155970508308f,
+  7.0874628412503390f, 7.0980320829605263f,
+  7.1085244567781691f, 7.1189410727235076f,
+  7.1292830169449663f, 7.1395513523987936f,
+  7.1497471195046822f, 7.1598713367783890f,
+  7.1699250014423121f, 7.1799090900149344f,
+  7.1898245588800175f, 7.1996723448363644f,
+  7.2094533656289501f, 7.2191685204621611f,
+  7.2288186904958804f, 7.2384047393250785f,
+  7.2479275134435852f, 7.2573878426926521f,
+  7.2667865406949010f, 7.2761244052742375f,
+  7.2854022188622487f, 7.2946207488916270f,
+  7.3037807481771030f, 7.3128829552843557f,
+  7.3219280948873626f, 7.3309168781146167f,
+  7.3398500028846243f, 7.3487281542310771f,
+  7.3575520046180837f, 7.3663222142458160f,
+  7.3750394313469245f, 7.3837042924740519f,
+  7.3923174227787606f, 7.4008794362821843f,
+  7.4093909361377017f, 7.4178525148858982f,
+  7.4262647547020979f, 7.4346282276367245f,
+  7.4429434958487279f, 7.4512111118323289f,
+  7.4594316186372973f, 7.4676055500829976f,
+  7.4757334309663976f, 7.4838157772642563f,
+  7.4918530963296747f, 7.4998458870832056f,
+  7.5077946401986963f, 7.5156998382840427f,
+  7.5235619560570130f, 7.5313814605163118f,
+  7.5391588111080309f, 7.5468944598876364f,
+  7.5545888516776376f, 7.5622424242210728f,
+  7.5698556083309478f, 7.5774288280357486f,
+  7.5849625007211560f, 7.5924570372680806f,
+  7.5999128421871278f, 7.6073303137496104f,
+  7.6147098441152083f, 7.6220518194563764f,
+  7.6293566200796094f, 7.6366246205436487f,
+  7.6438561897747243f, 7.6510516911789281f,
+  7.6582114827517946f, 7.6653359171851764f,
+  7.6724253419714951f, 7.6794800995054464f,
+  7.6865005271832185f, 7.6934869574993252f,
+  7.7004397181410917f, 7.7073591320808825f,
+  7.7142455176661224f, 7.7210991887071855f,
+  7.7279204545631987f, 7.7347096202258383f,
+  7.7414669864011464f, 7.7481928495894605f,
+  7.7548875021634682f, 7.7615512324444795f,
+  7.7681843247769259f, 7.7747870596011736f,
+  7.7813597135246599f, 7.7879025593914317f,
+  7.7944158663501061f, 7.8008998999203047f,
+  7.8073549220576037f, 7.8137811912170374f,
+  7.8201789624151878f, 7.8265484872909150f,
+  7.8328900141647412f, 7.8392037880969436f,
+  7.8454900509443747f, 7.8517490414160571f,
+  7.8579809951275718f, 7.8641861446542797f,
+  7.8703647195834047f, 7.8765169465649993f,
+  7.8826430493618415f, 7.8887432488982591f,
+  7.8948177633079437f, 7.9008668079807486f,
+  7.9068905956085187f, 7.9128893362299619f,
+  7.9188632372745946f, 7.9248125036057812f,
+  7.9307373375628866f, 7.9366379390025709f,
+  7.9425145053392398f, 7.9483672315846778f,
+  7.9541963103868749f, 7.9600019320680805f,
+  7.9657842846620869f, 7.9715435539507719f,
+  7.9772799234999167f, 7.9829935746943103f,
+  7.9886846867721654f, 7.9943534368588577f
+};
+
+float VP8LFastLog2(int v) {  // Approximate log2(v). NOTE: a negative v would index before kLog2Table.
+  if (v < LOG_LOOKUP_IDX_MAX) {
+    return kLog2Table[v];  // direct table lookup (the table stores 0 for v == 0)
+  } else if (v < APPROX_LOG_MAX) {
+    int log_cnt = 0;
+    while (v >= LOG_LOOKUP_IDX_MAX) {  // halve v until it fits the table, counting the shifts
+      ++log_cnt;
+      v = v >> 1;
+    }
+    return kLog2Table[v] + (float)log_cnt;  // log2(v >> k) + k; shifted-out bits are ignored
+  } else {
+    return (float)(LOG_2_RECIPROCAL * log((double)v));  // log2(v) = ln(v) * (1 / ln(2))
+  }
+}
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+// In-place sum of each component with mod 256: every 8-bit channel of *a gets b's channel added.
+static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
+  const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);  // bytes 3 and 1
+  const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);  // bytes 2 and 0
+  *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);  // drop carries between bytes
+}
+
+static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {  // per-byte floor((a0 + a1) / 2)
+  return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);  // (x & y) + ((x ^ y) >> 1); mask stops cross-byte bits
+}
+
+static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {  // a1 weighted twice
+  return Average2(Average2(a0, a2), a1);  // avg(avg(a0, a2), a1), rounding down at each step
+}
+
+static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
+                                     uint32_t a2, uint32_t a3) {  // per-byte average of four pixels
+  return Average2(Average2(a0, a1), Average2(a2, a3));  // pairwise, rounding down at each step
+}
+
+static WEBP_INLINE uint32_t Clip255(uint32_t a) {  // Clamp a signed result (passed as uint32_t) to [0, 255].
+  if (a < 256) {
+    return a;  // already in range
+  }
+  // return 0, when a is a negative integer (top byte is 0xff, so ~a >> 24 == 0).
+  // return 255, when a is positive (top byte is 0x00, so ~a >> 24 == 0xff).
+  return ~a >> 24;
+}
+
+static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {  // one channel: clip(a + b - c)
+  return Clip255(a + b - c);  // a negative sum wraps when converted to uint32_t; Clip255 maps it to 0
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {  // per channel: clip(c0 + c1 - c2)
+  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
+  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
+                                         (c1 >> 16) & 0xff,
+                                         (c2 >> 16) & 0xff);
+  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
+                                         (c1 >> 8) & 0xff,
+                                         (c2 >> 8) & 0xff);
+  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;  // cast: 255 << 24 overflows a signed int
+}
+
+static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {  // one channel: clip(a + (a - b) / 2)
+  return Clip255(a + (a - b) / 2);  // integer division truncates toward zero
+}
+
+static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
+                                                   uint32_t c2) {  // clip(ave + (ave - c2) / 2) per channel
+  const uint32_t ave = Average2(c0, c1);  // per-byte average of the first two pixels
+  const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
+  const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
+  const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
+  const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
+  return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;  // cast: 255 << 24 overflows a signed int
+}
+
+static WEBP_INLINE int Sub3(int a, int b, int c) {  // one channel: |b - c| - |a - c|
+  const int pa = b - c;
+  const int pb = a - c;
+  return abs(pa) - abs(pb);  // <= 0 when b is at least as close to c as a is
+}
+
+static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {  // Returns either a or b.
+  const int pa_minus_pb =  // sum over the four channels of |b - c| - |a - c|
+      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
+      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
+      Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
+      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
+
+  return (pa_minus_pb <= 0) ? a : b;  // ties go to a
+}
+
+//------------------------------------------------------------------------------
+// Predictors
+
+static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {  // mode 0: constant ARGB_BLACK
+  (void)top;  // unused
+  (void)left;  // unused
+  return ARGB_BLACK;
+}
+static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {  // mode 1: left pixel
+  (void)top;  // unused
+  return left;
+}
+static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {  // mode 2: pixel at top[0]
+  (void)left;  // unused
+  return top[0];
+}
+static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {  // mode 3: pixel at top[1]
+  (void)left;  // unused
+  return top[1];
+}
+static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {  // mode 4: pixel at top[-1]
+  (void)left;  // unused
+  return top[-1];
+}
+static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {  // mode 5: Average3
+  const uint32_t pred = Average3(left, top[0], top[1]);  // top[0] is the double-weighted argument
+  return pred;
+}
+static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {  // mode 6: avg(left, top[-1])
+  const uint32_t pred = Average2(left, top[-1]);
+  return pred;
+}
+static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {  // mode 7: avg(left, top[0])
+  const uint32_t pred = Average2(left, top[0]);
+  return pred;
+}
+static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {  // mode 8: avg(top[-1], top[0])
+  const uint32_t pred = Average2(top[-1], top[0]);
+  (void)left;  // unused
+  return pred;
+}
+static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {  // mode 9: avg(top[0], top[1])
+  const uint32_t pred = Average2(top[0], top[1]);
+  (void)left;  // unused
+  return pred;
+}
+static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {  // mode 10: Average4
+  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);  // left and the three pixels above
+  return pred;
+}
+static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {  // mode 11: Select
+  const uint32_t pred = Select(top[0], left, top[-1]);  // yields top[0] or left, judged against top[-1]
+  return pred;
+}
+static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {  // mode 12: clamped gradient
+  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);  // clip(left + top[0] - top[-1])
+  return pred;
+}
+static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {  // mode 13: clamped half-gradient
+  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);  // from avg(left, top[0]) and top[-1]
+  return pred;
+}
+
+typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);  // top points at the pixel above
+static const PredictorFunc kPredictors[16] = {  // indexed by mode; 16 entries match the '& 0xf' decode mask
+  Predictor0, Predictor1, Predictor2, Predictor3,
+  Predictor4, Predictor5, Predictor6, Predictor7,
+  Predictor8, Predictor9, Predictor10, Predictor11,
+  Predictor12, Predictor13,
+  Predictor0, Predictor0    // <- padding security sentinels (modes 14 and 15 fall back to Predictor0)
+};
+
+// TODO(vikasa): Replace 256 etc with defines.
+static float PredictionCostSpatial(const int* counts,  // histogram; indices 0..15 and 241..255 are read
+                                   int weight_0, double exp_val) {  // weight of counts[0]; initial weight
+  const int significant_symbols = 16;  // only the 15 smallest +/- residuals contribute besides zero
+  const double exp_decay_factor = 0.6;  // weight multiplier applied after each step away from zero
+  double bits = weight_0 * counts[0];
+  int i;
+  for (i = 1; i < significant_symbols; ++i) {
+    bits += exp_val * (counts[i] + counts[256 - i]);  // symbols i and 256 - i (i.e. -i mod 256) pair up
+    exp_val *= exp_decay_factor;
+  }
+  return (float)(-0.1 * bits);  // negated: more mass near zero gives a lower (better) cost
+}
+
+// Compute the Shannon entropy of a histogram: FastSLog2(sum) - Sum(FastSLog2(array[i])).
+static float ShannonEntropy(const int* const array, int n) {  // array holds n counts
+  int i;
+  float retval = 0.f;
+  int sum = 0;
+  for (i = 0; i < n; ++i) {
+    if (array[i] != 0) {  // empty bins contribute nothing
+      sum += array[i];
+      retval -= VP8LFastSLog2(array[i]);  // presumably v * log2(v) -- TODO confirm in lossless.h
+    }
+  }
+  retval += VP8LFastSLog2(sum);  // NOTE(review): called with 0 when every bin is empty
+  return retval;
+}
+
+static float PredictionCostSpatialHistogram(int accumulated[4][256],  // histograms gathered so far
+                                            int tile[4][256]) {  // histograms of the candidate tile
+  int i;
+  int k;
+  int combo[256];  // scratch: accumulated + tile for one channel
+  double retval = 0;
+  for (i = 0; i < 4; ++i) {  // one pass per histogram (four channels)
+    const double exp_val = 0.94;
+    retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);  // reward residuals near zero
+    retval += ShannonEntropy(&tile[i][0], 256);  // entropy of the tile alone
+    for (k = 0; k < 256; ++k) {
+      combo[k] = accumulated[i][k] + tile[i][k];
+    }
+    retval += ShannonEntropy(&combo[0], 256);  // entropy of the tile merged with earlier tiles
+  }
+  return (float)retval;  // lower is better for the caller's comparison
+}
+
+static int GetBestPredictorForTile(int width, int height,  // Returns the lowest-cost mode (0..13) for a tile.
+                                   int tile_x, int tile_y, int bits,  // tile coordinates; tile side is 1 << bits
+                                   int accumulated[4][256],  // residual histograms of earlier tiles
+                                   const uint32_t* const argb_scratch) {  // upper row, then the tile's rows
+  const int kNumPredModes = 14;
+  const int col_start = tile_x << bits;
+  const int row_start = tile_y << bits;
+  const int tile_size = 1 << bits;
+  const int ymax = (tile_size <= height - row_start) ?  // rows in this tile, clipped to the image
+      tile_size : height - row_start;
+  const int xmax = (tile_size <= width - col_start) ?  // columns in this tile, clipped to the image
+      tile_size : width - col_start;
+  int histo[4][256];  // residual histograms for the mode being tried
+  float best_diff = MAX_DIFF_COST;
+  int best_mode = 0;
+
+  int mode;
+  for (mode = 0; mode < kNumPredModes; ++mode) {
+    const uint32_t* current_row = argb_scratch;
+    const PredictorFunc pred_func = kPredictors[mode];
+    float cur_diff;
+    int y;
+    memset(&histo[0][0], 0, sizeof(histo));
+    for (y = 0; y < ymax; ++y) {
+      int x;
+      const int row = row_start + y;
+      const uint32_t* const upper_row = current_row;  // previous scratch row
+      current_row = upper_row + width;  // advance one scratch row
+      for (x = 0; x < xmax; ++x) {
+        const int col = col_start + x;
+        uint32_t predict;
+        uint32_t predict_diff;
+        if (row == 0) {  // top image row: left prediction regardless of mode
+          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
+        } else if (col == 0) {  // leftmost column: top prediction regardless of mode
+          predict = upper_row[col];  // Top.
+        } else {
+          predict = pred_func(current_row[col - 1], upper_row + col);
+        }
+        predict_diff = VP8LSubPixels(current_row[col], predict);  // residual for this pixel
+        ++histo[0][predict_diff >> 24];
+        ++histo[1][((predict_diff >> 16) & 0xff)];
+        ++histo[2][((predict_diff >> 8) & 0xff)];
+        ++histo[3][(predict_diff & 0xff)];
+      }
+    }
+    cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
+    if (cur_diff < best_diff) {  // strict '<': the lowest-numbered mode wins ties
+      best_diff = cur_diff;
+      best_mode = mode;
+    }
+  }
+
+  return best_mode;
+}
+
+static void CopyTileWithPrediction(int width, int height,  // Writes one tile's residuals into argb.
+                                   int tile_x, int tile_y, int bits, int mode,  // mode indexes kPredictors
+                                   const uint32_t* const argb_scratch,  // upper row, then the tile's rows
+                                   uint32_t* const argb) {  // full image; receives the residuals
+  const int col_start = tile_x << bits;
+  const int row_start = tile_y << bits;
+  const int tile_size = 1 << bits;
+  const int ymax = (tile_size <= height - row_start) ?  // rows in this tile, clipped to the image
+      tile_size : height - row_start;
+  const int xmax = (tile_size <= width - col_start) ?  // columns in this tile, clipped to the image
+      tile_size : width - col_start;
+  const PredictorFunc pred_func = kPredictors[mode];
+  const uint32_t* current_row = argb_scratch;
+
+  int y;
+  for (y = 0; y < ymax; ++y) {
+    int x;
+    const int row = row_start + y;
+    const uint32_t* const upper_row = current_row;  // previous scratch row
+    current_row = upper_row + width;  // advance one scratch row
+    for (x = 0; x < xmax; ++x) {
+      const int col = col_start + x;
+      const int pix = row * width + col;  // index of this pixel in argb
+      uint32_t predict;
+      if (row == 0) {  // top image row: left prediction regardless of mode
+        predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
+      } else if (col == 0) {  // leftmost column: top prediction regardless of mode
+        predict = upper_row[col];  // Top.
+      } else {
+        predict = pred_func(current_row[col - 1], upper_row + col);
+      }
+      argb[pix] = VP8LSubPixels(current_row[col], predict);  // sources come from the scratch copy
+    }
+  }
+}
+
+void VP8LResidualImage(int width, int height, int bits,  // Picks a predictor per tile; tile side is 1 << bits.
+                       uint32_t* const argb, uint32_t* const argb_scratch,  // argb is replaced by residuals
+                       uint32_t* const image) {  // receives one mode code per tile
+  const int max_tile_size = 1 << bits;
+  const int tiles_per_row = VP8LSubSampleSize(width, bits);
+  const int tiles_per_col = VP8LSubSampleSize(height, bits);
+  uint32_t* const upper_row = argb_scratch;  // scratch row 0: last row of the previous tile row
+  uint32_t* const current_tile_rows = argb_scratch + width;  // scratch rows 1..: copy of this tile row
+  int tile_y;
+  int histo[4][256];  // residual histograms accumulated over all tiles processed so far
+  memset(histo, 0, sizeof(histo));
+  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
+    const int tile_y_offset = tile_y * max_tile_size;
+    const int this_tile_height =
+        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
+    int tile_x;
+    if (tile_y > 0) {  // keep the previous tile row's last original row before overwriting the scratch
+      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
+             width * sizeof(*upper_row));
+    }
+    memcpy(current_tile_rows, &argb[tile_y_offset * width],  // snapshot originals; argb is rewritten below
+           this_tile_height * width * sizeof(*current_tile_rows));
+    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
+      int pred;
+      int y;
+      const int tile_x_offset = tile_x * max_tile_size;
+      int all_x_max = tile_x_offset + max_tile_size;
+      if (all_x_max > width) {
+        all_x_max = width;
+      }
+      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
+                                     argb_scratch);
+      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);  // mode in bits 8..15, alpha 0xff
+      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
+                             argb_scratch, argb);
+      for (y = 0; y < max_tile_size; ++y) {  // fold this tile's residuals into the running histograms
+        int ix;
+        int all_x;
+        int all_y = tile_y_offset + y;
+        if (all_y >= height) {
+          break;
+        }
+        ix = all_y * width + tile_x_offset;
+        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+          const uint32_t a = argb[ix];  // residual just written by CopyTileWithPrediction
+          ++histo[0][a >> 24];
+          ++histo[1][((a >> 16) & 0xff)];
+          ++histo[2][((a >> 8) & 0xff)];
+          ++histo[3][(a & 0xff)];
+        }
+      }
+    }
+  }
+}
+
+// Inverse prediction: adds the predicted value back onto each residual of rows [y_start, y_end), in place.
+static void PredictorInverseTransform(const VP8LTransform* const transform,
+                                      int y_start, int y_end, uint32_t* data) {  // data: first pixel of y_start
+  const int width = transform->xsize_;
+  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
+    int x;
+    const uint32_t pred0 = Predictor0(ARGB_BLACK, NULL);  // no left neighbor here: do not read data[-1]
+    AddPixelsEq(data, pred0);
+    for (x = 1; x < width; ++x) {
+      const uint32_t pred1 = Predictor1(data[x - 1], NULL);  // left pixel, already reconstructed
+      AddPixelsEq(data + x, pred1);
+    }
+    data += width;
+    ++y_start;
+  }
+
+  {
+    int y = y_start;
+    const int mask = (1 << transform->bits_) - 1;  // low bits are zero on tile boundaries
+    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+    const uint32_t* pred_mode_base =  // first mode entry of the tile row that contains y
+        transform->data_ + (y >> transform->bits_) * tiles_per_row;
+
+    while (y < y_end) {
+      int x;
+      const uint32_t pred2 = Predictor2(data[-1], data - width);  // left argument is ignored by Predictor2
+      const uint32_t* pred_mode_src = pred_mode_base;
+      PredictorFunc pred_func;
+
+      // First pixel follows the T (mode=2) mode.
+      AddPixelsEq(data, pred2);
+
+      // .. the rest:
+      pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];  // mode is stored in bits 8..11
+      for (x = 1; x < width; ++x) {
+        uint32_t pred;
+        if ((x & mask) == 0) {    // start of tile. Read predictor function.
+          pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
+        }
+        pred = pred_func(data[x - 1], data + x - width);  // left pixel and pointer to the pixel above
+        AddPixelsEq(data + x, pred);
+      }
+      data += width;
+      ++y;
+      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
+        pred_mode_base += tiles_per_row;
+      }
+    }
+  }
+}
+
+void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {  // in place, over num_pixs pixels
+  int i;
+  for (i = 0; i < num_pixs; ++i) {
+    const uint32_t argb = argb_data[i];
+    const uint32_t green = (argb >> 8) & 0xff;
+    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;  // red - green, mod 256
+    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;  // blue - green, mod 256
+    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;  // alpha and green are kept as-is
+  }
+}
+
+// Add green to blue and red channels (i.e. perform the inverse transform of
+// 'subtract green').
+static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
+                                 int y_start, int y_end, uint32_t* data) {  // in place, rows [y_start, y_end)
+  const int width = transform->xsize_;
+  const uint32_t* const data_end = data + (y_end - y_start) * width;
+  while (data < data_end) {
+    const uint32_t argb = *data;
+    // Green is added to red and blue in one step: (green << 16) | green lines up with 0x00ff00ff.
+    const uint32_t green = ((argb >> 8) & 0xff);
+    uint32_t red_blue = (argb & 0x00ff00ffu);
+    red_blue += (green << 16) | green;
+    red_blue &= 0x00ff00ffu;  // drop the carries, i.e. mod 256 per channel
+    *data++ = (argb & 0xff00ff00u) | red_blue;  // alpha and green are kept as-is
+  }
+}
+
+typedef struct {  // The three coefficients of one cross-color transform.
+  // Note: the members are uint8_t, so that any negative values are
+  // automatically converted to "mod 256" values.
+  uint8_t green_to_red_;  // scales green's contribution to red
+  uint8_t green_to_blue_;  // scales green's contribution to blue
+  uint8_t red_to_blue_;  // scales red's contribution to blue
+} Multipliers;
+
+static WEBP_INLINE void MultipliersClear(Multipliers* m) {  // Zero all coefficients; every delta becomes 0.
+  m->green_to_red_ = 0;
+  m->green_to_blue_ = 0;
+  m->red_to_blue_ = 0;
+}
+
+static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,  // signed coefficient
+                                                int8_t color) {  // channel value, reinterpreted as signed
+  return (uint32_t)((int)(color_pred) * color) >> 5;  // the cast binds first, so the shift is unsigned
+}
+
+static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,  // Unpacks a 32-bit code into *m.
+                                               Multipliers* const m) {
+  m->green_to_red_  = (color_code >>  0) & 0xff;  // byte 0
+  m->green_to_blue_ = (color_code >>  8) & 0xff;  // byte 1
+  m->red_to_blue_   = (color_code >> 16) & 0xff;  // byte 2; byte 3 is ignored
+}
+
+static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {  // Packs *m; m is only read.
+  return 0xff000000u |  // byte 3 is always 0xff
+         ((uint32_t)(m->red_to_blue_) << 16) |
+         ((uint32_t)(m->green_to_blue_) << 8) |
+         m->green_to_red_;  // same byte layout that ColorCodeToMultipliers() reads
+}
+
+static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,  // Applies or undoes the transform.
+                                           uint32_t argb, int inverse) {  // inverse != 0 adds the deltas back
+  const uint32_t green = argb >> 8;  // upper bits are dropped when passed as int8_t below
+  const uint32_t red = argb >> 16;  // likewise; new_red is masked to 8 bits after use
+  uint32_t new_red = red;
+  uint32_t new_blue = argb;  // masked to 8 bits after the deltas are applied
+
+  if (inverse) {
+    new_red += ColorTransformDelta(m->green_to_red_, green);
+    new_red &= 0xff;
+    new_blue += ColorTransformDelta(m->green_to_blue_, green);
+    new_blue += ColorTransformDelta(m->red_to_blue_, new_red);  // uses the restored red
+    new_blue &= 0xff;
+  } else {
+    new_red -= ColorTransformDelta(m->green_to_red_, green);
+    new_red &= 0xff;
+    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
+    new_blue -= ColorTransformDelta(m->red_to_blue_, red);  // uses the original red, mirroring the inverse
+    new_blue &= 0xff;
+  }
+  return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);  // alpha and green are kept as-is
+}
+
+static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,  // Returns 1 if argb[ix] is a repeat.
+                                          int ix, int xsize) {  // ix: pixel index; xsize: row length
+  const uint32_t v = argb[ix];
+  if (ix >= xsize + 3) {  // enough history to compare against the row above as well
+    if (v == argb[ix - xsize] &&  // this pixel and the three before it match the pixels one row up
+        argb[ix - 1] == argb[ix - xsize - 1] &&
+        argb[ix - 2] == argb[ix - xsize - 2] &&
+        argb[ix - 3] == argb[ix - xsize - 3]) {
+      return 1;
+    }
+    return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];  // same as the 3 previous pixels
+  } else if (ix >= 3) {
+    return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];  // same as the 3 previous pixels
+  }
+  return 0;  // fewer than three preceding pixels: never a repeat
+}
+
+static float PredictionCostCrossColor(const int accumulated[256],  // histogram from earlier tiles
+                                      const int counts[256]) {  // histogram of the candidate tile
+  // Favor low entropy, locally and globally.
+  int i;
+  int combo[256];  // accumulated + counts
+  for (i = 0; i < 256; ++i) {
+    combo[i] = accumulated[i] + counts[i];
+  }
+  return ShannonEntropy(combo, 256) +  // global term
+         ShannonEntropy(counts, 256) +  // local term
+         PredictionCostSpatial(counts, 3, 2.4);  // Favor small absolute values.
+}
+
+static Multipliers GetBestColorTransformForTile(  // Searches the multipliers with the lowest cost for a tile.
+    int tile_x, int tile_y, int bits,  // tile coordinates; tile side is 1 << bits
+    Multipliers prevX,  // multipliers of a neighboring tile; matching them lowers the cost
+    Multipliers prevY,  // likewise, for the tile above
+    int step, int xsize, int ysize,  // search stride (values < 1 are treated as 1) and image size
+    int* accumulated_red_histo,  // 256-bin red histogram from earlier tiles
+    int* accumulated_blue_histo,  // 256-bin blue histogram from earlier tiles
+    const uint32_t* const argb) {  // whole image, xsize pixels per row
+  float best_diff = MAX_DIFF_COST;
+  float cur_diff;
+  const int fullstep = (step > 0) ? step : 1;  // a stride < 1 would never advance the search loops
+  const int halfstep = (fullstep > 1) ? fullstep / 2 : 1;  // step / 2 is 0 for step == 1
+  const int max_tile_size = 1 << bits;
+  const int tile_y_offset = tile_y * max_tile_size;
+  const int tile_x_offset = tile_x * max_tile_size;
+  int green_to_red, green_to_blue;
+  int red_to_blue;
+  int all_x_max = tile_x_offset + max_tile_size;
+  int all_y_max = tile_y_offset + max_tile_size;
+  Multipliers best_tx;
+  MultipliersClear(&best_tx);
+  if (all_x_max > xsize) {  // clip the tile to the image
+    all_x_max = xsize;
+  }
+  if (all_y_max > ysize) {
+    all_y_max = ysize;
+  }
+  for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {  // pass 1: green_to_red only
+    int histo[256] = { 0 };  // histogram of transformed red values
+    int all_y;
+    Multipliers tx;
+    MultipliersClear(&tx);
+    tx.green_to_red_ = green_to_red & 0xff;
+
+    for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+      uint32_t predict;
+      int ix = all_y * xsize + tile_x_offset;
+      int all_x;
+      for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+        if (SkipRepeatedPixels(argb, ix, xsize)) {
+          continue;
+        }
+        predict = TransformColor(&tx, argb[ix], 0);
+        ++histo[(predict >> 16) & 0xff];  // red.
+      }
+    }
+    cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
+    if (tx.green_to_red_ == prevX.green_to_red_) {
+      cur_diff -= 3;  // favor keeping the areas locally similar
+    }
+    if (tx.green_to_red_ == prevY.green_to_red_) {
+      cur_diff -= 3;  // favor keeping the areas locally similar
+    }
+    if (tx.green_to_red_ == 0) {
+      cur_diff -= 3;  // favor the identity coefficient
+    }
+    if (cur_diff < best_diff) {
+      best_diff = cur_diff;
+      best_tx = tx;
+    }
+  }
+  best_diff = MAX_DIFF_COST;  // pass 2 starts from scratch, keeping the green_to_red found above
+  green_to_red = best_tx.green_to_red_;
+  for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += fullstep) {
+    for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += fullstep) {
+      int all_y;
+      int histo[256] = { 0 };  // histogram of transformed blue values
+      Multipliers tx;
+      tx.green_to_red_ = green_to_red;
+      tx.green_to_blue_ = green_to_blue;
+      tx.red_to_blue_ = red_to_blue;
+      for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
+        uint32_t predict;
+        int all_x;
+        int ix = all_y * xsize + tile_x_offset;
+        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+          if (SkipRepeatedPixels(argb, ix, xsize)) {
+            continue;
+          }
+          predict = TransformColor(&tx, argb[ix], 0);
+          ++histo[predict & 0xff];  // blue.
+        }
+      }
+      cur_diff =
+        PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
+      if (tx.green_to_blue_ == prevX.green_to_blue_) {
+        cur_diff -= 3;  // favor keeping the areas locally similar
+      }
+      if (tx.green_to_blue_ == prevY.green_to_blue_) {
+        cur_diff -= 3;  // favor keeping the areas locally similar
+      }
+      if (tx.red_to_blue_ == prevX.red_to_blue_) {
+        cur_diff -= 3;  // favor keeping the areas locally similar
+      }
+      if (tx.red_to_blue_ == prevY.red_to_blue_) {
+        cur_diff -= 3;  // favor keeping the areas locally similar
+      }
+      if (tx.green_to_blue_ == 0) {
+        cur_diff -= 3;  // favor the identity coefficient
+      }
+      if (tx.red_to_blue_ == 0) {
+        cur_diff -= 3;  // favor the identity coefficient
+      }
+      if (cur_diff < best_diff) {
+        best_diff = cur_diff;
+        best_tx = tx;
+      }
+    }
+  }
+  return best_tx;
+}
+
+static void CopyTileWithColorTransform(int xsize, int ysize,  // Forward-transforms one tile of argb in place.
+                                       int tile_x, int tile_y, int bits,  // tile coordinates; side is 1 << bits
+                                       Multipliers color_transform,
+                                       uint32_t* const argb) {
+  int y;
+  int xscan = 1 << bits;  // tile width, clipped below
+  int yscan = 1 << bits;  // tile height, clipped below
+  tile_x <<= bits;  // from here on tile_x / tile_y are pixel offsets
+  tile_y <<= bits;
+  if (xscan > xsize - tile_x) {
+    xscan = xsize - tile_x;
+  }
+  if (yscan > ysize - tile_y) {
+    yscan = ysize - tile_y;
+  }
+  yscan += tile_y;  // yscan becomes the exclusive end row
+  for (y = tile_y; y < yscan; ++y) {
+    int ix = y * xsize + tile_x;
+    const int end_ix = ix + xscan;
+    for (; ix < end_ix; ++ix) {
+      argb[ix] = TransformColor(&color_transform, argb[ix], 0);  // 0 selects the forward direction
+    }
+  }
+}
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int step,  // tile side 1 << bits; search stride
+                             uint32_t* const argb, uint32_t* image) {  // argb transformed in place; image gets codes
+  const int max_tile_size = 1 << bits;
+  int tile_xsize = VP8LSubSampleSize(width, bits);  // tiles per row
+  int tile_ysize = VP8LSubSampleSize(height, bits);  // tiles per column
+  int accumulated_red_histo[256] = { 0 };  // transformed red values of the tiles done so far
+  int accumulated_blue_histo[256] = { 0 };  // transformed blue values of the tiles done so far
+  int tile_y;
+  int tile_x;
+  Multipliers prevX;
+  Multipliers prevY;
+  MultipliersClear(&prevY);
+  MultipliersClear(&prevX);
+  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+      Multipliers color_transform;
+      int all_x_max;
+      int y;
+      const int tile_y_offset = tile_y * max_tile_size;
+      const int tile_x_offset = tile_x * max_tile_size;
+      if (tile_y != 0) {  // NOTE(review): for tile_x == 0 the index below is the last tile of the previous row
+        ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
+        ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
+                               &prevY);
+      } else if (tile_x != 0) {  // first tile row: only a left neighbor exists
+        ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
+      }
+      color_transform =
+          GetBestColorTransformForTile(tile_x, tile_y, bits,
+                                       prevX, prevY,
+                                       step, width, height,
+                                       &accumulated_red_histo[0],
+                                       &accumulated_blue_histo[0],
+                                       argb);
+      image[tile_y * tile_xsize + tile_x] =
+          MultipliersToColorCode(&color_transform);
+      CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
+                                 color_transform, argb);
+
+      // Gather accumulated histogram data.
+      all_x_max = tile_x_offset + max_tile_size;
+      if (all_x_max > width) {
+        all_x_max = width;
+      }
+      for (y = 0; y < max_tile_size; ++y) {
+        int ix;
+        int all_x;
+        int all_y = tile_y_offset + y;
+        if (all_y >= height) {
+          break;
+        }
+        ix = all_y * width + tile_x_offset;
+        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
+          if (ix >= 2 &&  // equal to both of the two previous pixels
+              argb[ix] == argb[ix - 2] &&
+              argb[ix] == argb[ix - 1]) {
+            continue;  // repeated pixels are handled by backward references
+          }
+          if (ix >= width + 2 &&  // this pixel and the two before it equal the pixels one row up
+              argb[ix - 2] == argb[ix - width - 2] &&
+              argb[ix - 1] == argb[ix - width - 1] &&
+              argb[ix] == argb[ix - width]) {
+            continue;  // repeated pixels are handled by backward references
+          }
+          ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
+          ++accumulated_blue_histo[argb[ix] & 0xff];
+        }
+      }
+    }
+  }
+}
+
+// Inverse cross-color transform, applied in place to rows [y_start, y_end).
+static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
+                                       int y_start, int y_end, uint32_t* data) {
+  const int width = transform->xsize_;
+  const int mask = (1 << transform->bits_) - 1;  // position inside a tile
+  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
+  int y = y_start;
+  const uint32_t* pred_row =
+      transform->data_ + (y >> transform->bits_) * tiles_per_row;
+  // 'data' points at row y_start; each tile carries one multiplier code.
+  while (y < y_end) {
+    const uint32_t* pred = pred_row;
+    Multipliers m = { 0, 0, 0 };
+    int x;
+    // Reload the multipliers whenever x crosses into the next tile.
+    for (x = 0; x < width; ++x) {
+      if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
+      data[x] = TransformColor(&m, data[x], 1);
+    }
+    data += width;
+    ++y;
+    if ((y & mask) == 0) pred_row += tiles_per_row;  // next row of tiles
+  }
+}
+
+// Unbundles packed color indices and replaces each with its color_map entry.
+static void ColorIndexInverseTransform(
+    const VP8LTransform* const transform,
+    int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
+  int y;
+  const int bits_per_pixel = 8 >> transform->bits_;  // width of one index
+  const int width = transform->xsize_;
+  const uint32_t* const color_map = transform->data_;  // index -> pixel value
+  if (bits_per_pixel < 8) {  // several indices share one source word
+    const int pixels_per_byte = 1 << transform->bits_;
+    const int count_mask = pixels_per_byte - 1;
+    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
+    for (y = y_start; y < y_end; ++y) {
+      uint32_t packed_pixels = 0;
+      int x;
+      for (x = 0; x < width; ++x) {
+        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
+        // increments of x. Fortunately, pixels_per_byte is a power of 2, so
+        // can just use a mask for that, instead of decrementing a counter.
+        if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
+        *dst++ = color_map[packed_pixels & bit_mask];  // lowest bits first
+        packed_pixels >>= bits_per_pixel;
+      }
+    }
+  } else {  // one index per source word, stored in bits 8..15
+    for (y = y_start; y < y_end; ++y) {
+      int x;
+      for (x = 0; x < width; ++x) {
+        *dst++ = color_map[((*src++) >> 8) & 0xff];
+      }
+    }
+  }
+}
+
+void VP8LInverseTransform(const VP8LTransform* const transform,
+                          int row_start, int row_end,
+                          const uint32_t* const in, uint32_t* const out) {
+  assert(row_start < row_end);  // the range [row_start, row_end) is non-empty
+  assert(row_end <= transform->ysize_);
+  switch (transform->type_) {  // only color indexing reads 'in'
+    case SUBTRACT_GREEN:
+      AddGreenToBlueAndRed(transform, row_start, row_end, out);
+      break;
+    case PREDICTOR_TRANSFORM:
+      PredictorInverseTransform(transform, row_start, row_end, out);
+      if (row_end != transform->ysize_) {
+        // The last predicted row in this iteration will be the top-pred row
+        // for the first row in next iteration.
+        const int width = transform->xsize_;
+        memcpy(out - width, out + (row_end - row_start - 1) * width,
+               width * sizeof(*out));
+      }
+      break;
+    case CROSS_COLOR_TRANSFORM:
+      ColorSpaceInverseTransform(transform, row_start, row_end, out);
+      break;
+    case COLOR_INDEXING_TRANSFORM:
+      if (in == out && transform->bits_ > 0) {  // in-place, bundled indices
+        // Move packed pixels to the end of unpacked region, so that unpacking
+        // can occur seamlessly.
+        // Also, note that this is the only transform that applies on
+        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
+        // transforms work on effective width of xsize_.
+        const int out_stride = (row_end - row_start) * transform->xsize_;
+        const int in_stride = (row_end - row_start) *
+            VP8LSubSampleSize(transform->xsize_, transform->bits_);
+        uint32_t* const src = out + out_stride - in_stride;
+        memmove(src, out, in_stride * sizeof(*src));  // regions may overlap
+        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
+      } else {
+        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
+      }
+      break;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+static int is_big_endian(void) {  // returns 1 on big-endian hosts, else 0
+  static const union {
+    uint16_t w;
+    uint8_t b[2];
+  } tmp = { 1 };  // sets w = 1; b[0] then holds 1 only on little-endian
+  return (tmp.b[0] != 1);
+}
+
+static void ConvertBGRAToRGB(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {  // 3 output bytes per pixel; alpha is dropped
+    const uint32_t argb = *src++;
+    *dst++ = (argb >> 16) & 0xff;  // red
+    *dst++ = (argb >>  8) & 0xff;  // green
+    *dst++ = (argb >>  0) & 0xff;  // blue
+  }
+}
+
+static void ConvertBGRAToRGBA(const uint32_t* src,
+                              int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {  // 4 output bytes per pixel
+    const uint32_t argb = *src++;
+    *dst++ = (argb >> 16) & 0xff;  // red
+    *dst++ = (argb >>  8) & 0xff;  // green
+    *dst++ = (argb >>  0) & 0xff;  // blue
+    *dst++ = (argb >> 24) & 0xff;  // alpha
+  }
+}
+
+static void ConvertBGRAToRGBA4444(const uint32_t* src,
+                                  int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {  // 2 output bytes per pixel, top 4 bits per channel
+    const uint32_t argb = *src++;
+    *dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);  // R4 | G4
+    *dst++ = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);  // B4 | A4
+  }
+}
+
+static void ConvertBGRAToRGB565(const uint32_t* src,
+                                int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {  // 2 output bytes per pixel; alpha is dropped
+    const uint32_t argb = *src++;
+    *dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);  // R5 | G bits 7..5
+    *dst++ = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);  // G bits 4..2 | B5
+  }
+}
+
+static void ConvertBGRAToBGR(const uint32_t* src,
+                             int num_pixels, uint8_t* dst) {
+  const uint32_t* const src_end = src + num_pixels;
+  while (src < src_end) {  // 3 output bytes per pixel; alpha is dropped
+    const uint32_t argb = *src++;
+    *dst++ = (argb >>  0) & 0xff;  // blue
+    *dst++ = (argb >>  8) & 0xff;  // green
+    *dst++ = (argb >> 16) & 0xff;  // red
+  }
+}
+
+static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
+                       int swap_on_big_endian) {
+  if (is_big_endian() == swap_on_big_endian) {  // host order != wanted order
+    const uint32_t* const src_end = src + num_pixels;
+    while (src < src_end) {
+      uint32_t argb = *src++;
+#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
+      __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
+      *(uint32_t*)dst = argb;
+      dst += sizeof(argb);
+#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
+      argb = _byteswap_ulong(argb);
+      *(uint32_t*)dst = argb;
+      dst += sizeof(argb);
+#else  // portable: here swap_on_big_endian equals the host's endianness
+      *dst++ = (argb >> (swap_on_big_endian ?  0 : 24)) & 0xff;
+      *dst++ = (argb >> (swap_on_big_endian ?  8 : 16)) & 0xff;
+      *dst++ = (argb >> (swap_on_big_endian ? 16 :  8)) & 0xff;
+      *dst++ = (argb >> (swap_on_big_endian ? 24 :  0)) & 0xff;
+#endif
+    }
+  } else {  // host byte order already matches the requested layout
+    memcpy(dst, src, num_pixels * sizeof(*src));
+  }
+}
+
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
+                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
+  switch (out_colorspace) {  // input is treated as one row of num_pixels
+    case MODE_RGB:
+      ConvertBGRAToRGB(in_data, num_pixels, rgba);
+      break;
+    case MODE_RGBA:
+      ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+      break;
+    case MODE_rgbA:  // convert first, then multiply the colors by alpha
+      ConvertBGRAToRGBA(in_data, num_pixels, rgba);
+      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);  // alpha byte last
+      break;
+    case MODE_BGR:
+      ConvertBGRAToBGR(in_data, num_pixels, rgba);
+      break;
+    case MODE_BGRA:
+      CopyOrSwap(in_data, num_pixels, rgba, 1);
+      break;
+    case MODE_bgrA:
+      CopyOrSwap(in_data, num_pixels, rgba, 1);
+      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);  // alpha byte last
+      break;
+    case MODE_ARGB:
+      CopyOrSwap(in_data, num_pixels, rgba, 0);
+      break;
+    case MODE_Argb:
+      CopyOrSwap(in_data, num_pixels, rgba, 0);
+      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);  // alpha byte first
+      break;
+    case MODE_RGBA_4444:
+      ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+      break;
+    case MODE_rgbA_4444:
+      ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
+      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
+      break;
+    case MODE_RGB_565:
+      ConvertBGRAToRGB565(in_data, num_pixels, rgba);
+      break;
+    default:
+      assert(0);          // Code flow should not reach here.
+  }
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dsp/lossless.h b/drivers/webpold/dsp/lossless.h
new file mode 100644
index 0000000000..7c7d5555ed
--- /dev/null
+++ b/drivers/webpold/dsp/lossless.h
@@ -0,0 +1,82 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Image transforms and color space conversion methods for lossless decoder.
+//
+// Authors: Vikas Arora (vikaas.arora@gmail.com)
+//          Jyrki Alakuijala (jyrki@google.com)
+
+#ifndef WEBP_DSP_LOSSLESS_H_
+#define WEBP_DSP_LOSSLESS_H_
+
+#include "../types.h"
+#include "../decode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Image transforms.
+
+struct VP8LTransform;  // Defined in dec/vp8li.h.
+
+// Performs inverse transform of data given transform information, start and end
+// rows. Transform will be applied to rows [row_start, row_end[.
+// The *in and *out pointers refer to source and destination data respectively
+// corresponding to the intermediate row (row_start).
+void VP8LInverseTransform(const struct VP8LTransform* const transform,
+                          int row_start, int row_end,
+                          const uint32_t* const in, uint32_t* const out);
+
+// Subtracts green from blue and red channels.
+void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);
+
+void VP8LResidualImage(int width, int height, int bits,
+                       uint32_t* const argb, uint32_t* const argb_scratch,
+                       uint32_t* const image);
+
+void VP8LColorSpaceTransform(int width, int height, int bits, int step,
+                             uint32_t* const argb, uint32_t* image);
+
+//------------------------------------------------------------------------------
+// Color space conversion.
+
+// Converts from BGRA to other color spaces.
+void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
+                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba);
+
+//------------------------------------------------------------------------------
+// Misc methods.
+
+// Returns 'size' divided by 2^sampling_bits, rounded up.
+static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
+                                              uint32_t sampling_bits) {
+  return (size + (1 << sampling_bits) - 1) >> sampling_bits;  // ceil division
+}
+
+// Faster logarithm for integers, with the property of log2(0) == 0.
+float VP8LFastLog2(int v);
+// Fast calculation of v * log2(v) for integer input.
+static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
+
+// Returns the per-component difference (a - b) of two pixels, each mod 256.
+static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
+  const uint32_t alpha_and_green =  // 0xff filler bytes absorb the borrows
+      0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);
+  const uint32_t red_and_blue =  // same trick for the other two bytes
+      0xff00ff00u + (a & 0x00ff00ffu) - (b & 0x00ff00ffu);
+  return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // WEBP_DSP_LOSSLESS_H_
diff --git a/drivers/webpold/dsp/upsampling.c b/drivers/webpold/dsp/upsampling.c
new file mode 100644
index 0000000000..4855eb1432
--- /dev/null
+++ b/drivers/webpold/dsp/upsampling.c
@@ -0,0 +1,357 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+#include "./yuv.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Fancy upsampler
+
+#ifdef FANCY_UPSAMPLING
+
+// Fancy upsampling functions to convert YUV to RGB
+WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
+
+// Given samples laid out in a square as:
+//  [a b]
+//  [c d]
+// we interpolate u/v as:
+//  ([9*a + 3*b + 3*c +   d    3*a + 9*b + 3*c +   d] + [8 8]) / 16
+//  ([3*a +   b + 9*c + 3*d      a + 3*b + 3*c + 9*d] + [8 8]) / 16
+
+// We process u and v together stashed into 32bit (16bit each).
+#define LOAD_UV(u,v) ((u) | ((v) << 16))
+
+#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                  \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* cur_u, const uint8_t* cur_v,              \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int x;                                                                       \
+  const int last_pixel_pair = (len - 1) >> 1;                                  \
+  uint32_t tl_uv = LOAD_UV(top_u[0], top_v[0]);   /* top-left sample */        \
+  uint32_t l_uv  = LOAD_UV(cur_u[0], cur_v[0]);   /* left-sample */            \
+  if (top_y) {                                                                 \
+    const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;                \
+    FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                          \
+  }                                                                            \
+  if (bottom_y) {                                                              \
+    const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;                \
+    FUNC(bottom_y[0], uv0 & 0xff, (uv0 >> 16), bottom_dst);                    \
+  }                                                                            \
+  for (x = 1; x <= last_pixel_pair; ++x) {                                     \
+    const uint32_t t_uv = LOAD_UV(top_u[x], top_v[x]);  /* top sample */       \
+    const uint32_t uv   = LOAD_UV(cur_u[x], cur_v[x]);  /* sample */           \
+    /* precompute invariant values associated with first and second diagonals*/\
+    const uint32_t avg = tl_uv + t_uv + l_uv + uv + 0x00080008u;               \
+    const uint32_t diag_12 = (avg + 2 * (t_uv + l_uv)) >> 3;                   \
+    const uint32_t diag_03 = (avg + 2 * (tl_uv + uv)) >> 3;                    \
+    if (top_y) {                                                               \
+      const uint32_t uv0 = (diag_12 + tl_uv) >> 1;                             \
+      const uint32_t uv1 = (diag_03 + t_uv) >> 1;                              \
+      FUNC(top_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                          \
+           top_dst + (2 * x - 1) * XSTEP);                                     \
+      FUNC(top_y[2 * x - 0], uv1 & 0xff, (uv1 >> 16),                          \
+           top_dst + (2 * x - 0) * XSTEP);                                     \
+    }                                                                          \
+    if (bottom_y) {                                                            \
+      const uint32_t uv0 = (diag_03 + l_uv) >> 1;                              \
+      const uint32_t uv1 = (diag_12 + uv) >> 1;                                \
+      FUNC(bottom_y[2 * x - 1], uv0 & 0xff, (uv0 >> 16),                       \
+           bottom_dst + (2 * x - 1) * XSTEP);                                  \
+      FUNC(bottom_y[2 * x + 0], uv1 & 0xff, (uv1 >> 16),                       \
+           bottom_dst + (2 * x + 0) * XSTEP);                                  \
+    }                                                                          \
+    tl_uv = t_uv;                                                              \
+    l_uv = uv;                                                                 \
+  }                                                                            \
+  if (!(len & 1)) {                                                            \
+    if (top_y) {                                                               \
+      const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;              \
+      FUNC(top_y[len - 1], uv0 & 0xff, (uv0 >> 16),                            \
+           top_dst + (len - 1) * XSTEP);                                       \
+    }                                                                          \
+    if (bottom_y) {                                                            \
+      const uint32_t uv0 = (3 * l_uv + tl_uv + 0x00020002u) >> 2;              \
+      FUNC(bottom_y[len - 1], uv0 & 0xff, (uv0 >> 16),                         \
+           bottom_dst + (len - 1) * XSTEP);                                    \
+    }                                                                          \
+  }                                                                            \
+}
+
+// All variants implemented.
+UPSAMPLE_FUNC(UpsampleRgbLinePair,  VP8YuvToRgb,  3)
+UPSAMPLE_FUNC(UpsampleBgrLinePair,  VP8YuvToBgr,  3)
+UPSAMPLE_FUNC(UpsampleRgbaLinePair, VP8YuvToRgba, 4)
+UPSAMPLE_FUNC(UpsampleBgraLinePair, VP8YuvToBgra, 4)
+UPSAMPLE_FUNC(UpsampleArgbLinePair, VP8YuvToArgb, 4)
+UPSAMPLE_FUNC(UpsampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+UPSAMPLE_FUNC(UpsampleRgb565LinePair,  VP8YuvToRgb565,  2)
+
+#undef LOAD_UV
+#undef UPSAMPLE_FUNC
+
+#endif  // FANCY_UPSAMPLING
+
+//------------------------------------------------------------------------------
+// simple point-sampling
+
+#define SAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* u, const uint8_t* v,                      \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int i;                                                                       \
+  for (i = 0; i < len - 1; i += 2) {                                           \
+    FUNC(top_y[0], u[0], v[0], top_dst);                                       \
+    FUNC(top_y[1], u[0], v[0], top_dst + XSTEP);                               \
+    FUNC(bottom_y[0], u[0], v[0], bottom_dst);                                 \
+    FUNC(bottom_y[1], u[0], v[0], bottom_dst + XSTEP);                         \
+    top_y += 2;                                                                \
+    bottom_y += 2;                                                             \
+    u++;                                                                       \
+    v++;                                                                       \
+    top_dst += 2 * XSTEP;                                                      \
+    bottom_dst += 2 * XSTEP;                                                   \
+  }                                                                            \
+  if (i == len - 1) {    /* last one */                                        \
+    FUNC(top_y[0], u[0], v[0], top_dst);                                       \
+    FUNC(bottom_y[0], u[0], v[0], bottom_dst);                                 \
+  }                                                                            \
+}
+
+// All variants implemented.
+SAMPLE_FUNC(SampleRgbLinePair,      VP8YuvToRgb,  3)
+SAMPLE_FUNC(SampleBgrLinePair,      VP8YuvToBgr,  3)
+SAMPLE_FUNC(SampleRgbaLinePair,     VP8YuvToRgba, 4)
+SAMPLE_FUNC(SampleBgraLinePair,     VP8YuvToBgra, 4)
+SAMPLE_FUNC(SampleArgbLinePair,     VP8YuvToArgb, 4)
+SAMPLE_FUNC(SampleRgba4444LinePair, VP8YuvToRgba4444, 2)
+SAMPLE_FUNC(SampleRgb565LinePair,   VP8YuvToRgb565, 2)
+
+#undef SAMPLE_FUNC
+
+const WebPSampleLinePairFunc WebPSamplers[MODE_LAST] = {
+  SampleRgbLinePair,       // MODE_RGB
+  SampleRgbaLinePair,      // MODE_RGBA
+  SampleBgrLinePair,       // MODE_BGR
+  SampleBgraLinePair,      // MODE_BGRA
+  SampleArgbLinePair,      // MODE_ARGB
+  SampleRgba4444LinePair,  // MODE_RGBA_4444
+  SampleRgb565LinePair,    // MODE_RGB_565
+  SampleRgbaLinePair,      // MODE_rgbA
+  SampleBgraLinePair,      // MODE_bgrA
+  SampleArgbLinePair,      // MODE_Argb
+  SampleRgba4444LinePair   // MODE_rgbA_4444
+};
+
+//------------------------------------------------------------------------------
+
+#if !defined(FANCY_UPSAMPLING)
+#define DUAL_SAMPLE_FUNC(FUNC_NAME, FUNC)                                      \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y,              \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* bot_u, const uint8_t* bot_v,              \
+                      uint8_t* top_dst, uint8_t* bot_dst, int len) {           \
+  const int half_len = len >> 1;                                               \
+  int x;                                                                       \
+  if (top_dst != NULL) {                                                       \
+    for (x = 0; x < half_len; ++x) {                                           \
+      FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x + 0);         \
+      FUNC(top_y[2 * x + 1], top_u[x], top_v[x], top_dst + 8 * x + 4);         \
+    }                                                                          \
+    if (len & 1) FUNC(top_y[2 * x + 0], top_u[x], top_v[x], top_dst + 8 * x);  \
+  }                                                                            \
+  if (bot_dst != NULL) {                                                       \
+    for (x = 0; x < half_len; ++x) {                                           \
+      FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x + 0);         \
+      FUNC(bot_y[2 * x + 1], bot_u[x], bot_v[x], bot_dst + 8 * x + 4);         \
+    }                                                                          \
+    if (len & 1) FUNC(bot_y[2 * x + 0], bot_u[x], bot_v[x], bot_dst + 8 * x);  \
+  }                                                                            \
+}
+
+DUAL_SAMPLE_FUNC(DualLineSamplerBGRA, VP8YuvToBgra)
+DUAL_SAMPLE_FUNC(DualLineSamplerARGB, VP8YuvToArgb)
+#undef DUAL_SAMPLE_FUNC
+
+#endif  // !FANCY_UPSAMPLING
+
+WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last) {
+  WebPInitUpsamplers();  // empty body unless FANCY_UPSAMPLING is defined
+  VP8YUVInit();
+#ifdef FANCY_UPSAMPLING
+  return WebPUpsamplers[alpha_is_last ? MODE_BGRA : MODE_ARGB];
+#else
+  return (alpha_is_last ? DualLineSamplerBGRA : DualLineSamplerARGB);
+#endif
+}
+
+//------------------------------------------------------------------------------
+// YUV444 converter
+
+#define YUV444_FUNC(FUNC_NAME, FUNC, XSTEP)                                    \
+static void FUNC_NAME(const uint8_t* y, const uint8_t* u, const uint8_t* v,    \
+                      uint8_t* dst, int len) {                                 \
+  int i;                                                                       \
+  for (i = 0; i < len; ++i) FUNC(y[i], u[i], v[i], &dst[i * XSTEP]);           \
+}
+
+YUV444_FUNC(Yuv444ToRgb,      VP8YuvToRgb,  3)
+YUV444_FUNC(Yuv444ToBgr,      VP8YuvToBgr,  3)
+YUV444_FUNC(Yuv444ToRgba,     VP8YuvToRgba, 4)
+YUV444_FUNC(Yuv444ToBgra,     VP8YuvToBgra, 4)
+YUV444_FUNC(Yuv444ToArgb,     VP8YuvToArgb, 4)
+YUV444_FUNC(Yuv444ToRgba4444, VP8YuvToRgba4444, 2)
+YUV444_FUNC(Yuv444ToRgb565,   VP8YuvToRgb565, 2)
+
+#undef YUV444_FUNC
+
+const WebPYUV444Converter WebPYUV444Converters[MODE_LAST] = {
+  Yuv444ToRgb,       // MODE_RGB
+  Yuv444ToRgba,      // MODE_RGBA
+  Yuv444ToBgr,       // MODE_BGR
+  Yuv444ToBgra,      // MODE_BGRA
+  Yuv444ToArgb,      // MODE_ARGB
+  Yuv444ToRgba4444,  // MODE_RGBA_4444
+  Yuv444ToRgb565,    // MODE_RGB_565
+  Yuv444ToRgba,      // MODE_rgbA
+  Yuv444ToBgra,      // MODE_bgrA
+  Yuv444ToArgb,      // MODE_Argb
+  Yuv444ToRgba4444   // MODE_rgbA_4444
+};
+
+//------------------------------------------------------------------------------
+// Premultiplied modes
+
+// non-dithered modes
+
+// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
+// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
+// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
+#if 1     // (int)(x * a / 255.)
+#define MULTIPLIER(a)   ((a) * 32897UL)
+#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
+#else     // (int)(x * a / 255. + .5)
+#define MULTIPLIER(a) ((a) * 65793UL)
+#define PREMULTIPLY(x, m) (((x) * (m) + (1UL << 23)) >> 24)
+#endif
+
+static void ApplyAlphaMultiply(uint8_t* rgba, int alpha_first,
+                               int w, int h, int stride) {
+  while (h-- > 0) {  // one pass per row; 'stride' is the row pitch in bytes
+    uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
+    const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
+    int i;
+    for (i = 0; i < w; ++i) {  // 4 bytes per pixel
+      const uint32_t a = alpha[4 * i];
+      if (a != 0xff) {  // pixels with alpha 0xff are left unchanged
+        const uint32_t mult = MULTIPLIER(a);
+        rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
+        rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
+        rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
+      }
+    }
+    rgba += stride;
+  }
+}
+#undef MULTIPLIER
+#undef PREMULTIPLY
+
+// rgbA4444
+
+#define MULTIPLIER(a)  ((a) * 0x1111)    // 0x1111 ~= (1 << 16) / 15
+
+static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
+  return (x & 0xf0) | (x >> 4);  // high nibble copied into both nibbles
+}
+
+static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
+  return (x & 0x0f) | (x << 4);  // low nibble copied into both nibbles
+}
+
+static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
+  return (x * m) >> 16;  // 'm' acts as a 16-bit fixed-point factor
+}
+
+static void ApplyAlphaMultiply4444(uint8_t* rgba4444,
+                                   int w, int h, int stride) {
+  while (h-- > 0) {  // one pass per row; 'stride' is the row pitch in bytes
+    int i;
+    for (i = 0; i < w; ++i) {  // 2 bytes per pixel
+      const uint8_t a = (rgba4444[2 * i + 1] & 0x0f);  // 4-bit alpha
+      const uint32_t mult = MULTIPLIER(a);
+      const uint8_t r = multiply(dither_hi(rgba4444[2 * i + 0]), mult);
+      const uint8_t g = multiply(dither_lo(rgba4444[2 * i + 0]), mult);
+      const uint8_t b = multiply(dither_hi(rgba4444[2 * i + 1]), mult);
+      rgba4444[2 * i + 0] = (r & 0xf0) | ((g >> 4) & 0x0f);
+      rgba4444[2 * i + 1] = (b & 0xf0) | a;  // alpha nibble is kept as is
+    }
+    rgba4444 += stride;
+  }
+}
+#undef MULTIPLIER
+
+void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int)
+    = ApplyAlphaMultiply;
+void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int)
+    = ApplyAlphaMultiply4444;
+
+//------------------------------------------------------------------------------
+// Main call
+
+void WebPInitUpsamplers(void) {  // empty unless FANCY_UPSAMPLING is defined
+#ifdef FANCY_UPSAMPLING
+  WebPUpsamplers[MODE_RGB]       = UpsampleRgbLinePair;  // plain C versions
+  WebPUpsamplers[MODE_RGBA]      = UpsampleRgbaLinePair;
+  WebPUpsamplers[MODE_BGR]       = UpsampleBgrLinePair;
+  WebPUpsamplers[MODE_BGRA]      = UpsampleBgraLinePair;
+  WebPUpsamplers[MODE_ARGB]      = UpsampleArgbLinePair;
+  WebPUpsamplers[MODE_RGBA_4444] = UpsampleRgba4444LinePair;
+  WebPUpsamplers[MODE_RGB_565]   = UpsampleRgb565LinePair;
+
+  // If defined, use CPUInfo() to overwrite some pointers with faster versions.
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitUpsamplersSSE2();
+    }
+#endif
+  }
+#endif  // FANCY_UPSAMPLING
+}
+
+void WebPInitPremultiply(void) {
+  WebPApplyAlphaMultiply = ApplyAlphaMultiply;  // reset to the C versions
+  WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply4444;
+
+#ifdef FANCY_UPSAMPLING
+  WebPUpsamplers[MODE_rgbA]      = UpsampleRgbaLinePair;  // as MODE_RGBA
+  WebPUpsamplers[MODE_bgrA]      = UpsampleBgraLinePair;  // as MODE_BGRA
+  WebPUpsamplers[MODE_Argb]      = UpsampleArgbLinePair;  // as MODE_ARGB
+  WebPUpsamplers[MODE_rgbA_4444] = UpsampleRgba4444LinePair;
+
+  if (VP8GetCPUInfo != NULL) {
+#if defined(WEBP_USE_SSE2)
+    if (VP8GetCPUInfo(kSSE2)) {
+      WebPInitPremultiplySSE2();
+    }
+#endif
+  }
+#endif  // FANCY_UPSAMPLING
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dsp/upsampling_sse2.c b/drivers/webpold/dsp/upsampling_sse2.c
new file mode 100644
index 0000000000..8cb275a02b
--- /dev/null
+++ b/drivers/webpold/dsp/upsampling_sse2.c
@@ -0,0 +1,209 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// SSE2 version of YUV to RGB upsampling functions.
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./dsp.h"
+
+#if defined(WEBP_USE_SSE2)
+
+#include <assert.h>
+#include <emmintrin.h>
+#include <string.h>
+#include "./yuv.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef FANCY_UPSAMPLING
+
+// We compute (9*a + 3*b + 3*c + d + 8) / 16 as follows
+// u = (9*a + 3*b + 3*c + d + 8) / 16
+//   = (a + (a + 3*b + 3*c + d) / 8 + 1) / 2
+//   = (a + m + 1) / 2
+// where m = (a + 3*b + 3*c + d) / 8
+//         = ((a + b + c + d) / 2 + b + c) / 4
+//
+// Let's say  k = (a + b + c + d) / 4.
+// We can compute k as
+// k = (s + t + 1) / 2 - ((a^d) | (b^c) | (s^t)) & 1
+// where s = (a + d + 1) / 2 and t = (b + c + 1) / 2
+//
+// Then m can be written as
+// m = (k + t + 1) / 2 - (((b^c) & (s^t)) | (k^t)) & 1
+
+// out = avg(k,in) - (((ij & st) | (k^in)) & 1), using the caller's k, st, one.
+#define GET_M(ij, in, out) do {                                                \
+  const __m128i tmp0 = _mm_avg_epu8(k, (in));     /* (k + in + 1) / 2 */       \
+  const __m128i tmp1 = _mm_and_si128((ij), st);   /* (ij) & (s^t) */           \
+  const __m128i tmp2 = _mm_xor_si128(k, (in));    /* (k^in) */                 \
+  const __m128i tmp3 = _mm_or_si128(tmp1, tmp2);  /* ((ij) & (s^t)) | (k^in) */\
+  const __m128i tmp4 = _mm_and_si128(tmp3, one);  /* & 1 -> lsb_correction */  \
+  (out) = _mm_sub_epi8(tmp0, tmp4);    /* (k + in + 1) / 2 - lsb_correction */ \
+} while (0)
+
+// Averages a/da and b/db, interleaves both results and stores 32 bytes to out.
+#define PACK_AND_STORE(a, b, da, db, out) do {                                 \
+  const __m128i ta = _mm_avg_epu8(a, da);  /* (9a + 3b + 3c +  d + 8) / 16 */  \
+  const __m128i tb = _mm_avg_epu8(b, db);  /* (3a + 9b +  c + 3d + 8) / 16 */  \
+  const __m128i t1 = _mm_unpacklo_epi8(ta, tb);  /* interleave low halves */   \
+  const __m128i t2 = _mm_unpackhi_epi8(ta, tb);  /* interleave high halves */  \
+  _mm_store_si128(((__m128i*)(out)) + 0, t1);  /* 16-byte aligned store */     \
+  _mm_store_si128(((__m128i*)(out)) + 1, t2);                                  \
+} while (0)
+
+// Loads 17 samples from each of r1 and r2; writes 32 upsampled samples per row.
+#define UPSAMPLE_32PIXELS(r1, r2, out) {                                       \
+  const __m128i one = _mm_set1_epi8(1);                                        \
+  const __m128i a = _mm_loadu_si128((__m128i*)&(r1)[0]);                       \
+  const __m128i b = _mm_loadu_si128((__m128i*)&(r1)[1]);                       \
+  const __m128i c = _mm_loadu_si128((__m128i*)&(r2)[0]);                       \
+  const __m128i d = _mm_loadu_si128((__m128i*)&(r2)[1]);                       \
+                                                                               \
+  const __m128i s = _mm_avg_epu8(a, d);        /* s = (a + d + 1) / 2 */       \
+  const __m128i t = _mm_avg_epu8(b, c);        /* t = (b + c + 1) / 2 */       \
+  const __m128i st = _mm_xor_si128(s, t);      /* st = s^t */                  \
+                                                                               \
+  const __m128i ad = _mm_xor_si128(a, d);      /* ad = a^d */                  \
+  const __m128i bc = _mm_xor_si128(b, c);      /* bc = b^c */                  \
+                                                                               \
+  const __m128i t1 = _mm_or_si128(ad, bc);     /* (a^d) | (b^c) */             \
+  const __m128i t2 = _mm_or_si128(t1, st);     /* (a^d) | (b^c) | (s^t) */     \
+  const __m128i t3 = _mm_and_si128(t2, one);   /* ((a^d)|(b^c)|(s^t)) & 1 */   \
+  const __m128i t4 = _mm_avg_epu8(s, t);                                       \
+  const __m128i k = _mm_sub_epi8(t4, t3);      /* k = (a + b + c + d) / 4 */   \
+  __m128i diag1, diag2;                                                        \
+                                                                               \
+  GET_M(bc, t, diag1);                  /* diag1 = (a + 3b + 3c + d) / 8 */    \
+  GET_M(ad, s, diag2);                  /* diag2 = (3a + b + c + 3d) / 8 */    \
+                                                                               \
+  /* row r1 goes to out[0..31], row r2 to out[64..95] */                       \
+  PACK_AND_STORE(a, b, diag1, diag2, &(out)[0 * 32]);                          \
+  PACK_AND_STORE(c, d, diag2, diag1, &(out)[2 * 32]);                          \
+}
+
+// Function form of UPSAMPLE_32PIXELS, to reduce code size on non-critical paths.
+static void Upsample32Pixels(const uint8_t r1[], const uint8_t r2[],
+                             uint8_t* const out) {
+  UPSAMPLE_32PIXELS(r1, r2, out);
+}
+
+#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) {                         \
+  uint8_t r1[17], r2[17];  /* padded: UPSAMPLE_32PIXELS reads 17 bytes */      \
+  memcpy(r1, (tb), (num_pixels));                                              \
+  memcpy(r2, (bb), (num_pixels));                                              \
+  /* fill the rest of both 17-byte rows with their last valid byte */          \
+  memset(r1 + (num_pixels), r1[(num_pixels) - 1], 17 - (num_pixels));          \
+  memset(r2 + (num_pixels), r2[(num_pixels) - 1], 17 - (num_pixels));          \
+  /* using the shared function instead of the macro saves ~3k code size */     \
+  Upsample32Pixels(r1, r2, out);                                               \
+}
+
+#define CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, uv,                          \
+                    top_dst, bottom_dst, cur_x, num_pixels) {                  \
+  int n;  /* uv: top u at 0, top v at 32, bottom u at 64, bottom v at 96 */    \
+  if (top_y) {  /* a row is skipped when its luma pointer is NULL */           \
+    for (n = 0; n < (num_pixels); ++n) {                                       \
+      FUNC(top_y[(cur_x) + n], (uv)[n], (uv)[32 + n],                          \
+           top_dst + ((cur_x) + n) * XSTEP);                                   \
+    }                                                                          \
+  }                                                                            \
+  if (bottom_y) {                                                              \
+    for (n = 0; n < (num_pixels); ++n) {                                       \
+      FUNC(bottom_y[(cur_x) + n], (uv)[64 + n], (uv)[64 + 32 + n],             \
+           bottom_dst + ((cur_x) + n) * XSTEP);                                \
+    }                                                                          \
+  }                                                                            \
+}
+
+#define SSE2_UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP)                             \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,           \
+                      const uint8_t* top_u, const uint8_t* top_v,              \
+                      const uint8_t* cur_u, const uint8_t* cur_v,              \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) {        \
+  int b;                                                                       \
+  /* 16 byte aligned array to cache reconstructed u and v (4 x 32 bytes) */    \
+  uint8_t uv_buf[4 * 32 + 15];                                                 \
+  uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);            \
+  const int uv_len = (len + 1) >> 1;  /* number of u/v samples per row */      \
+  /* 17 pixels must be read-able for each block */                             \
+  const int num_blocks = (uv_len - 1) >> 4;                                    \
+  const int leftover = uv_len - num_blocks * 16;  /* always in [1, 16] */      \
+  const int last_pos = 1 + 32 * num_blocks;  /* first x of the tail block */   \
+                                                                               \
+  const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1;                         \
+  const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1;                         \
+                                                                               \
+  assert(len > 0);                                                             \
+  /* Pixel 0 is converted separately; the blocks below start at x = 1 */       \
+  if (top_y) {                                                                 \
+    const int u0 = (top_u[0] + u_diag) >> 1;                                   \
+    const int v0 = (top_v[0] + v_diag) >> 1;                                   \
+    FUNC(top_y[0], u0, v0, top_dst);                                           \
+  }                                                                            \
+  if (bottom_y) {                                                              \
+    const int u0 = (cur_u[0] + u_diag) >> 1;                                   \
+    const int v0 = (cur_v[0] + v_diag) >> 1;                                   \
+    FUNC(bottom_y[0], u0, v0, bottom_dst);                                     \
+  }                                                                            \
+                                                                               \
+  for (b = 0; b < num_blocks; ++b) {  /* full blocks: 16 u/v -> 32 pixels */   \
+    UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32);                            \
+    UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32);                            \
+    CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst,       \
+                32 * b + 1, 32)                                                \
+    top_u += 16;                                                               \
+    cur_u += 16;                                                               \
+    top_v += 16;                                                               \
+    cur_v += 16;                                                               \
+  }                                                                            \
+                                                                               \
+  UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv + 0 * 32);                  \
+  UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 1 * 32);                  \
+  CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst,         \
+              last_pos, len - last_pos);                                       \
+}
+
+// SSE2 variants of the fancy upsampler.
+SSE2_UPSAMPLE_FUNC(UpsampleRgbLinePairSSE2,  VP8YuvToRgb,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleBgrLinePairSSE2,  VP8YuvToBgr,  3)
+SSE2_UPSAMPLE_FUNC(UpsampleRgbaLinePairSSE2, VP8YuvToRgba, 4)
+SSE2_UPSAMPLE_FUNC(UpsampleBgraLinePairSSE2, VP8YuvToBgra, 4)
+
+#undef GET_M
+#undef PACK_AND_STORE
+#undef UPSAMPLE_32PIXELS
+#undef UPSAMPLE_LAST_BLOCK
+#undef CONVERT2RGB
+#undef SSE2_UPSAMPLE_FUNC
+
+//------------------------------------------------------------------------------
+
+extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
+
+void WebPInitUpsamplersSSE2(void) {
+  WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairSSE2;  // XSTEP = 4
+  WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairSSE2;  // XSTEP = 4
+  WebPUpsamplers[MODE_BGR]  = UpsampleBgrLinePairSSE2;   // XSTEP = 3
+  WebPUpsamplers[MODE_RGB]  = UpsampleRgbLinePairSSE2;   // XSTEP = 3
+}
+
+void WebPInitPremultiplySSE2(void) {
+  WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairSSE2;  // same as MODE_BGRA
+  WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairSSE2;  // same as MODE_RGBA
+}
+
+#endif  // FANCY_UPSAMPLING
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif   // WEBP_USE_SSE2
diff --git a/drivers/webpold/dsp/yuv.c b/drivers/webpold/dsp/yuv.c
new file mode 100644
index 0000000000..7f05f9a3aa
--- /dev/null
+++ b/drivers/webpold/dsp/yuv.c
@@ -0,0 +1,52 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// YUV->RGB conversion function
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./yuv.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum { YUV_HALF = 1 << (YUV_FIX - 1) };
+
+int16_t VP8kVToR[256], VP8kUToB[256];
+int32_t VP8kVToG[256], VP8kUToG[256];
+uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
+uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+
+static int done = 0;
+
+static WEBP_INLINE uint8_t clip(int v, int max_value) {
+  return (v < 0) ? 0 : (v <= max_value) ? v : max_value;  // [0, max_value]
+}
+
+void VP8YUVInit(void) {
+  int n;
+  if (!done) {  // the tables are only filled on the first call
+    for (n = 0; n < 256; ++n) {
+      const int centered = n - 128;
+      VP8kUToB[n] = (113618 * centered + YUV_HALF) >> YUV_FIX;
+      VP8kVToG[n] = -45773 * centered;
+      VP8kUToG[n] = -22014 * centered + YUV_HALF;
+      VP8kVToR[n] = (89858 * centered + YUV_HALF) >> YUV_FIX;
+    }
+    for (n = YUV_RANGE_MIN; n < YUV_RANGE_MAX; ++n) {
+      const int scaled = ((n - 16) * 76283 + YUV_HALF) >> YUV_FIX;
+      VP8kClip4Bits[n - YUV_RANGE_MIN] = clip((scaled + 8) >> 4, 15);
+      VP8kClip[n - YUV_RANGE_MIN] = clip(scaled, 255);
+    }
+    done = 1;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/dsp/yuv.h b/drivers/webpold/dsp/yuv.h
new file mode 100644
index 0000000000..a569109c54
--- /dev/null
+++ b/drivers/webpold/dsp/yuv.h
@@ -0,0 +1,128 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// inline YUV<->RGB conversion function
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_DSP_YUV_H_
+#define WEBP_DSP_YUV_H_
+
+#include "../dec/decode_vp8.h"
+
+//------------------------------------------------------------------------------
+// YUV -> RGB conversion
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+enum { YUV_FIX = 16,                // fixed-point precision
+       YUV_RANGE_MIN = -227,        // min value of r/g/b output
+       YUV_RANGE_MAX = 256 + 226    // max value of r/g/b output
+};
+extern int16_t VP8kVToR[256], VP8kUToB[256];
+extern int32_t VP8kVToG[256], VP8kUToG[256];
+extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
+extern uint8_t VP8kClip4Bits[YUV_RANGE_MAX - YUV_RANGE_MIN];
+
+static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
+                                    uint8_t* const rgb) {
+  // Bias luma once so that each channel is a single clip-table lookup.
+  const int base = y - YUV_RANGE_MIN;
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  rgb[0] = VP8kClip[base + VP8kVToR[v]];
+  rgb[1] = VP8kClip[base + g_off];
+  rgb[2] = VP8kClip[base + VP8kUToB[u]];
+}
+
+static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
+                                       uint8_t* const rgb) {
+  const int base = y - YUV_RANGE_MIN;
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  const uint8_t r = VP8kClip[base + VP8kVToR[v]];
+  const uint8_t g = VP8kClip[base + g_off];
+  const uint8_t b = VP8kClip[base + VP8kUToB[u]];
+  rgb[0] = (r & 0xf8) | (g >> 5);         // 5 bits of red, top 3 bits of green
+  rgb[1] = ((g << 3) & 0xe0) | (b >> 3);  // next 3 bits of green, 5 of blue
+}
+
+static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const argb) {
+  VP8YuvToRgb(y, u, v, argb + 1);  // r, g, b go to bytes 1..3
+  argb[0] = 0xff;                  // byte 0 is set to 0xff
+}
+
+static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
+                                         uint8_t* const argb) {
+  const int base = y - YUV_RANGE_MIN;
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  const int r4 = VP8kClip4Bits[base + VP8kVToR[v]];
+  const int g4 = VP8kClip4Bits[base + g_off];
+  const int b4 = VP8kClip4Bits[base + VP8kUToB[u]];
+  argb[0] = (r4 << 4) | g4;
+  argb[1] = (b4 << 4) | 0x0f;  // the low nibble of argb[1] is forced to 0xf
+}
+
+static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
+                                    uint8_t* const bgr) {
+  // Same lookups as VP8YuvToRgb(), stored in blue, green, red order.
+  const int base = y - YUV_RANGE_MIN;
+  const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
+  bgr[0] = VP8kClip[base + VP8kUToB[u]];
+  bgr[1] = VP8kClip[base + g_off];
+  bgr[2] = VP8kClip[base + VP8kVToR[v]];
+}
+
+static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const bgra) {
+  bgra[3] = 0xff;               // byte 3 is set to 0xff
+  VP8YuvToBgr(y, u, v, bgra);   // b, g, r go to bytes 0..2
+}
+
+static WEBP_INLINE void VP8YuvToRgba(uint8_t y, uint8_t u, uint8_t v,
+                                     uint8_t* const rgba) {
+  rgba[3] = 0xff;               // byte 3 is set to 0xff
+  VP8YuvToRgb(y, u, v, rgba);   // r, g, b go to bytes 0..2
+}
+
+// Must be called before everything, to initialize the tables.
+void VP8YUVInit(void);
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
+// More information at: http://en.wikipedia.org/wiki/YCbCr
+// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
+// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
+// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
+// We use 16bit fixed point operations.
+
+static WEBP_INLINE int VP8ClipUV(int v) {
+  const int uv = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
+  return (uv < 0) ? 0 : (uv > 255) ? 255 : uv;  // clamp to [0, 255]
+}
+
+static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
+  // Rounding term plus the +16 luma offset, both in YUV_FIX fixed point.
+  const int bias = (16 << YUV_FIX) + (1 << (YUV_FIX - 1));
+  return (16839 * r + 33059 * g + 6420 * b + bias) >> YUV_FIX;  // not clipped
+}
+
+static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
+  return VP8ClipUV(28800 * b - 19081 * g - 9719 * r);  // clipped by VP8ClipUV
+}
+
+static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
+  return VP8ClipUV(28800 * r - 24116 * g - 4684 * b);  // clipped by VP8ClipUV
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_DSP_YUV_H_ */
diff --git a/drivers/webpold/enc/alpha.c b/drivers/webpold/enc/alpha.c
new file mode 100644
index 0000000000..e554eb7f30
--- /dev/null
+++ b/drivers/webpold/enc/alpha.c
@@ -0,0 +1,330 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha-plane compression.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "./vp8enci.h"
+#include "../utils/filters.h"
+#include "../utils/quant_levels.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// -----------------------------------------------------------------------------
+// Encodes the given alpha data via specified compression method 'method'.
+// The pre-processing (quantization) is performed if 'quality' is less than 100.
+// For such cases, the encoding is lossy. The valid range is [0, 100] for
+// 'quality' and [0, 1] for 'method':
+//   'method = 0' - No compression;
+//   'method = 1' - Use lossless coder on the alpha plane only
+// 'filter' values [0, 4] correspond to prediction modes none, horizontal,
+// vertical & gradient filters. The prediction mode 4 will try all the
+// prediction modes 0 to 3 and pick the best one.
+// 'effort_level': specifies how much effort must be spent to try and reduce
+//  the compressed output size. In range 0 (quick) to 6 (slow).
+//
+// 'output' corresponds to the buffer containing compressed alpha data.
+//          This buffer is allocated by this method and caller should call
+//          free(*output) when done.
+// 'output_size' corresponds to size of this compressed alpha buffer.
+//
+// Returns 1 on successfully encoding the alpha and
+//         0 if either:
+//           invalid quality or method, or
+//           memory allocation for the compressed data fails.
+
+#include "../enc/vp8li.h"
+
+static int EncodeLossless(const uint8_t* const data, int width, int height,
+                          int effort_level,  // in [0..6] range
+                          VP8BitWriter* const bw,
+                          WebPAuxStats* const stats) {
+  int ok;
+  int x, y;
+  uint32_t* argb_row;
+  const uint8_t* alpha_row = data;
+  WebPPicture picture;
+  WebPConfig config;
+  VP8LBitWriter tmp_bw;
+
+  // Wrap the alpha plane into a temporary ARGB picture for the lossless coder.
+  WebPPictureInit(&picture);
+  picture.use_argb = 1;
+  picture.stats = stats;
+  picture.height = height;
+  picture.width = width;
+  if (!WebPPictureAlloc(&picture)) return 0;
+
+  // Each alpha sample is stored in the green channel, with 0xff on top.
+  argb_row = picture.argb;
+  for (y = 0; y < picture.height; ++y) {
+    for (x = 0; x < picture.width; ++x) {
+      argb_row[x] = 0xff000000u | (alpha_row[x] << 8);
+    }
+    argb_row += picture.argb_stride;
+    alpha_row += width;
+  }
+
+  // Moderate quality for alpha: 10 + 15 * effort_level, capped at 100. Higher
+  // qualities (80 and above) could be very slow.
+  WebPConfigInit(&config);
+  config.lossless = 1;
+  config.method = effort_level;  // impact is very small
+  config.quality = 10.f + 15.f * effort_level;
+  if (config.quality > 100.f) config.quality = 100.f;
+
+  // Encode into a scratch writer created with a width * height / 8 size hint.
+  ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3) &&
+       (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
+  WebPPictureFree(&picture);
+  if (ok) {  // copy the finished lossless stream into the caller's writer
+    const uint8_t* const stream = VP8LBitWriterFinish(&tmp_bw);
+    VP8BitWriterAppend(bw, stream, VP8LBitWriterNumBytes(&tmp_bw));
+  }
+  VP8LBitWriterDestroy(&tmp_bw);
+  return ok && !bw->error_;
+}
+
+// -----------------------------------------------------------------------------
+
+static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
+                               int method, int filter, int reduce_levels,
+                               int effort_level,  // in [0..6] range
+                               uint8_t* const tmp_alpha,
+                               VP8BitWriter* const bw,
+                               WebPAuxStats* const stats) {
+  const size_t data_size = width * height;
+  const int is_raw = (method == ALPHA_NO_COMPRESSION);
+  const uint8_t* alpha_src = data;
+  WebPFilterFunc filter_func;
+  uint8_t header;
+  int ok;
+
+  assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
+  assert(filter >= 0 && filter < WEBP_FILTER_LAST);
+  assert(method >= ALPHA_NO_COMPRESSION);
+  assert(method <= ALPHA_LOSSLESS_COMPRESSION);
+  assert(sizeof(header) == ALPHA_HEADER_LEN);
+  // TODO(skal): have a common function and #define's to validate alpha params.
+
+  // Header byte: method in the low bits, filter shifted up by 2 and the
+  // pre-processing flag shifted up by 4.
+  header = method | (filter << 2);
+  if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4;
+
+  // Size hint: header + plane for raw storage, 1/32th of the plane otherwise.
+  VP8BitWriterInit(bw, is_raw ? (ALPHA_HEADER_LEN + data_size)
+                              : (data_size >> 5));
+  VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
+
+  // When the filter has a function, its output in 'tmp_alpha' is coded instead.
+  filter_func = WebPFilters[filter];
+  if (filter_func) {
+    filter_func(data, width, height, 1, width, tmp_alpha);
+    alpha_src = tmp_alpha;
+  }
+
+  // Payload: the raw bytes, or the lossless-coded plane.
+  if (is_raw) {
+    ok = VP8BitWriterAppend(bw, alpha_src, width * height) && !bw->error_;
+  } else {
+    ok = EncodeLossless(alpha_src, width, height, effort_level, bw, stats);
+    VP8BitWriterFinish(bw);
+  }
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+// TODO(skal): move to dsp/ ?
+static void CopyPlane(const uint8_t* src, int src_stride,
+                      uint8_t* dst, int dst_stride, int width, int height) {
+  for (; height > 0; --height) {  // one memcpy of 'width' bytes per row
+    memcpy(dst, src, width);
+    dst += dst_stride;
+    src += src_stride;
+  }
+}
+
+// Compresses the alpha plane of enc->pic_. On success returns 1 and hands the
+// coded bytes over through '*output' / '*output_size'; returns 0 otherwise.
+static int EncodeAlpha(VP8Encoder* const enc,
+                       int quality, int method, int filter,
+                       int effort_level,
+                       uint8_t** const output, size_t* const output_size) {
+  const WebPPicture* const pic = enc->pic_;
+  const int width = pic->width;
+  const int height = pic->height;
+
+  uint8_t* quant_alpha = NULL;
+  const size_t data_size = width * height;
+  uint64_t sse = 0;
+  int ok = 1;
+  const int reduce_levels = (quality < 100);
+
+  // quick sanity checks
+  assert((uint64_t)data_size == (uint64_t)width * height);  // as per spec
+  assert(enc != NULL && pic != NULL && pic->a != NULL);
+  assert(output != NULL && output_size != NULL);
+  assert(width > 0 && height > 0);
+  assert(pic->a_stride >= width);
+  assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST);
+
+  if (quality < 0 || quality > 100 ||
+      method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) {
+    return 0;
+  }
+
+  quant_alpha = (uint8_t*)malloc(data_size);
+  if (quant_alpha == NULL) return 0;
+
+  // Extract alpha data (width x height) from raw_data (stride x height).
+  CopyPlane(pic->a, pic->a_stride, quant_alpha, width, width, height);
+
+  if (reduce_levels) {  // No Quantization required for 'quality = 100'.
+    // 16 alpha levels gives quite a low MSE w.r.t original alpha plane hence
+    // mapped to moderate quality 70. Hence Quality:[0, 70] -> Levels:[2, 16]
+    // and Quality:]70, 100] -> Levels:]16, 256].
+    const int alpha_levels = (quality <= 70) ? (2 + quality / 5)
+                                             : (16 + (quality - 70) * 8);
+    ok = QuantizeLevels(quant_alpha, width, height, alpha_levels, &sse);
+  }
+
+  if (ok) {
+    VP8BitWriter bw;
+    int test_filter;
+    uint8_t* filtered_alpha = NULL;
+
+    // We always test WEBP_FILTER_NONE first.
+    ok = EncodeAlphaInternal(quant_alpha, width, height,
+                             method, WEBP_FILTER_NONE, reduce_levels,
+                             effort_level, NULL, &bw, pic->stats);
+    if (!ok) {
+      VP8BitWriterWipeOut(&bw);
+      goto End;
+    }
+
+    if (filter == WEBP_FILTER_FAST) {  // Quick estimate of a second candidate?
+      filter = EstimateBestFilter(quant_alpha, width, height, width);
+    }
+    // Stop?
+    if (filter == WEBP_FILTER_NONE) {
+      goto Ok;
+    }
+
+    filtered_alpha = (uint8_t*)malloc(data_size);
+    ok = (filtered_alpha != NULL);
+    if (!ok) {
+      // 'bw' still owns the WEBP_FILTER_NONE candidate: release it here, since
+      // the 'End' label only frees 'quant_alpha'.
+      VP8BitWriterWipeOut(&bw);
+      goto End;
+    }
+
+    // Try the other mode(s).
+    {
+      WebPAuxStats best_stats;
+      size_t best_score = VP8BitWriterSize(&bw);
+
+      memset(&best_stats, 0, sizeof(best_stats));  // prevent spurious warning
+      if (pic->stats != NULL) best_stats = *pic->stats;
+      for (test_filter = WEBP_FILTER_HORIZONTAL;
+           ok && (test_filter <= WEBP_FILTER_GRADIENT);
+           ++test_filter) {
+        VP8BitWriter tmp_bw;
+        if (filter != WEBP_FILTER_BEST && test_filter != filter) {
+          continue;
+        }
+        ok = EncodeAlphaInternal(quant_alpha, width, height,
+                                 method, test_filter, reduce_levels,
+                                 effort_level, filtered_alpha, &tmp_bw,
+                                 pic->stats);
+        if (ok) {
+          const size_t score = VP8BitWriterSize(&tmp_bw);
+          if (score < best_score) {
+            // swap bitwriter objects.
+            VP8BitWriter tmp = tmp_bw;
+            tmp_bw = bw;
+            bw = tmp;
+            best_score = score;
+            if (pic->stats != NULL) best_stats = *pic->stats;
+          }
+        } else {
+          VP8BitWriterWipeOut(&bw);
+        }
+        VP8BitWriterWipeOut(&tmp_bw);
+      }
+      if (pic->stats != NULL) *pic->stats = best_stats;
+    }
+ Ok:
+    if (ok) {
+      *output_size = VP8BitWriterSize(&bw);
+      *output = VP8BitWriterBuf(&bw);
+      if (pic->stats != NULL) {         // need stats?
+        pic->stats->coded_size += (int)(*output_size);
+        enc->sse_[3] = sse;
+      }
+    }
+    free(filtered_alpha);
+  }
+ End:
+  free(quant_alpha);
+  return ok;
+}
+
+
+//------------------------------------------------------------------------------
+// Main calls
+
+void VP8EncInitAlpha(VP8Encoder* const enc) {
+  enc->alpha_data_size_ = 0;  // no coded alpha data yet
+  enc->alpha_data_ = NULL;
+  enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
+}
+
+int VP8EncFinishAlpha(VP8Encoder* const enc) {
+  if (enc->has_alpha_) {
+    const WebPConfig* const cfg = enc->config_;
+    // alpha_filtering: 0 -> no filter, 1 -> fast estimate, otherwise best.
+    const WEBP_FILTER_TYPE filter =
+        (cfg->alpha_filtering == 1) ? WEBP_FILTER_FAST :
+        (cfg->alpha_filtering == 0) ? WEBP_FILTER_NONE : WEBP_FILTER_BEST;
+    size_t encoded_size = 0;
+    uint8_t* encoded = NULL;
+
+    // cfg->method is passed as the effort level (maps to [0..6]).
+    if (!EncodeAlpha(enc, cfg->alpha_quality, cfg->alpha_compression,
+                     filter, cfg->method, &encoded, &encoded_size)) {
+      return 0;
+    }
+    if (encoded_size != (uint32_t)encoded_size) {  // Sanity check.
+      free(encoded);
+      return 0;
+    }
+    enc->alpha_data_ = encoded;
+    enc->alpha_data_size_ = (uint32_t)encoded_size;
+  }
+  return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
+}
+
+void VP8EncDeleteAlpha(VP8Encoder* const enc) {
+  free(enc->alpha_data_);  // then reset all alpha-related fields
+  enc->has_alpha_ = 0;
+  enc->alpha_data_size_ = 0;
+  enc->alpha_data_ = NULL;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/analysis.c b/drivers/webpold/enc/analysis.c
new file mode 100644
index 0000000000..22cfb492e7
--- /dev/null
+++ b/drivers/webpold/enc/analysis.c
@@ -0,0 +1,364 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Macroblock analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "./vp8enci.h"
+#include "./cost.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MAX_ITERS_K_MEANS  6   // iteration cap of the k-means in AssignSegments()
+
+static int ClipAlpha(int alpha) {  // saturate 'alpha' to the range [0, 255]
+  return (alpha >= 0) ? ((alpha <= 255) ? alpha : 255) : 0;
+}
+
+//------------------------------------------------------------------------------
+// Smooth the segment map by replacing each isolated block with the majority
+// segment of its neighbours.
+
+static void SmoothSegmentMap(VP8Encoder* const enc) {
+  int n, x, y;
+  const int w = enc->mb_w_;   // row stride of enc->mb_info_[]
+  const int h = enc->mb_h_;
+  const int majority_cnt_3_x_3_grid = 5;   // votes needed among 8 neighbours
+  uint8_t* const tmp = (uint8_t*)WebPSafeMalloc((uint64_t)w * h, sizeof(*tmp));
+  assert((uint64_t)(w * h) == (uint64_t)w * h);   // no overflow, as per spec
+
+  if (tmp == NULL) return;   // allocation failed: the map is left unsmoothed
+  for (y = 1; y < h - 1; ++y) {   // interior blocks only; borders are skipped
+    for (x = 1; x < w - 1; ++x) {
+      int cnt[NUM_MB_SEGMENTS] = { 0 };   // votes per segment id
+      const VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
+      int majority_seg = mb->segment_;   // default: keep the current segment
+      // Check the 8 neighbouring segment values.
+      cnt[mb[-w - 1].segment_]++;  // top-left
+      cnt[mb[-w + 0].segment_]++;  // top
+      cnt[mb[-w + 1].segment_]++;  // top-right
+      cnt[mb[   - 1].segment_]++;  // left
+      cnt[mb[   + 1].segment_]++;  // right
+      cnt[mb[ w - 1].segment_]++;  // bottom-left
+      cnt[mb[ w + 0].segment_]++;  // bottom
+      cnt[mb[ w + 1].segment_]++;  // bottom-right
+      for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
+        if (cnt[n] >= majority_cnt_3_x_3_grid) {   // at most one n can reach 5/8
+          majority_seg = n;
+        }
+      }
+      tmp[x + y * w] = majority_seg;   // staged, so later reads see the old map
+    }
+  }
+  for (y = 1; y < h - 1; ++y) {   // second pass: commit the staged values
+    for (x = 1; x < w - 1; ++x) {
+      VP8MBInfo* const mb = &enc->mb_info_[x + w * y];
+      mb->segment_ = tmp[x + y * w];
+    }
+  }
+  free(tmp);
+}
+
+//------------------------------------------------------------------------------
+// Finalize Segment probability based on the coding tree
+
+static int GetProba(int a, int b) {
+  const int sum = a + b;   // number of samples seen
+  if (sum != 0) {
+    return (255 * a + sum / 2) / sum;  // a / (a + b) scaled to 255, rounded
+  }
+  return 255;  // no samples at all: use the default probability
+}
+
+static void SetSegmentProbas(VP8Encoder* const enc) {
+  int p[NUM_MB_SEGMENTS] = { 0 };   // p[s] = number of macroblocks in segment s
+  int n;
+  // Build the per-segment population from the final segment map.
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    const VP8MBInfo* const mb = &enc->mb_info_[n];
+    p[mb->segment_]++;
+  }
+  if (enc->pic_->stats) {   // optional statistics requested by the caller
+    for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
+      enc->pic_->stats->segment_size[n] = p[n];
+    }
+  }
+  if (enc->segment_hdr_.num_segments_ > 1) {
+    uint8_t* const probas = enc->proba_.segments_;
+    probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);   // {0,1} versus {2,3}
+    probas[1] = GetProba(p[0], p[1]);                 // 0 versus 1
+    probas[2] = GetProba(p[2], p[3]);                 // 2 versus 3
+    // The map is only flagged for update if some proba differs from 255.
+    enc->segment_hdr_.update_map_ =
+        (probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
+    enc->segment_hdr_.size_ =   // population-weighted sum of the two bit costs
+      p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
+      p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
+      p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
+      p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
+  } else {   // single segment: nothing to signal
+    enc->segment_hdr_.update_map_ = 0;
+    enc->segment_hdr_.size_ = 0;
+  }
+}
+
+static WEBP_INLINE int clip(int v, int m, int M) {  // clamp 'v' into [m, M]
+  return (v >= m) ? ((v <= M) ? v : M) : m;
+}
+
+static void SetSegmentAlphas(VP8Encoder* const enc,
+                             const int centers[NUM_MB_SEGMENTS],
+                             int mid) {
+  const int nb = enc->segment_hdr_.num_segments_;
+  int min = centers[0], max = centers[0];
+  int n;
+  // Find the range [min, max] spanned by the 'nb' segment centers.
+  if (nb > 1) {
+    for (n = 0; n < nb; ++n) {
+      if (min > centers[n]) min = centers[n];
+      if (max < centers[n]) max = centers[n];
+    }
+  }
+  if (max == min) max = min + 1;   // keeps the divisor below non-zero
+  assert(mid <= max && mid >= min);
+  for (n = 0; n < nb; ++n) {
+    const int alpha = 255 * (centers[n] - mid) / (max - min);  // offset from mid
+    const int beta = 255 * (centers[n] - min) / (max - min);   // offset from min
+    enc->dqm_[n].alpha_ = clip(alpha, -127, 127);
+    enc->dqm_[n].beta_ = clip(beta, 0, 255);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Simplified k-Means, to assign Nb segments based on alpha-histogram
+
+static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
+  const int nb = enc->segment_hdr_.num_segments_;
+  int centers[NUM_MB_SEGMENTS];
+  int weighted_average = 0;
+  int map[256];   // alpha value -> segment; only set where alphas[a] != 0
+  int a, n, k;
+  int min_a = 0, max_a = 255, range_a;
+  // 'int' type is ok for histo, and won't overflow
+  int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
+
+  // Bracket the input: [min_a, max_a] is the populated part of the histogram.
+  for (n = 0; n < 256 && alphas[n] == 0; ++n) {}
+  min_a = n;
+  for (n = 255; n > min_a && alphas[n] == 0; --n) {}
+  max_a = n;
+  range_a = max_a - min_a;
+
+  // Spread initial centers evenly (at odd multiples of range_a / (2 * nb)).
+  for (n = 1, k = 0; n < 2 * nb; n += 2) {
+    centers[k++] = min_a + (n * range_a) / (2 * nb);
+  }
+
+  for (k = 0; k < MAX_ITERS_K_MEANS; ++k) {     // few iters are enough
+    int total_weight;
+    int displaced;
+    // Reset stats
+    for (n = 0; n < nb; ++n) {
+      accum[n] = 0;
+      dist_accum[n] = 0;
+    }
+    // Assign nearest center for each 'a'
+    n = 0;    // track the nearest center for current 'a'
+    for (a = min_a; a <= max_a; ++a) {
+      if (alphas[a]) {
+        while (n < nb - 1 && abs(a - centers[n + 1]) < abs(a - centers[n])) {
+          n++;   // 'n' never decreases as 'a' grows
+        }
+        map[a] = n;
+        // accumulate contribution into best centroid
+        dist_accum[n] += a * alphas[a];
+        accum[n] += alphas[a];
+      }
+    }
+    // All points are classified. Move the centroids to the
+    // center of their respective cloud.
+    displaced = 0;
+    weighted_average = 0;
+    total_weight = 0;
+    for (n = 0; n < nb; ++n) {
+      if (accum[n]) {   // empty clusters keep their previous center
+        const int new_center = (dist_accum[n] + accum[n] / 2) / accum[n];
+        displaced += abs(centers[n] - new_center);
+        centers[n] = new_center;
+        weighted_average += new_center * accum[n];
+        total_weight += accum[n];
+      }
+    }
+    // NOTE(review): total_weight is 0 only if alphas[] is all zero -- confirm.
+    weighted_average = (weighted_average + total_weight / 2) / total_weight;
+    if (displaced < 5) break;   // no need to keep on looping...
+  }
+
+  // Map each original value to the closest centroid
+  for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
+    VP8MBInfo* const mb = &enc->mb_info_[n];
+    const int alpha = mb->alpha_;   // counted in alphas[] by MBAnalyze()
+    mb->segment_ = map[alpha];
+    mb->alpha_ = centers[map[alpha]];     // just for the record.
+  }
+
+  if (nb > 1) {   // smoothing is optional: bit 0 of config->preprocessing
+    const int smooth = (enc->config_->preprocessing & 1);
+    if (smooth) SmoothSegmentMap(enc);
+  }
+
+  SetSegmentProbas(enc);                             // Assign final proba
+  SetSegmentAlphas(enc, centers, weighted_average);  // pick some alphas.
+}
+
+//------------------------------------------------------------------------------
+// Macroblock analysis: collect histogram for each mode, deduce the maximal
+// susceptibility and set best modes for this macroblock.
+// Segment assignment is done later.
+
+// Number of modes to inspect for alpha_ evaluation. For high-quality settings,
+// we don't need to test all the possible modes during the analysis phase.
+#define MAX_INTRA16_MODE 2   // intra16 modes tried when method_ >= 3
+#define MAX_INTRA4_MODE  2   // intra4 modes tried when method_ >= 3
+#define MAX_UV_MODE      2   // chroma modes tried when method_ >= 3
+
+static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
+  const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4;
+  int mode;
+  int best_alpha = -1;   // sentinel; presumably below any histogram score
+  int best_mode = 0;
+  // Build the intra16 predictions, then keep the mode with the largest alpha.
+  VP8MakeLuma16Preds(it);
+  for (mode = 0; mode < max_mode; ++mode) {
+    const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF,
+                                          it->yuv_p_ + VP8I16ModeOffsets[mode],
+                                          0, 16);
+    if (alpha > best_alpha) {   // strict: ties keep the lower mode index
+      best_alpha = alpha;
+      best_mode = mode;
+    }
+  }
+  VP8SetIntra16Mode(it, best_mode);   // recorded on the iterator
+  return best_alpha;
+}
+
+static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
+                                   int best_alpha) {
+  uint8_t modes[16];   // best mode found for each sub-block (index it->i4_)
+  const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
+  int i4_alpha = 0;    // sum of the per-sub-block best alphas
+  VP8IteratorStartI4(it);
+  do {
+    int mode;
+    int best_mode_alpha = -1;
+    const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+
+    VP8MakeIntra4Preds(it);
+    for (mode = 0; mode < max_mode; ++mode) {
+      const int alpha = VP8CollectHistogram(src,
+                                            it->yuv_p_ + VP8I4ModeOffsets[mode],
+                                            0, 1);
+      if (alpha > best_mode_alpha) {   // strict: ties keep the lower mode
+        best_mode_alpha = alpha;
+        modes[it->i4_] = mode;
+      }
+    }
+    i4_alpha += best_mode_alpha;
+    // Note: we reuse the original samples for predictors
+  } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
+
+  if (i4_alpha > best_alpha) {   // intra4 wins only if it beats the input score
+    VP8SetIntra4Mode(it, modes);
+    best_alpha = ClipAlpha(i4_alpha);   // the sum can exceed 255
+  }
+  return best_alpha;
+}
+
+static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
+  int best_alpha = -1;   // sentinel; presumably below any histogram score
+  int best_mode = 0;
+  const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4;
+  int mode;
+  VP8MakeChroma8Preds(it);   // chroma counterpart of the intra16 analysis
+  for (mode = 0; mode < max_mode; ++mode) {
+    const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF,
+                                          it->yuv_p_ + VP8UVModeOffsets[mode],
+                                          16, 16 + 4 + 4);
+    if (alpha > best_alpha) {   // strict: ties keep the lower mode index
+      best_alpha = alpha;
+      best_mode = mode;
+    }
+  }
+  VP8SetIntraUVMode(it, best_mode);
+  return best_alpha;
+}
+
+static void MBAnalyze(VP8EncIterator* const it,
+                      int alphas[256], int* const uv_alpha) {
+  const VP8Encoder* const enc = it->enc_;
+  int best_alpha, best_uv_alpha;
+  // Analyzes one macroblock: updates alphas[], *uv_alpha and it->mb_->alpha_.
+  VP8SetIntra16Mode(it, 0);  // default: Intra16, DC_PRED
+  VP8SetSkip(it, 0);         // not skipped
+  VP8SetSegment(it, 0);      // default segment, spec-wise.
+
+  best_alpha = MBAnalyzeBestIntra16Mode(it);
+  if (enc->method_ != 3) {   // intra4 analysis is skipped for method 3 only
+    // We go and make a fast decision for intra4/intra16.
+    // It's usually not a good and definitive pick, but helps seeding the stats
+    // about level bit-cost.
+    // TODO(skal): improve criterion.
+    best_alpha = MBAnalyzeBestIntra4Mode(it, best_alpha);
+  }
+  best_uv_alpha = MBAnalyzeBestUVMode(it);
+
+  // Final susceptibility mix
+  best_alpha = (best_alpha + best_uv_alpha + 1) / 2;   // rounded average
+  alphas[best_alpha]++;   // NOTE(review): assumes the mix is in [0, 255]
+  *uv_alpha += best_uv_alpha;
+  it->mb_->alpha_ = best_alpha;   // Informative only.
+}
+
+//------------------------------------------------------------------------------
+// Main analysis loop:
+// Collect all susceptibilities for each macroblock and record their
+// distribution in alphas[]. Segments are assigned a posteriori, based on
+// this histogram.
+// We also pick an intra16 prediction mode, which shouldn't be considered
+// final except for fast-encode settings. We can also pick some intra4 modes
+// and decide intra4/intra16, but that's almost always a bad choice at
+// this stage.
+
+int VP8EncAnalyze(VP8Encoder* const enc) {
+  int ok = 1;   // return value; cleared when VP8IteratorProgress() returns 0
+  int alphas[256] = { 0 };   // histogram of the per-macroblock alpha values
+  VP8EncIterator it;
+
+  VP8IteratorInit(enc, &it);
+  enc->uv_alpha_ = 0;   // accumulated over all macroblocks, averaged below
+  do {
+    VP8IteratorImport(&it);
+    MBAnalyze(&it, alphas, &enc->uv_alpha_);
+    ok = VP8IteratorProgress(&it, 20);
+    // Let's pretend we have perfect lossless reconstruction.
+  } while (ok && VP8IteratorNext(&it, it.yuv_in_));
+  enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;   // done even when 'ok' is 0
+  if (ok) AssignSegments(enc, alphas);   // skipped after a failed progress call
+
+  return ok;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/backward_references.c b/drivers/webpold/enc/backward_references.c
new file mode 100644
index 0000000000..b8c8ece806
--- /dev/null
+++ b/drivers/webpold/enc/backward_references.c
@@ -0,0 +1,874 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/color_cache.h"
+#include "../utils/utils.h"
+
+#define VALUES_IN_BYTE 256   // number of values of one 8-bit channel
+
+#define HASH_BITS 18   // hash codes are HASH_BITS wide
+#define HASH_SIZE (1 << HASH_BITS)   // entries in hash_to_first_index_[]
+#define HASH_MULTIPLIER (0xc6a4a7935bd1e995ULL)   // 64-bit multiplicative hash
+
+// 1M window (4M bytes) minus 120 special codes for short distances.
+#define WINDOW_SIZE ((1 << 20) - 120)
+
+// Bounds for the match length.
+#define MIN_LENGTH 2      // shorter matches are not reported as copies
+#define MAX_LENGTH 4096   // longest match searched for
+
+typedef struct {
+  // Stores the most recently added position with the given hash value
+  // (-1 when no position has been inserted for that hash yet).
+  int32_t hash_to_first_index_[HASH_SIZE];
+  // chain_[pos] stores the previous position with the same hash value
+  // for every pixel in the image (-1 terminates the chain).
+  int32_t* chain_;
+} HashChain;
+
+// -----------------------------------------------------------------------------
+
+static const uint8_t plane_to_code_lut[128] = {  // [yoffset * 16 + 8 - xoffset]
+ 96,   73,  55,  39,  23,  13,   5,  1,  255, 255, 255, 255, 255, 255, 255, 255,
+ 101,  78,  58,  42,  26,  16,   8,  2,    0,   3,  9,   17,  27,  43,  59,  79,
+ 102,  86,  62,  46,  32,  20,  10,  6,    4,   7,  11,  21,  33,  47,  63,  87,
+ 105,  90,  70,  52,  37,  28,  18,  14,  12,  15,  19,  29,  38,  53,  71,  91,
+ 110,  99,  82,  66,  48,  35,  30,  24,  22,  25,  31,  36,  49,  67,  83, 100,
+ 115, 108,  94,  76,  64,  50,  44,  40,  34,  41,  45,  51,  65,  77,  95, 109,
+ 118, 113, 103,  92,  80,  68,  60,  56,  54,  57,  61,  69,  81,  93, 104, 114,
+ 119, 116, 111, 106,  97,  88,  84,  74,  72,  75,  85,  89,  98, 107, 112, 117
+};  // 0-based short-distance codes; 255 presumably marks unused slots
+
+static int DistanceToPlaneCode(int xsize, int dist) {
+  const int dy = dist / xsize;        // whole rows covered by 'dist'
+  const int dx = dist - dy * xsize;   // remaining columns
+  if (dx <= 8 && dy < 8) {            // source lies up and at most 8 to the left
+    return plane_to_code_lut[dy * 16 + 8 - dx] + 1;
+  } else if (dx > xsize - 8 && dy < 7) {   // wrapped: one row up, to the right
+    return plane_to_code_lut[(dy + 1) * 16 + 8 + (xsize - dx)] + 1;
+  }
+  return dist + 120;   // plain distance, shifted past the 120 special codes
+}
+
+static WEBP_INLINE int FindMatchLength(const uint32_t* const array1,
+                                       const uint32_t* const array2,
+                                       const int max_limit) {
+  int n;   // length of the common prefix found so far
+  for (n = 0; n < max_limit; ++n) {
+    if (array1[n] != array2[n]) break;   // first mismatch ends the match
+  }
+  return n;
+}
+
+// -----------------------------------------------------------------------------
+//  VP8LBackwardRefs
+
+void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs) {
+  // Puts 'refs' in the empty, unallocated state; a NULL handle is ignored.
+  if (refs == NULL) return;
+  refs->max_size = 0;
+  refs->size = 0;
+  refs->refs = NULL;
+}
+
+void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs) {
+  // Releases the token array and returns 'refs' to the empty state.
+  if (refs == NULL) return;
+  free(refs->refs);
+  VP8LInitBackwardRefs(refs);
+}
+
+int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size) {
+  PixOrCopy* tokens;   // storage for up to 'max_size' entries
+  assert(refs != NULL);
+  tokens = (PixOrCopy*)WebPSafeMalloc((uint64_t)max_size, sizeof(*tokens));
+  refs->refs = tokens;   // NULL when the allocation failed
+  refs->size = 0;
+  // A capacity is only advertised when the buffer really exists.
+  refs->max_size = (tokens != NULL) ? max_size : 0;
+  return (tokens != NULL);
+}
+
+// -----------------------------------------------------------------------------
+// Hash chains
+
+static WEBP_INLINE uint64_t GetPixPairHash64(const uint32_t* const argb) {
+  const uint64_t lo = argb[0];                   // first pixel: low 32 bits
+  const uint64_t hi = (uint64_t)argb[1] << 32;   // second pixel: high 32 bits
+  return ((hi | lo) * HASH_MULTIPLIER) >> (64 - HASH_BITS);  // top HASH_BITS
+}
+
+static int HashChainInit(HashChain* const p, int size) {
+  int pos, slot;
+  // One link per pixel; -1 means "no earlier position" everywhere.
+  p->chain_ = (int*)WebPSafeMalloc((uint64_t)size, sizeof(*p->chain_));
+  if (p->chain_ == NULL) return 0;   // 'p' itself is left to the caller
+
+  for (slot = 0; slot < HASH_SIZE; ++slot) {
+    p->hash_to_first_index_[slot] = -1;
+  }
+  for (pos = 0; pos < size; ++pos) {
+    p->chain_[pos] = -1;
+  }
+  return 1;
+}
+
+static void HashChainDelete(HashChain* const p) {
+  // Frees the link array and then the HashChain object itself.
+  if (p == NULL) return;
+  free(p->chain_);
+  free(p);
+}
+
+// Insertion of two pixels at a time.
+static void HashChainInsert(HashChain* const p,
+                            const uint32_t* const argb, int pos) {
+  int32_t* const head = &p->hash_to_first_index_[GetPixPairHash64(argb)];
+  p->chain_[pos] = *head;   // link to the previous position with this hash
+  *head = pos;              // 'pos' becomes the most recent one
+}
+
+static int HashChainFindCopy(const HashChain* const p,
+                             int quality, int index, int xsize,
+                             const uint32_t* const argb, int maxlen,
+                             int* const distance_ptr,
+                             int* const length_ptr) {
+  const uint64_t hash_code = GetPixPairHash64(&argb[index]);  // reads 2 pixels
+  int prev_length = 0;
+  int64_t best_val = 0;   // score of the best candidate so far
+  int best_length = 0;
+  int best_distance = 0;
+  const uint32_t* const argb_start = argb + index;
+  const int iter_min_mult = (quality < 50) ? 2 : (quality < 75) ? 4 : 8;
+  const int iter_min = -quality * iter_min_mult;   // hard stop for iter_cnt
+  int iter_cnt = 10 + (quality >> 1);              // initial search budget
+  const int min_pos = (index > WINDOW_SIZE) ? index - WINDOW_SIZE : 0;
+  int pos;
+
+  assert(xsize > 0);
+  for (pos = p->hash_to_first_index_[hash_code];
+       pos >= min_pos;   // also ends on the -1 chain terminator
+       pos = p->chain_[pos]) {
+    int64_t val;
+    int curr_length;
+    if (iter_cnt < 0) {   // budget spent: go on only while no good match yet
+      if (iter_cnt < iter_min || best_val >= 0xff0000) {
+        break;
+      }
+    }
+    --iter_cnt;
+    if (best_length != 0 &&   // quick reject: last pixel of the best length
+        argb[pos + best_length - 1] != argb_start[best_length - 1]) {
+      continue;
+    }
+    curr_length = FindMatchLength(argb + pos, argb_start, maxlen);
+    if (curr_length < prev_length) {   // shorter than the current best
+      continue;
+    }
+    val = 65536 * curr_length;   // length dominates the score
+    // Favoring 2d locality here gives savings for certain images.
+    if (index - pos < 9 * xsize) {
+      const int y = (index - pos) / xsize;
+      int x = (index - pos) % xsize;
+      if (x > xsize / 2) {
+        x = xsize - x;   // fold: horizontal offset measured from the right
+      }
+      if (x <= 7 && x >= -8) {   // NOTE(review): x looks non-negative here
+        val -= y * y + x * x;
+      } else {
+        val -= 9 * 9 + 9 * 9;
+      }
+    } else {
+      val -= 9 * 9 + 9 * 9;   // flat penalty for non-local candidates
+    }
+    if (best_val < val) {
+      prev_length = curr_length;
+      best_val = val;
+      best_length = curr_length;
+      best_distance = index - pos;
+      if (curr_length >= MAX_LENGTH) {   // cannot be improved further
+        break;
+      }
+      if ((best_distance == 1 || best_distance == xsize) &&
+          best_length >= 128) {   // long copy from the left or from above
+        break;
+      }
+    }
+  }
+  *distance_ptr = best_distance;   // both stay 0 when nothing was found
+  *length_ptr = best_length;
+  return (best_length >= MIN_LENGTH);
+}
+
+static WEBP_INLINE void PushBackCopy(VP8LBackwardRefs* const refs, int length) {
+  // Appends distance-1 copies covering 'length' pixels, MAX_LENGTH at a time.
+  int next = refs->size;
+  for (; length >= MAX_LENGTH; length -= MAX_LENGTH) {
+    refs->refs[next++] = PixOrCopyCreateCopy(1, MAX_LENGTH);
+  }
+  if (length > 0) {   // left-over shorter than MAX_LENGTH
+    refs->refs[next++] = PixOrCopyCreateCopy(1, length);
+  }
+  refs->size = next;
+}
+
+static void BackwardReferencesRle(int xsize, int ysize,
+                                  const uint32_t* const argb,
+                                  VP8LBackwardRefs* const refs) {
+  const int num_pixels = xsize * ysize;
+  int run = 0;   // pixels repeating the previous one, not yet emitted
+  int pos;
+  // The first pixel has no predecessor, so it is always a literal.
+  refs->size = 0;
+  refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[0]);
+  for (pos = 1; pos < num_pixels; ++pos) {
+    if (argb[pos] != argb[pos - 1]) {
+      PushBackCopy(refs, run);   // flush the pending run (no-op when 0)
+      run = 0;
+      refs->refs[refs->size++] = PixOrCopyCreateLiteral(argb[pos]);
+    } else {
+      ++run;
+    }
+  }
+  PushBackCopy(refs, run);
+}
+
+static int BackwardReferencesHashChain(int xsize, int ysize,
+                                       const uint32_t* const argb,
+                                       int cache_bits, int quality,
+                                       VP8LBackwardRefs* const refs) {
+  int i;
+  int ok = 0;        // return value: 1 on success, 0 on failure
+  int cc_init = 0;   // set once 'hashers' holds resources to release
+  const int use_color_cache = (cache_bits > 0);
+  const int pix_count = xsize * ysize;
+  HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+  VP8LColorCache hashers;
+  // Greedy hash-chain matching with a one-pixel lookahead; fills 'refs'.
+  if (hash_chain == NULL) return 0;
+  // Init the chain before anything can 'goto Error': cleanup frees chain_.
+  if (!HashChainInit(hash_chain, pix_count)) goto Error;
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  refs->size = 0;
+  for (i = 0; i < pix_count; ) {
+    // Alternative#1: Code the pixels starting at 'i' using backward reference.
+    int offset = 0;
+    int len = 0;
+    if (i < pix_count - 1) {  // FindCopy(i,..) reads pixels at [i] and [i + 1].
+      int maxlen = pix_count - i;
+      if (maxlen > MAX_LENGTH) {
+        maxlen = MAX_LENGTH;
+      }
+      HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
+                        &offset, &len);
+    }
+    if (len >= MIN_LENGTH) {
+      // Alternative#2: Insert the pixel at 'i' as literal, and code the
+      // pixels starting at 'i + 1' using backward reference.
+      int offset2 = 0;
+      int len2 = 0;
+      int k;
+      HashChainInsert(hash_chain, &argb[i], i);
+      if (i < pix_count - 2) {  // FindCopy(i+1,..) reads [i + 1] and [i + 2].
+        int maxlen = pix_count - (i + 1);
+        if (maxlen > MAX_LENGTH) {
+          maxlen = MAX_LENGTH;
+        }
+        HashChainFindCopy(hash_chain, quality,
+                          i + 1, xsize, argb, maxlen, &offset2, &len2);
+        if (len2 > len + 1) {
+          const uint32_t pixel = argb[i];
+          // Alternative#2 is a better match. So push pixel at 'i' as literal.
+          if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+            const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
+            refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
+          } else {
+            refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
+          }
+          ++refs->size;
+          if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+          i++;  // Backward reference to be done for next pixel.
+          len = len2;
+          offset = offset2;
+        }
+      }
+      if (len >= MAX_LENGTH) {
+        len = MAX_LENGTH - 1;
+      }
+      refs->refs[refs->size++] = PixOrCopyCreateCopy(offset, len);
+      if (use_color_cache) {   // the copied pixels also go through the cache
+        for (k = 0; k < len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      // Add to the hash_chain (but cannot add the last pixel).
+      {
+        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+        for (k = 1; k < last; ++k) {
+          HashChainInsert(hash_chain, &argb[i + k], i + k);
+        }
+      }
+      i += len;
+    } else {
+      const uint32_t pixel = argb[i];
+      if (use_color_cache && VP8LColorCacheContains(&hashers, pixel)) {
+        // push pixel as a PixOrCopyCreateCacheIdx pixel
+        const int ix = VP8LColorCacheGetIndex(&hashers, pixel);
+        refs->refs[refs->size] = PixOrCopyCreateCacheIdx(ix);
+      } else {
+        refs->refs[refs->size] = PixOrCopyCreateLiteral(pixel);
+      }
+      ++refs->size;
+      if (use_color_cache) VP8LColorCacheInsert(&hashers, pixel);
+      if (i + 1 < pix_count) {   // hashing reads [i] and [i + 1]
+        HashChainInsert(hash_chain, &argb[i], i);
+      }
+      ++i;
+    }
+  }
+  ok = 1;
+Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  HashChainDelete(hash_chain);   // chain_ is either valid or NULL here
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+
+typedef struct {   // per-symbol bit-cost estimates, filled by CostModelBuild()
+  double alpha_[VALUES_IN_BYTE];           // indexed by bits 24..31 of a pixel
+  double red_[VALUES_IN_BYTE];             // indexed by bits 16..23
+  double literal_[PIX_OR_COPY_CODES_MAX];  // bits 8..15, then length + cache
+  double blue_[VALUES_IN_BYTE];            // indexed by bits 0..7
+  double distance_[NUM_DISTANCE_CODES];    // indexed by distance prefix code
+} CostModel;
+
+static int BackwardReferencesTraceBackwards(
+    int xsize, int ysize, int recursive_cost_model,
+    const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs);
+
+static void ConvertPopulationCountTableToBitEstimates(
+    int num_symbols, const int population_counts[], double output[]) {
+  int total = 0;
+  int used_symbols = 0;   // symbols with a strictly positive count
+  int s;
+  for (s = 0; s < num_symbols; ++s) {
+    const int count = population_counts[s];
+    total += count;
+    used_symbols += (count > 0);
+  }
+  if (used_symbols > 1) {
+    // cost(s) = log2(total) - log2(count[s])
+    const double log_total = VP8LFastLog2(total);
+    for (s = 0; s < num_symbols; ++s) {
+      output[s] = log_total - VP8LFastLog2(population_counts[s]);
+    }
+  } else {   // zero or one symbol in use: every cost is set to 0
+    memset(output, 0, num_symbols * sizeof(*output));
+  }
+}
+
+static int CostModelBuild(CostModel* const m, int xsize, int ysize,
+                          int recursion_level, const uint32_t* const argb,
+                          int cache_bits) {
+  int ok = 0;   // return value: 1 once 'm' has been filled
+  VP8LHistogram histo;
+  VP8LBackwardRefs refs;   // scratch tokens; fully set by the Alloc call below
+  const int quality = 100;
+
+  if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error;
+  // Produce a first set of backward references to take statistics from.
+  if (recursion_level > 0) {
+    if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
+                                          argb, cache_bits, &refs)) {
+      goto Error;
+    }
+  } else {
+    if (!BackwardReferencesHashChain(xsize, ysize, argb, cache_bits, quality,
+                                     &refs)) {
+      goto Error;
+    }
+  }
+  VP8LHistogramCreate(&histo, &refs, cache_bits);   // symbol counts of 'refs'
+  ConvertPopulationCountTableToBitEstimates(
+      VP8LHistogramNumCodes(&histo), histo.literal_, m->literal_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo.red_, m->red_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo.blue_, m->blue_);
+  ConvertPopulationCountTableToBitEstimates(
+      VALUES_IN_BYTE, histo.alpha_, m->alpha_);
+  ConvertPopulationCountTableToBitEstimates(
+      NUM_DISTANCE_CODES, histo.distance_, m->distance_);
+  ok = 1;
+
+ Error:
+  VP8LClearBackwardRefs(&refs);   // refs.refs is NULL if the Alloc call failed
+  return ok;
+}
+
+static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) {
+  const double a_cost = m->alpha_[v >> 24];
+  const double r_cost = m->red_[(v >> 16) & 0xff];
+  const double g_cost = m->literal_[(v >> 8) & 0xff];
+  return a_cost + r_cost + g_cost + m->blue_[v & 0xff];  // sum over 4 channels
+}
+
+static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) {
+  // Cache symbols are stored after the literals and the length prefix codes.
+  return m->literal_[(int)(VALUES_IN_BYTE + NUM_LENGTH_CODES + idx)];
+}
+
+static WEBP_INLINE double GetLengthCost(const CostModel* const m,
+                                        uint32_t length) {
+  int prefix, num_extra_bits, unused_extra_value;
+  PrefixEncode(length, &prefix, &num_extra_bits, &unused_extra_value);
+  return num_extra_bits + m->literal_[VALUES_IN_BYTE + prefix];
+}
+
+static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
+                                          uint32_t distance) {
+  int prefix, num_extra_bits, unused_extra_value;
+  PrefixEncode(distance, &prefix, &num_extra_bits, &unused_extra_value);
+  return num_extra_bits + m->distance_[prefix];
+}
+
+static int BackwardReferencesHashChainDistanceOnly(
+    int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
+    int cache_bits, uint32_t* const dist_array) {
+  int i;
+  int ok = 0;        // return value: 1 on success, 0 on failure
+  int cc_init = 0;   // set once 'hashers' holds resources to release
+  const int quality = 100;
+  const int pix_count = xsize * ysize;
+  const int use_color_cache = (cache_bits > 0);
+  double* const cost =   // cost[i]: cheapest known cost of coding up to pixel i
+      (double*)WebPSafeMalloc((uint64_t)pix_count, sizeof(*cost));
+  CostModel* cost_model = (CostModel*)malloc(sizeof(*cost_model));
+  HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+  VP8LColorCache hashers;
+  const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
+  const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
+  // Fills dist_array[i] with the length of the cheapest step ending at pixel i.
+  if (hash_chain != NULL) hash_chain->chain_ = NULL;  // 'Error' frees chain_
+  if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error;
+  if (!HashChainInit(hash_chain, pix_count)) goto Error;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+
+  if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
+                      cache_bits)) {
+    goto Error;
+  }
+
+  for (i = 0; i < pix_count; ++i) cost[i] = 1e100;
+
+  // We loop one pixel at a time, but store all currently best points to
+  // non-processed locations from this point.
+  dist_array[0] = 0;
+  for (i = 0; i < pix_count; ++i) {
+    double prev_cost = 0.0;
+    int shortmax;
+    if (i > 0) {
+      prev_cost = cost[i - 1];
+    }
+    for (shortmax = 0; shortmax < 2; ++shortmax) {   // long search, then len 2
+      int offset = 0;
+      int len = 0;
+      if (i < pix_count - 1) {  // FindCopy reads pixels at [i] and [i + 1].
+        int maxlen = shortmax ? 2 : MAX_LENGTH;
+        if (maxlen > pix_count - i) {
+          maxlen = pix_count - i;
+        }
+        HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
+                          &offset, &len);
+      }
+      if (len >= MIN_LENGTH) {
+        const int code = DistanceToPlaneCode(xsize, offset);
+        const double distance_cost =
+            prev_cost + GetDistanceCost(cost_model, code);
+        int k;
+        for (k = 1; k < len; ++k) {   // every prefix of the match is a candidate
+          const double cost_val =
+              distance_cost + GetLengthCost(cost_model, k);
+          if (cost[i + k] > cost_val) {
+            cost[i + k] = cost_val;
+            dist_array[i + k] = k + 1;
+          }
+        }
+        // This if is for speedup only. It roughly doubles the speed, and
+        // makes compression worse by .1 %.
+        if (len >= 128 && code < 2) {
+          // Long copy for short distances, let's skip the middle
+          // lookups for better copies.
+          // 1) insert the hashes.
+          if (use_color_cache) {
+            for (k = 0; k < len; ++k) {
+              VP8LColorCacheInsert(&hashers, argb[i + k]);
+            }
+          }
+          // 2) Add to the hash_chain (but cannot add the last pixel)
+          {
+            const int last = (len < pix_count - 1 - i) ? len
+                                                       : pix_count - 1 - i;
+            for (k = 0; k < last; ++k) {
+              HashChainInsert(hash_chain, &argb[i + k], i + k);
+            }
+          }
+          // 3) jump.
+          i += len - 1;  // for loop does ++i, thus -1 here.
+          goto next_symbol;
+        }
+      }
+    }
+    if (i < pix_count - 1) {   // hashing reads [i] and [i + 1]
+      HashChainInsert(hash_chain, &argb[i], i);
+    }
+    {
+      // inserting a literal pixel
+      double cost_val = prev_cost;
+      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+        const int ix = VP8LColorCacheGetIndex(&hashers, argb[i]);
+        cost_val += GetCacheCost(cost_model, ix) * mul0;
+      } else {
+        cost_val += GetLiteralCost(cost_model, argb[i]) * mul1;
+      }
+      if (cost[i] > cost_val) {
+        cost[i] = cost_val;
+        dist_array[i] = 1;  // only one is inserted.
+      }
+      if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+    }
+ next_symbol: ;
+  }
+  // Last pixel still to do, it can only be a single step if not reached
+  // through cheaper means already.
+  ok = 1;
+Error:
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  HashChainDelete(hash_chain);   // chain_ is either valid or NULL here
+  free(cost_model);
+  free(cost);
+  return ok;
+}
+
+static int TraceBackwards(const uint32_t* const dist_array,
+                          int dist_array_size,
+                          uint32_t** const chosen_path,
+                          int* const chosen_path_size) {
+  int num_steps = 0;
+  int pos = dist_array_size - 1;
+  // Pass 1: walk back from the last entry, counting the steps on the path.
+  while (pos >= 0) {
+    const int step = dist_array[pos];
+    assert(step >= 1);
+    pos -= step;
+    ++num_steps;
+  }
+  // The path buffer holds one entry per step.
+  *chosen_path_size = num_steps;
+  *chosen_path =
+      (uint32_t*)WebPSafeMalloc((uint64_t)num_steps, sizeof(**chosen_path));
+  if (*chosen_path == NULL) return 0;
+
+  // Pass 2: same walk, filling the buffer back to front.
+  for (pos = dist_array_size - 1; pos >= 0; ) {
+    const int step = dist_array[pos];
+    assert(step >= 1);
+    (*chosen_path)[--num_steps] = step;
+    pos -= step;
+  }
+  return 1;
+}
+
+static int BackwardReferencesHashChainFollowChosenPath(
+    int xsize, int ysize, const uint32_t* const argb, int cache_bits,
+    const uint32_t* const chosen_path, int chosen_path_size,
+    VP8LBackwardRefs* const refs) {
+  const int quality = 100;  // passed to HashChainFindCopy below
+  const int pix_count = xsize * ysize;
+  const int use_color_cache = (cache_bits > 0);
+  int size = 0;  // number of tokens written to refs->refs so far
+  int i = 0;  // index in argb of the next pixel to encode
+  int k;
+  int ix;  // index of the current entry in chosen_path
+  int ok = 0;
+  int cc_init = 0;  // non-zero once 'hashers' must be cleared on exit
+  HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
+  VP8LColorCache hashers;
+
+  if (hash_chain == NULL || !HashChainInit(hash_chain, pix_count)) {
+    goto Error;
+  }
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) goto Error;
+  }
+  // Emit one token per path entry: a copy if it is != 1, else a single pixel.
+  refs->size = 0;
+  for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
+    int offset = 0;
+    int len = 0;
+    int maxlen = chosen_path[ix];  // step size chosen for this token
+    if (maxlen != 1) {
+      HashChainFindCopy(hash_chain, quality,
+                        i, xsize, argb, maxlen, &offset, &len);
+      assert(len == maxlen);  // the match found must cover the whole step
+      refs->refs[size] = PixOrCopyCreateCopy(offset, len);
+      if (use_color_cache) {
+        for (k = 0; k < len; ++k) {
+          VP8LColorCacheInsert(&hashers, argb[i + k]);
+        }
+      }
+      {  // insert the copied pixels, except the very last pixel of the image
+        const int last = (len < pix_count - 1 - i) ? len : pix_count - 1 - i;
+        for (k = 0; k < last; ++k) {
+          HashChainInsert(hash_chain, &argb[i + k], i + k);
+        }
+      }
+      i += len;
+    } else {
+      if (use_color_cache && VP8LColorCacheContains(&hashers, argb[i])) {
+        // The pixel is already in the color cache: emit its cache index.
+        const int idx = VP8LColorCacheGetIndex(&hashers, argb[i]);
+        refs->refs[size] = PixOrCopyCreateCacheIdx(idx);
+      } else {
+        refs->refs[size] = PixOrCopyCreateLiteral(argb[i]);
+      }
+      if (use_color_cache) VP8LColorCacheInsert(&hashers, argb[i]);
+      if (i + 1 < pix_count) {  // the last pixel is never added to the chain
+        HashChainInsert(hash_chain, &argb[i], i);
+      }
+      ++i;
+    }
+  }
+  assert(size <= refs->max_size);
+  refs->size = size;
+  ok = 1;
+Error:  // cleanup shared by the success and failure paths
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  HashChainDelete(hash_chain);
+  return ok;  // 1 on success, 0 if hash chain or color cache setup failed
+}
+
+// Builds 'refs' from a traced-back step path. Returns 1 on success, 0 on error.
+static int BackwardReferencesTraceBackwards(int xsize, int ysize,
+                                            int recursive_cost_model,
+                                            const uint32_t* const argb,
+                                            int cache_bits,
+                                            VP8LBackwardRefs* const refs) {
+  int ok = 0;
+  const int dist_array_size = xsize * ysize;
+  uint32_t* chosen_path = NULL;
+  int chosen_path_size = 0;
+  uint32_t* dist_array =
+      (uint32_t*)WebPSafeMalloc((uint64_t)dist_array_size, sizeof(*dist_array));
+  // Steps: fill dist_array, trace the path back through it, then emit tokens.
+  if (dist_array == NULL) goto Error;
+
+  if (!BackwardReferencesHashChainDistanceOnly(
+      xsize, ysize, recursive_cost_model, argb, cache_bits, dist_array)) {
+    goto Error;
+  }
+  if (!TraceBackwards(dist_array, dist_array_size,
+                      &chosen_path, &chosen_path_size)) {
+    goto Error;
+  }
+  free(dist_array);   // no need to retain this memory any longer
+  dist_array = NULL;  // so that the cleanup below does not free it twice
+  if (!BackwardReferencesHashChainFollowChosenPath(
+      xsize, ysize, argb, cache_bits, chosen_path, chosen_path_size, refs)) {
+    goto Error;
+  }
+  ok = 1;
+ Error:
+  free(chosen_path);  // free(NULL) is a no-op, so no guards are needed here
+  free(dist_array);
+  return ok;
+}
+
+// Rewrites the distance of every copy token in 'refs' as a plane code.
+static void BackwardReferences2DLocality(int xsize,
+                                         VP8LBackwardRefs* const refs) {
+  int idx;
+  for (idx = 0; idx < refs->size; ++idx) {
+    PixOrCopy* const token = &refs->refs[idx];
+    if (!PixOrCopyIsCopy(token)) continue;  // literals/cache hits: untouched
+    token->argb_or_distance =
+        DistanceToPlaneCode(xsize, (int)token->argb_or_distance);
+  }
+}
+
+// Computes the backward references of 'argb' (width x height) into 'best',
+// keeping the cheaper of an RLE-only and an LZ77 candidate (bit-cost estimate).
+// Returns 1 on success; on failure returns 0 with 'best' cleared.
+int VP8LGetBackwardReferences(int width, int height,
+                              const uint32_t* const argb,
+                              int quality, int cache_bits, int use_2d_locality,
+                              VP8LBackwardRefs* const best) {
+  int ok = 0;
+  int lz77_is_useful;
+  VP8LBackwardRefs refs_rle, refs_lz77;
+  const int num_pix = width * height;
+
+  VP8LBackwardRefsAlloc(&refs_rle, num_pix);
+  VP8LBackwardRefsAlloc(&refs_lz77, num_pix);
+  VP8LInitBackwardRefs(best);
+  if (refs_rle.refs == NULL || refs_lz77.refs == NULL) {
+ Error1:  // Failure while both candidates are still owned here: free both.
+    VP8LClearBackwardRefs(&refs_rle);
+    VP8LClearBackwardRefs(&refs_lz77);
+    goto End;
+  }
+
+  if (!BackwardReferencesHashChain(width, height, argb, cache_bits, quality,
+                                   &refs_lz77)) {
+    goto Error1;  // not End: both candidate buffers must be released.
+  }
+  // Backward Reference using RLE only.
+  BackwardReferencesRle(width, height, argb, &refs_rle);
+
+  {
+    double bit_cost_lz77, bit_cost_rle;
+    VP8LHistogram* const histo = (VP8LHistogram*)malloc(sizeof(*histo));
+    if (histo == NULL) goto Error1;
+    // Evaluate lz77 coding
+    VP8LHistogramCreate(histo, &refs_lz77, cache_bits);
+    bit_cost_lz77 = VP8LHistogramEstimateBits(histo);
+    // Evaluate RLE coding
+    VP8LHistogramCreate(histo, &refs_rle, cache_bits);
+    bit_cost_rle = VP8LHistogramEstimateBits(histo);
+    // Decide if LZ77 is useful.
+    lz77_is_useful = (bit_cost_lz77 < bit_cost_rle);
+    free(histo);
+  }
+
+  // Choose appropriate backward reference.
+  if (lz77_is_useful) {
+    // TraceBackwards is costly. Run it for higher qualities.
+    const int try_lz77_trace_backwards = (quality >= 75);
+    *best = refs_lz77;   // default guess: lz77 is better
+    VP8LClearBackwardRefs(&refs_rle);
+    if (try_lz77_trace_backwards) {
+      const int recursion_level = (num_pix < 320 * 200) ? 1 : 0;
+      VP8LBackwardRefs refs_trace;
+      if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) goto End;
+      if (BackwardReferencesTraceBackwards(
+          width, height, recursion_level, argb, cache_bits, &refs_trace)) {
+        VP8LClearBackwardRefs(&refs_lz77);
+        *best = refs_trace;
+      } else {
+        VP8LClearBackwardRefs(&refs_trace);  // keep lz77; don't leak the trace
+      }
+    }
+  } else {
+    VP8LClearBackwardRefs(&refs_lz77);
+    *best = refs_rle;
+  }
+
+  if (use_2d_locality) BackwardReferences2DLocality(width, best);
+  ok = 1;
+
+ End:
+  if (!ok) VP8LClearBackwardRefs(best);
+  return ok;
+}
+
+// Fills 'histo' from 'refs' as coded with 'cache_bits'. Returns 1 on success.
+static int ComputeCacheHistogram(const uint32_t* const argb,
+                                 int xsize, int ysize,
+                                 const VP8LBackwardRefs* const refs,
+                                 int cache_bits,
+                                 VP8LHistogram* const histo) {
+  int pixel_index = 0;  // index in argb of the current token's first pixel
+  int i;
+  uint32_t k;
+  VP8LColorCache hashers;
+  const int use_color_cache = (cache_bits > 0);
+  int cc_init = 0;
+
+  if (use_color_cache) {
+    cc_init = VP8LColorCacheInit(&hashers, cache_bits);
+    if (!cc_init) return 0;  // the color cache could not be initialized
+  }
+
+  for (i = 0; i < refs->size; ++i) {
+    const PixOrCopy* const v = &refs->refs[i];
+    if (PixOrCopyIsLiteral(v)) {
+      if (use_color_cache &&
+          VP8LColorCacheContains(&hashers, argb[pixel_index])) {
+        // The literal is already in the cache: count it as a cache index.
+        const int ix = VP8LColorCacheGetIndex(&hashers, argb[pixel_index]);
+        const PixOrCopy token = PixOrCopyCreateCacheIdx(ix);
+        VP8LHistogramAddSinglePixOrCopy(histo, &token);
+      } else {
+        VP8LHistogramAddSinglePixOrCopy(histo, v);
+      }
+    } else {
+      VP8LHistogramAddSinglePixOrCopy(histo, v);
+    }
+    if (use_color_cache) {  // every pixel covered by the token enters the cache
+      for (k = 0; k < PixOrCopyLength(v); ++k) {
+        VP8LColorCacheInsert(&hashers, argb[pixel_index + k]);
+      }
+    }
+    pixel_index += PixOrCopyLength(v);  // advance past this token's pixels
+  }
+  assert(pixel_index == xsize * ysize);  // tokens must cover the whole image
+  (void)xsize;  // xsize is not used in non-debug compilations otherwise.
+  (void)ysize;  // ysize is not used in non-debug compilations otherwise.
+  if (cc_init) VP8LColorCacheClear(&hashers);
+  return 1;
+}
+
+// Returns how many bits are to be used for a color cache.
+int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
+                                      int xsize, int ysize,
+                                      int* const best_cache_bits) {
+  int ok = 0;
+  int cache_bits;
+  double lowest_entropy = 1e99;
+  VP8LBackwardRefs refs;
+  static const double kSmallPenaltyForLargeCache = 4.0;
+  static const int quality = 30;
+  if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize) ||
+      !BackwardReferencesHashChain(xsize, ysize, argb, 0, quality, &refs)) {
+    goto Error;
+  }
+  for (cache_bits = 0; cache_bits <= MAX_COLOR_CACHE_BITS; ++cache_bits) {
+    double cur_entropy;
+    VP8LHistogram histo;
+    VP8LHistogramInit(&histo, cache_bits);
+    ComputeCacheHistogram(argb, xsize, ysize, &refs, cache_bits, &histo);
+    cur_entropy = VP8LHistogramEstimateBits(&histo) +
+        kSmallPenaltyForLargeCache * cache_bits;
+    if (cache_bits == 0 || cur_entropy < lowest_entropy) {
+      *best_cache_bits = cache_bits;
+      lowest_entropy = cur_entropy;
+    }
+  }
+  ok = 1;
+ Error:
+  VP8LClearBackwardRefs(&refs);
+  return ok;
+}
diff --git a/drivers/webpold/enc/backward_references.h b/drivers/webpold/enc/backward_references.h
new file mode 100644
index 0000000000..8006a56ba1
--- /dev/null
+++ b/drivers/webpold/enc/backward_references.h
@@ -0,0 +1,212 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+
+#ifndef WEBP_ENC_BACKWARD_REFERENCES_H_
+#define WEBP_ENC_BACKWARD_REFERENCES_H_
+
+#include <assert.h>
+#include <stdlib.h>
+#include "../types.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// The spec allows 11, we use 9 bits to reduce memory consumption in encoding.
+// Having 9 instead of 11 only removes about 0.25 % of compression density.
+#define MAX_COLOR_CACHE_BITS 9
+
+// Maximum number of codes we will ever use:
+#define PIX_OR_COPY_CODES_MAX \
+    (NUM_LITERAL_CODES + NUM_LENGTH_CODES + (1 << MAX_COLOR_CACHE_BITS))
+
+// -----------------------------------------------------------------------------
+// PrefixEncode()
+
+// Returns floor(log2(n)), or -1 if n == 0. Use GNU builtins where available.
+#if defined(__GNUC__) && \
+    ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);  // 31 ^ clz is 31 - clz (32-bit)
+}
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#include <intrin.h>
+#pragma intrinsic(_BitScanReverse)
+
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  unsigned long first_set_bit;  // written by _BitScanReverse when n != 0
+  return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1;
+}
+#else
+static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
+  int log = 0;
+  uint32_t value = n;
+  int i;
+
+  if (value == 0) return -1;
+  for (i = 4; i >= 0; --i) {  // binary search using shifts of 16, 8, 4, 2, 1
+    const int shift = (1 << i);
+    const uint32_t x = value >> shift;
+    if (x != 0) {  // a set bit lies at or above 'shift': keep the upper part
+      value = x;
+      log += shift;
+    }
+  }
+  return log;
+}
+#endif
+
+// Returns ceil(log2(n)); for n == 0 the result is BitsLog2Floor(0), i.e. -1.
+static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
+  const int log_floor = BitsLog2Floor(n);
+  // n & (n - 1) clears the lowest set bit: zero means n is 0 or a power of 2.
+  const int is_exact = ((n & (n - 1)) == 0);
+  return is_exact ? log_floor : log_floor + 1;
+}
+
+// Splits a distance or length value into a prefix code and extra bits.
+// The prefix ('*code') is meant to be entropy-coded, while the low
+// '*extra_bits_count' bits ('*extra_bits_value') are stored as plain bits.
+static WEBP_INLINE void PrefixEncode(int distance, int* const code,
+                                     int* const extra_bits_count,
+                                     int* const extra_bits_value) {
+  // Work on distance - 1; highest_bit is -1 when that value is zero.
+  const int highest_bit = BitsLog2Floor(--distance);
+  // & 0x3f is to make behavior well defined when highest_bit
+  // does not exist or is the least significant bit.
+  const int second_highest_bit =
+      (distance >> ((highest_bit - 1) & 0x3f)) & 1;
+  *extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0;
+  *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
+  *code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit)
+                            : (highest_bit == 0) ? 1 : 0;
+}
+
+// -----------------------------------------------------------------------------
+// PixOrCopy
+
+enum Mode {
+  kLiteral,   // a single ARGB pixel (see PixOrCopyCreateLiteral)
+  kCacheIdx,  // an index into the color cache (see PixOrCopyCreateCacheIdx)
+  kCopy,      // a backward copy: distance + length (see PixOrCopyCreateCopy)
+  kNone
+};
+
+typedef struct {
+  // mode is stored as uint8_t so that the memory layout is exactly 8 bytes.
+  uint8_t mode;               // one of the 'enum Mode' values
+  uint16_t len;               // pixels covered; 1 for literal and cache index
+  uint32_t argb_or_distance;  // ARGB value, cache index or copy distance
+} PixOrCopy;
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCopy(uint32_t distance,
+                                                 uint16_t len) {
+  PixOrCopy retval;  // built locally and returned by value
+  retval.mode = kCopy;
+  retval.argb_or_distance = distance;  // for kCopy this field is the distance
+  retval.len = len;  // length of the copy
+  return retval;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateCacheIdx(int idx) {
+  PixOrCopy retval;
+  assert(idx >= 0);
+  assert(idx < (1 << MAX_COLOR_CACHE_BITS));  // must fit the largest cache
+  retval.mode = kCacheIdx;
+  retval.argb_or_distance = idx;  // for kCacheIdx this field is the index
+  retval.len = 1;  // a cache index stands for a single pixel
+  return retval;
+}
+
+static WEBP_INLINE PixOrCopy PixOrCopyCreateLiteral(uint32_t argb) {
+  PixOrCopy retval;
+  retval.mode = kLiteral;
+  retval.argb_or_distance = argb;  // for kLiteral this field is the pixel
+  retval.len = 1;  // a literal is a single pixel
+  return retval;
+}
+
+static WEBP_INLINE int PixOrCopyIsLiteral(const PixOrCopy* const p) {
+  return (p->mode == kLiteral);  // 1 if 'p' is a literal pixel, else 0
+}
+
+static WEBP_INLINE int PixOrCopyIsCacheIdx(const PixOrCopy* const p) {
+  return (p->mode == kCacheIdx);  // 1 if 'p' is a color cache index, else 0
+}
+
+static WEBP_INLINE int PixOrCopyIsCopy(const PixOrCopy* const p) {
+  return (p->mode == kCopy);  // 1 if 'p' is a backward copy, else 0
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLiteral(const PixOrCopy* const p,
+                                             int component) {
+  assert(p->mode == kLiteral);  // only meaningful for literal pixels
+  return (p->argb_or_distance >> (component * 8)) & 0xff;  // byte #component
+}
+
+static WEBP_INLINE uint32_t PixOrCopyLength(const PixOrCopy* const p) {
+  return p->len;  // valid for every mode (1 for literals and cache indexes)
+}
+
+static WEBP_INLINE uint32_t PixOrCopyArgb(const PixOrCopy* const p) {
+  assert(p->mode == kLiteral);  // the field holds a pixel only for literals
+  return p->argb_or_distance;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyCacheIdx(const PixOrCopy* const p) {
+  assert(p->mode == kCacheIdx);  // the field holds an index only in this mode
+  assert(p->argb_or_distance < (1U << MAX_COLOR_CACHE_BITS));
+  return p->argb_or_distance;
+}
+
+static WEBP_INLINE uint32_t PixOrCopyDistance(const PixOrCopy* const p) {
+  assert(p->mode == kCopy);  // the field holds a distance only for copies
+  return p->argb_or_distance;
+}
+
+// -----------------------------------------------------------------------------
+// VP8LBackwardRefs
+
+typedef struct {
+  PixOrCopy* refs;  // token array (see VP8LBackwardRefsAlloc)
+  int size;      // number of entries of 'refs' currently used
+  int max_size;  // maximum capacity of 'refs', in entries
+} VP8LBackwardRefs;
+
+// Initialize the object. Must be called first. 'refs' can be NULL.
+void VP8LInitBackwardRefs(VP8LBackwardRefs* const refs);
+
+// Release memory and re-initialize the object. 'refs' can be NULL.
+void VP8LClearBackwardRefs(VP8LBackwardRefs* const refs);
+
+// Allocate 'max_size' references. Returns false in case of memory error.
+int VP8LBackwardRefsAlloc(VP8LBackwardRefs* const refs, int max_size);
+
+// -----------------------------------------------------------------------------
+// Main entry points
+
+// Evaluates best possible backward references for specified quality.
+// Further optimize for 2D locality if use_2d_locality flag is set.
+int VP8LGetBackwardReferences(int width, int height,
+                              const uint32_t* const argb,
+                              int quality, int cache_bits, int use_2d_locality,
+                              VP8LBackwardRefs* const best);
+
+// Produce an estimate for a good color cache size for the image.
+int VP8LCalculateEstimateForCacheSize(const uint32_t* const argb,
+                                      int xsize, int ysize,
+                                      int* const best_cache_bits);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif  // WEBP_ENC_BACKWARD_REFERENCES_H_
diff --git a/drivers/webpold/enc/config.c b/drivers/webpold/enc/config.c
new file mode 100644
index 0000000000..4136f6c227
--- /dev/null
+++ b/drivers/webpold/enc/config.c
@@ -0,0 +1,132 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Coding tools configuration
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "../encode.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// WebPConfig
+//------------------------------------------------------------------------------
+
+int WebPConfigInitInternal(WebPConfig* config,
+                           WebPPreset preset, float quality, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
+    return 0;   // caller/system version mismatch!
+  }
+  if (config == NULL) return 0;  // nothing to initialize
+  // Baseline values shared by all presets; the switch below adjusts some.
+  config->quality = quality;
+  config->target_size = 0;
+  config->target_PSNR = 0.;
+  config->method = 4;
+  config->sns_strength = 50;
+  config->filter_strength = 20;   // default: light filtering
+  config->filter_sharpness = 0;
+  config->filter_type = 0;        // default: simple
+  config->partitions = 0;
+  config->segments = 4;
+  config->pass = 1;
+  config->show_compressed = 0;
+  config->preprocessing = 0;
+  config->autofilter = 0;
+  config->partition_limit = 0;
+  config->alpha_compression = 1;
+  config->alpha_filtering = 1;
+  config->alpha_quality = 100;
+  config->lossless = 0;
+  config->image_hint = WEBP_HINT_DEFAULT;
+
+  // Preset-specific overrides of the defaults above. TODO(skal): tune.
+  switch (preset) {
+    case WEBP_PRESET_PICTURE:
+      config->sns_strength = 80;
+      config->filter_sharpness = 4;
+      config->filter_strength = 35;
+      break;
+    case WEBP_PRESET_PHOTO:
+      config->sns_strength = 80;
+      config->filter_sharpness = 3;
+      config->filter_strength = 30;
+      break;
+    case WEBP_PRESET_DRAWING:
+      config->sns_strength = 25;
+      config->filter_sharpness = 6;
+      config->filter_strength = 10;
+      break;
+    case WEBP_PRESET_ICON:
+      config->sns_strength = 0;
+      config->filter_strength = 0;   // disable filtering to retain sharpness
+      break;
+    case WEBP_PRESET_TEXT:
+      config->sns_strength = 0;
+      config->filter_strength = 0;   // disable filtering to retain sharpness
+      config->segments = 2;
+      break;
+    case WEBP_PRESET_DEFAULT:
+    default:
+      break;  // keep the baseline values unchanged
+  }
+  return WebPValidateConfig(config);  // 0 if e.g. 'quality' is out of range
+}
+
+int WebPValidateConfig(const WebPConfig* config) {
+  if (config == NULL) return 0;  // a missing config is never valid
+  if (config->quality < 0 || config->quality > 100)
+    return 0;
+  if (config->target_size < 0)
+    return 0;
+  if (config->target_PSNR < 0)
+    return 0;
+  if (config->method < 0 || config->method > 6)
+    return 0;
+  if (config->segments < 1 || config->segments > 4)
+    return 0;
+  if (config->sns_strength < 0 || config->sns_strength > 100)
+    return 0;
+  if (config->filter_strength < 0 || config->filter_strength > 100)
+    return 0;
+  if (config->filter_sharpness < 0 || config->filter_sharpness > 7)
+    return 0;
+  if (config->filter_type < 0 || config->filter_type > 1)
+    return 0;
+  if (config->autofilter < 0 || config->autofilter > 1)
+    return 0;
+  if (config->pass < 1 || config->pass > 10)
+    return 0;
+  if (config->show_compressed < 0 || config->show_compressed > 1)
+    return 0;
+  if (config->preprocessing < 0 || config->preprocessing > 1)
+    return 0;
+  if (config->partitions < 0 || config->partitions > 3)
+    return 0;
+  if (config->partition_limit < 0 || config->partition_limit > 100)
+    return 0;
+  if (config->alpha_compression < 0)  // only the lower bound is checked
+    return 0;
+  if (config->alpha_filtering < 0)  // only the lower bound is checked
+    return 0;
+  if (config->alpha_quality < 0 || config->alpha_quality > 100)
+    return 0;
+  if (config->lossless < 0 || config->lossless > 1)
+    return 0;
+  if (config->image_hint >= WEBP_HINT_LAST)  // only the upper bound is checked
+    return 0;
+  return 1;  // every checked field is within its accepted range
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/cost.c b/drivers/webpold/enc/cost.c
new file mode 100644
index 0000000000..92e0cc713c
--- /dev/null
+++ b/drivers/webpold/enc/cost.c
@@ -0,0 +1,494 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Cost tables for level and modes
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./cost.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Boolean-cost cost table
+
+const uint16_t VP8EntropyCost[256] = {
+  1792, 1792, 1792, 1536, 1536, 1408, 1366, 1280, 1280, 1216,
+  1178, 1152, 1110, 1076, 1061, 1024, 1024,  992,  968,  951,
+   939,  911,  896,  878,  871,  854,  838,  820,  811,  794,
+   786,  768,  768,  752,  740,  732,  720,  709,  704,  690,
+   683,  672,  666,  655,  647,  640,  631,  622,  615,  607,
+   598,  592,  586,  576,  572,  564,  559,  555,  547,  541,
+   534,  528,  522,  512,  512,  504,  500,  494,  488,  483,
+   477,  473,  467,  461,  458,  452,  448,  443,  438,  434,
+   427,  424,  419,  415,  410,  406,  403,  399,  394,  390,
+   384,  384,  377,  374,  370,  366,  362,  359,  355,  351,
+   347,  342,  342,  336,  333,  330,  326,  323,  320,  316,
+   312,  308,  305,  302,  299,  296,  293,  288,  287,  283,
+   280,  277,  274,  272,  268,  266,  262,  256,  256,  256,
+   251,  248,  245,  242,  240,  237,  234,  232,  228,  226,
+   223,  221,  218,  216,  214,  211,  208,  205,  203,  201,
+   198,  196,  192,  191,  188,  187,  183,  181,  179,  176,
+   175,  171,  171,  168,  165,  163,  160,  159,  156,  154,
+   152,  150,  148,  146,  144,  142,  139,  138,  135,  133,
+   131,  128,  128,  125,  123,  121,  119,  117,  115,  113,
+   111,  110,  107,  105,  103,  102,  100,   98,   96,   94,
+    92,   91,   89,   86,   86,   83,   82,   80,   77,   76,
+    74,   73,   71,   69,   67,   66,   64,   63,   61,   59,
+    57,   55,   54,   52,   51,   49,   47,   46,   44,   43,
+    41,   40,   38,   36,   35,   33,   32,   30,   29,   27,
+    25,   24,   22,   21,   19,   18,   16,   15,   13,   12,
+    10,    9,    7,    6,    4,    3
+};
+
+//------------------------------------------------------------------------------
+// Level cost tables
+
+// For each given level, the following table gives the pattern of contexts to
+// use for coding it (in [][0]) as well as the bit value to use for each
+// context (in [][1]).
+const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
+                  {0x001, 0x000}, {0x007, 0x001}, {0x00f, 0x005},
+  {0x00f, 0x00d}, {0x033, 0x003}, {0x033, 0x003}, {0x033, 0x023},
+  {0x033, 0x023}, {0x033, 0x023}, {0x033, 0x023}, {0x0d3, 0x013},
+  {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013},
+  {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x013}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093},
+  {0x0d3, 0x093}, {0x0d3, 0x093}, {0x0d3, 0x093}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053},
+  {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x053}, {0x153, 0x153}
+};
+
+// Fixed costs for coding levels, deduced from the coding tree.
+// This is only the part that doesn't depend on the probability state.
+const uint16_t VP8LevelFixedCosts[2048] = {
+     0,  256,  256,  256,  256,  432,  618,  630,
+   731,  640,  640,  828,  901,  948, 1021, 1101,
+  1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
+  1245, 1275, 1318, 1337, 1380, 1410, 1453, 1497,
+  1540, 1570, 1613, 1280, 1295, 1317, 1332, 1358,
+  1373, 1395, 1410, 1454, 1469, 1491, 1506, 1532,
+  1547, 1569, 1584, 1601, 1616, 1638, 1653, 1679,
+  1694, 1716, 1731, 1775, 1790, 1812, 1827, 1853,
+  1868, 1890, 1905, 1727, 1733, 1742, 1748, 1759,
+  1765, 1774, 1780, 1800, 1806, 1815, 1821, 1832,
+  1838, 1847, 1853, 1878, 1884, 1893, 1899, 1910,
+  1916, 1925, 1931, 1951, 1957, 1966, 1972, 1983,
+  1989, 1998, 2004, 2027, 2033, 2042, 2048, 2059,
+  2065, 2074, 2080, 2100, 2106, 2115, 2121, 2132,
+  2138, 2147, 2153, 2178, 2184, 2193, 2199, 2210,
+  2216, 2225, 2231, 2251, 2257, 2266, 2272, 2283,
+  2289, 2298, 2304, 2168, 2174, 2183, 2189, 2200,
+  2206, 2215, 2221, 2241, 2247, 2256, 2262, 2273,
+  2279, 2288, 2294, 2319, 2325, 2334, 2340, 2351,
+  2357, 2366, 2372, 2392, 2398, 2407, 2413, 2424,
+  2430, 2439, 2445, 2468, 2474, 2483, 2489, 2500,
+  2506, 2515, 2521, 2541, 2547, 2556, 2562, 2573,
+  2579, 2588, 2594, 2619, 2625, 2634, 2640, 2651,
+  2657, 2666, 2672, 2692, 2698, 2707, 2713, 2724,
+  2730, 2739, 2745, 2540, 2546, 2555, 2561, 2572,
+  2578, 2587, 2593, 2613, 2619, 2628, 2634, 2645,
+  2651, 2660, 2666, 2691, 2697, 2706, 2712, 2723,
+  2729, 2738, 2744, 2764, 2770, 2779, 2785, 2796,
+  2802, 2811, 2817, 2840, 2846, 2855, 2861, 2872,
+  2878, 2887, 2893, 2913, 2919, 2928, 2934, 2945,
+  2951, 2960, 2966, 2991, 2997, 3006, 3012, 3023,
+  3029, 3038, 3044, 3064, 3070, 3079, 3085, 3096,
+  3102, 3111, 3117, 2981, 2987, 2996, 3002, 3013,
+  3019, 3028, 3034, 3054, 3060, 3069, 3075, 3086,
+  3092, 3101, 3107, 3132, 3138, 3147, 3153, 3164,
+  3170, 3179, 3185, 3205, 3211, 3220, 3226, 3237,
+  3243, 3252, 3258, 3281, 3287, 3296, 3302, 3313,
+  3319, 3328, 3334, 3354, 3360, 3369, 3375, 3386,
+  3392, 3401, 3407, 3432, 3438, 3447, 3453, 3464,
+  3470, 3479, 3485, 3505, 3511, 3520, 3526, 3537,
+  3543, 3552, 3558, 2816, 2822, 2831, 2837, 2848,
+  2854, 2863, 2869, 2889, 2895, 2904, 2910, 2921,
+  2927, 2936, 2942, 2967, 2973, 2982, 2988, 2999,
+  3005, 3014, 3020, 3040, 3046, 3055, 3061, 3072,
+  3078, 3087, 3093, 3116, 3122, 3131, 3137, 3148,
+  3154, 3163, 3169, 3189, 3195, 3204, 3210, 3221,
+  3227, 3236, 3242, 3267, 3273, 3282, 3288, 3299,
+  3305, 3314, 3320, 3340, 3346, 3355, 3361, 3372,
+  3378, 3387, 3393, 3257, 3263, 3272, 3278, 3289,
+  3295, 3304, 3310, 3330, 3336, 3345, 3351, 3362,
+  3368, 3377, 3383, 3408, 3414, 3423, 3429, 3440,
+  3446, 3455, 3461, 3481, 3487, 3496, 3502, 3513,
+  3519, 3528, 3534, 3557, 3563, 3572, 3578, 3589,
+  3595, 3604, 3610, 3630, 3636, 3645, 3651, 3662,
+  3668, 3677, 3683, 3708, 3714, 3723, 3729, 3740,
+  3746, 3755, 3761, 3781, 3787, 3796, 3802, 3813,
+  3819, 3828, 3834, 3629, 3635, 3644, 3650, 3661,
+  3667, 3676, 3682, 3702, 3708, 3717, 3723, 3734,
+  3740, 3749, 3755, 3780, 3786, 3795, 3801, 3812,
+  3818, 3827, 3833, 3853, 3859, 3868, 3874, 3885,
+  3891, 3900, 3906, 3929, 3935, 3944, 3950, 3961,
+  3967, 3976, 3982, 4002, 4008, 4017, 4023, 4034,
+  4040, 4049, 4055, 4080, 4086, 4095, 4101, 4112,
+  4118, 4127, 4133, 4153, 4159, 4168, 4174, 4185,
+  4191, 4200, 4206, 4070, 4076, 4085, 4091, 4102,
+  4108, 4117, 4123, 4143, 4149, 4158, 4164, 4175,
+  4181, 4190, 4196, 4221, 4227, 4236, 4242, 4253,
+  4259, 4268, 4274, 4294, 4300, 4309, 4315, 4326,
+  4332, 4341, 4347, 4370, 4376, 4385, 4391, 4402,
+  4408, 4417, 4423, 4443, 4449, 4458, 4464, 4475,
+  4481, 4490, 4496, 4521, 4527, 4536, 4542, 4553,
+  4559, 4568, 4574, 4594, 4600, 4609, 4615, 4626,
+  4632, 4641, 4647, 3515, 3521, 3530, 3536, 3547,
+  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+  6420, 6429, 6435, 3515, 3521, 3530, 3536, 3547,
+  3553, 3562, 3568, 3588, 3594, 3603, 3609, 3620,
+  3626, 3635, 3641, 3666, 3672, 3681, 3687, 3698,
+  3704, 3713, 3719, 3739, 3745, 3754, 3760, 3771,
+  3777, 3786, 3792, 3815, 3821, 3830, 3836, 3847,
+  3853, 3862, 3868, 3888, 3894, 3903, 3909, 3920,
+  3926, 3935, 3941, 3966, 3972, 3981, 3987, 3998,
+  4004, 4013, 4019, 4039, 4045, 4054, 4060, 4071,
+  4077, 4086, 4092, 3956, 3962, 3971, 3977, 3988,
+  3994, 4003, 4009, 4029, 4035, 4044, 4050, 4061,
+  4067, 4076, 4082, 4107, 4113, 4122, 4128, 4139,
+  4145, 4154, 4160, 4180, 4186, 4195, 4201, 4212,
+  4218, 4227, 4233, 4256, 4262, 4271, 4277, 4288,
+  4294, 4303, 4309, 4329, 4335, 4344, 4350, 4361,
+  4367, 4376, 4382, 4407, 4413, 4422, 4428, 4439,
+  4445, 4454, 4460, 4480, 4486, 4495, 4501, 4512,
+  4518, 4527, 4533, 4328, 4334, 4343, 4349, 4360,
+  4366, 4375, 4381, 4401, 4407, 4416, 4422, 4433,
+  4439, 4448, 4454, 4479, 4485, 4494, 4500, 4511,
+  4517, 4526, 4532, 4552, 4558, 4567, 4573, 4584,
+  4590, 4599, 4605, 4628, 4634, 4643, 4649, 4660,
+  4666, 4675, 4681, 4701, 4707, 4716, 4722, 4733,
+  4739, 4748, 4754, 4779, 4785, 4794, 4800, 4811,
+  4817, 4826, 4832, 4852, 4858, 4867, 4873, 4884,
+  4890, 4899, 4905, 4769, 4775, 4784, 4790, 4801,
+  4807, 4816, 4822, 4842, 4848, 4857, 4863, 4874,
+  4880, 4889, 4895, 4920, 4926, 4935, 4941, 4952,
+  4958, 4967, 4973, 4993, 4999, 5008, 5014, 5025,
+  5031, 5040, 5046, 5069, 5075, 5084, 5090, 5101,
+  5107, 5116, 5122, 5142, 5148, 5157, 5163, 5174,
+  5180, 5189, 5195, 5220, 5226, 5235, 5241, 5252,
+  5258, 5267, 5273, 5293, 5299, 5308, 5314, 5325,
+  5331, 5340, 5346, 4604, 4610, 4619, 4625, 4636,
+  4642, 4651, 4657, 4677, 4683, 4692, 4698, 4709,
+  4715, 4724, 4730, 4755, 4761, 4770, 4776, 4787,
+  4793, 4802, 4808, 4828, 4834, 4843, 4849, 4860,
+  4866, 4875, 4881, 4904, 4910, 4919, 4925, 4936,
+  4942, 4951, 4957, 4977, 4983, 4992, 4998, 5009,
+  5015, 5024, 5030, 5055, 5061, 5070, 5076, 5087,
+  5093, 5102, 5108, 5128, 5134, 5143, 5149, 5160,
+  5166, 5175, 5181, 5045, 5051, 5060, 5066, 5077,
+  5083, 5092, 5098, 5118, 5124, 5133, 5139, 5150,
+  5156, 5165, 5171, 5196, 5202, 5211, 5217, 5228,
+  5234, 5243, 5249, 5269, 5275, 5284, 5290, 5301,
+  5307, 5316, 5322, 5345, 5351, 5360, 5366, 5377,
+  5383, 5392, 5398, 5418, 5424, 5433, 5439, 5450,
+  5456, 5465, 5471, 5496, 5502, 5511, 5517, 5528,
+  5534, 5543, 5549, 5569, 5575, 5584, 5590, 5601,
+  5607, 5616, 5622, 5417, 5423, 5432, 5438, 5449,
+  5455, 5464, 5470, 5490, 5496, 5505, 5511, 5522,
+  5528, 5537, 5543, 5568, 5574, 5583, 5589, 5600,
+  5606, 5615, 5621, 5641, 5647, 5656, 5662, 5673,
+  5679, 5688, 5694, 5717, 5723, 5732, 5738, 5749,
+  5755, 5764, 5770, 5790, 5796, 5805, 5811, 5822,
+  5828, 5837, 5843, 5868, 5874, 5883, 5889, 5900,
+  5906, 5915, 5921, 5941, 5947, 5956, 5962, 5973,
+  5979, 5988, 5994, 5858, 5864, 5873, 5879, 5890,
+  5896, 5905, 5911, 5931, 5937, 5946, 5952, 5963,
+  5969, 5978, 5984, 6009, 6015, 6024, 6030, 6041,
+  6047, 6056, 6062, 6082, 6088, 6097, 6103, 6114,
+  6120, 6129, 6135, 6158, 6164, 6173, 6179, 6190,
+  6196, 6205, 6211, 6231, 6237, 6246, 6252, 6263,
+  6269, 6278, 6284, 6309, 6315, 6324, 6330, 6341,
+  6347, 6356, 6362, 6382, 6388, 6397, 6403, 6414,
+  6420, 6429, 6435, 5303, 5309, 5318, 5324, 5335,
+  5341, 5350, 5356, 5376, 5382, 5391, 5397, 5408,
+  5414, 5423, 5429, 5454, 5460, 5469, 5475, 5486,
+  5492, 5501, 5507, 5527, 5533, 5542, 5548, 5559,
+  5565, 5574, 5580, 5603, 5609, 5618, 5624, 5635,
+  5641, 5650, 5656, 5676, 5682, 5691, 5697, 5708,
+  5714, 5723, 5729, 5754, 5760, 5769, 5775, 5786,
+  5792, 5801, 5807, 5827, 5833, 5842, 5848, 5859,
+  5865, 5874, 5880, 5744, 5750, 5759, 5765, 5776,
+  5782, 5791, 5797, 5817, 5823, 5832, 5838, 5849,
+  5855, 5864, 5870, 5895, 5901, 5910, 5916, 5927,
+  5933, 5942, 5948, 5968, 5974, 5983, 5989, 6000,
+  6006, 6015, 6021, 6044, 6050, 6059, 6065, 6076,
+  6082, 6091, 6097, 6117, 6123, 6132, 6138, 6149,
+  6155, 6164, 6170, 6195, 6201, 6210, 6216, 6227,
+  6233, 6242, 6248, 6268, 6274, 6283, 6289, 6300,
+  6306, 6315, 6321, 6116, 6122, 6131, 6137, 6148,
+  6154, 6163, 6169, 6189, 6195, 6204, 6210, 6221,
+  6227, 6236, 6242, 6267, 6273, 6282, 6288, 6299,
+  6305, 6314, 6320, 6340, 6346, 6355, 6361, 6372,
+  6378, 6387, 6393, 6416, 6422, 6431, 6437, 6448,
+  6454, 6463, 6469, 6489, 6495, 6504, 6510, 6521,
+  6527, 6536, 6542, 6567, 6573, 6582, 6588, 6599,
+  6605, 6614, 6620, 6640, 6646, 6655, 6661, 6672,
+  6678, 6687, 6693, 6557, 6563, 6572, 6578, 6589,
+  6595, 6604, 6610, 6630, 6636, 6645, 6651, 6662,
+  6668, 6677, 6683, 6708, 6714, 6723, 6729, 6740,
+  6746, 6755, 6761, 6781, 6787, 6796, 6802, 6813,
+  6819, 6828, 6834, 6857, 6863, 6872, 6878, 6889,
+  6895, 6904, 6910, 6930, 6936, 6945, 6951, 6962,
+  6968, 6977, 6983, 7008, 7014, 7023, 7029, 7040,
+  7046, 7055, 7061, 7081, 7087, 7096, 7102, 7113,
+  7119, 7128, 7134, 6392, 6398, 6407, 6413, 6424,
+  6430, 6439, 6445, 6465, 6471, 6480, 6486, 6497,
+  6503, 6512, 6518, 6543, 6549, 6558, 6564, 6575,
+  6581, 6590, 6596, 6616, 6622, 6631, 6637, 6648,
+  6654, 6663, 6669, 6692, 6698, 6707, 6713, 6724,
+  6730, 6739, 6745, 6765, 6771, 6780, 6786, 6797,
+  6803, 6812, 6818, 6843, 6849, 6858, 6864, 6875,
+  6881, 6890, 6896, 6916, 6922, 6931, 6937, 6948,
+  6954, 6963, 6969, 6833, 6839, 6848, 6854, 6865,
+  6871, 6880, 6886, 6906, 6912, 6921, 6927, 6938,
+  6944, 6953, 6959, 6984, 6990, 6999, 7005, 7016,
+  7022, 7031, 7037, 7057, 7063, 7072, 7078, 7089,
+  7095, 7104, 7110, 7133, 7139, 7148, 7154, 7165,
+  7171, 7180, 7186, 7206, 7212, 7221, 7227, 7238,
+  7244, 7253, 7259, 7284, 7290, 7299, 7305, 7316,
+  7322, 7331, 7337, 7357, 7363, 7372, 7378, 7389,
+  7395, 7404, 7410, 7205, 7211, 7220, 7226, 7237,
+  7243, 7252, 7258, 7278, 7284, 7293, 7299, 7310,
+  7316, 7325, 7331, 7356, 7362, 7371, 7377, 7388,
+  7394, 7403, 7409, 7429, 7435, 7444, 7450, 7461,
+  7467, 7476, 7482, 7505, 7511, 7520, 7526, 7537,
+  7543, 7552, 7558, 7578, 7584, 7593, 7599, 7610,
+  7616, 7625, 7631, 7656, 7662, 7671, 7677, 7688,
+  7694, 7703, 7709, 7729, 7735, 7744, 7750, 7761
+};
+
+// Cost of the variable part of coding 'level' (>= 1): sums VP8BitCost() over
+// the probas[] nodes that the level's entry in VP8LevelCodes[] marks as used.
+static int VariableLevelCost(int level, const uint8_t probas[NUM_PROBAS]) {
+  const uint16_t* const code = VP8LevelCodes[level - 1];
+  int mask = code[0];   // bit k set: the node at probas[k + 2] is visited
+  int value = code[1];  // bit k: the binary symbol coded at that node
+  int total = 0;
+  int n;
+  for (n = 2; mask != 0; ++n, mask >>= 1, value >>= 1) {
+    if ((mask & 1) == 0) continue;
+    total += VP8BitCost(value & 1, probas[n]);
+  }
+  return total;
+}
+
+//------------------------------------------------------------------------------
+// Pre-calc level costs once for all
+
+// Rebuilds proba->level_cost_ from proba->coeffs_ when 'proba' is dirty.
+void VP8CalculateLevelCosts(VP8Proba* const proba) {
+  int t, b, c;
+  if (proba->dirty_ == 0) return;  // cost tables are already up to date
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        const uint8_t* const probas = proba->coeffs_[t][b][c];
+        uint16_t* const costs = proba->level_cost_[t][b][c];
+        // probas[1] separates level 0 from all the non-zero levels.
+        const int non_zero_cost = VP8BitCost(1, probas[1]);
+        int level = 0;
+        costs[level] = VP8BitCost(0, probas[1]);
+        while (++level <= MAX_VARIABLE_LEVEL) {
+          costs[level] = non_zero_cost + VariableLevelCost(level, probas);
+        }
+        // Above MAX_VARIABLE_LEVEL the variable part of the cost is
+        // constant, so no further entries are needed.
+      }
+    }
+  }
+  proba->dirty_ = 0;
+}
+
+//------------------------------------------------------------------------------
+// Mode cost tables.
+
+// These are the fixed probabilities (in the coding trees) turned into
+// bit-costs by calling VP8BitCost().
+const uint16_t VP8FixedCostsUV[4] = { 302, 984, 439, 642 };
+// note: these values include the fixed VP8BitCost(1, 145) mode selection cost.
+const uint16_t VP8FixedCostsI16[4] = { 663, 919, 872, 919 };
+const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES] = {
+  { {  251, 1362, 1934, 2085, 2314, 2230, 1839, 1988, 2437, 2348 },
+    {  403,  680, 1507, 1519, 2060, 2005, 1992, 1914, 1924, 1733 },
+    {  353, 1121,  973, 1895, 2060, 1787, 1671, 1516, 2012, 1868 },
+    {  770,  852, 1581,  632, 1393, 1780, 1823, 1936, 1074, 1218 },
+    {  510, 1270, 1467, 1319,  847, 1279, 1792, 2094, 1080, 1353 },
+    {  488, 1322,  918, 1573, 1300,  883, 1814, 1752, 1756, 1502 },
+    {  425,  992, 1820, 1514, 1843, 2440,  937, 1771, 1924, 1129 },
+    {  363, 1248, 1257, 1970, 2194, 2385, 1569,  953, 1951, 1601 },
+    {  723, 1257, 1631,  964,  963, 1508, 1697, 1824,  671, 1418 },
+    {  635, 1038, 1573,  930, 1673, 1413, 1410, 1687, 1410,  749 } },
+  { {  451,  613, 1345, 1702, 1870, 1716, 1728, 1766, 2190, 2310 },
+    {  678,  453, 1171, 1443, 1925, 1831, 2045, 1781, 1887, 1602 },
+    {  711,  666,  674, 1718, 1910, 1493, 1775, 1193, 2325, 2325 },
+    {  883,  854, 1583,  542, 1800, 1878, 1664, 2149, 1207, 1087 },
+    {  669,  994, 1248, 1122,  949, 1179, 1376, 1729, 1070, 1244 },
+    {  715, 1026,  715, 1350, 1430,  930, 1717, 1296, 1479, 1479 },
+    {  544,  841, 1656, 1450, 2094, 3883, 1010, 1759, 2076,  809 },
+    {  610,  855,  957, 1553, 2067, 1561, 1704,  824, 2066, 1226 },
+    {  833,  960, 1416,  819, 1277, 1619, 1501, 1617,  757, 1182 },
+    {  711,  964, 1252,  879, 1441, 1828, 1508, 1636, 1594,  734 } },
+  { {  605,  764,  734, 1713, 1747, 1192, 1819, 1353, 1877, 2392 },
+    {  866,  641,  586, 1622, 2072, 1431, 1888, 1346, 2189, 1764 },
+    {  901,  851,  456, 2165, 2281, 1405, 1739, 1193, 2183, 2443 },
+    {  770, 1045,  952, 1078, 1342, 1191, 1436, 1063, 1303,  995 },
+    {  901, 1086,  727, 1170,  884, 1105, 1267, 1401, 1739, 1337 },
+    {  951, 1162,  595, 1488, 1388,  703, 1790, 1366, 2057, 1724 },
+    {  534,  986, 1273, 1987, 3273, 1485, 1024, 1399, 1583,  866 },
+    {  699, 1182,  695, 1978, 1726, 1986, 1326,  714, 1750, 1672 },
+    {  951, 1217, 1209,  920, 1062, 1441, 1548,  999,  952,  932 },
+    {  733, 1284,  784, 1256, 1557, 1098, 1257, 1357, 1414,  908 } },
+  { {  316, 1075, 1653, 1220, 2145, 2051, 1730, 2131, 1884, 1790 },
+    {  745,  516, 1404,  894, 1599, 2375, 2013, 2105, 1475, 1381 },
+    {  516,  729, 1088, 1319, 1637, 3426, 1636, 1275, 1531, 1453 },
+    {  894,  943, 2138,  468, 1704, 2259, 2069, 1763, 1266, 1158 },
+    {  605, 1025, 1235,  871, 1170, 1767, 1493, 1500, 1104, 1258 },
+    {  739,  826, 1207, 1151, 1412,  846, 1305, 2726, 1014, 1569 },
+    {  558,  825, 1820, 1398, 3344, 1556, 1218, 1550, 1228,  878 },
+    {  429,  951, 1089, 1816, 3861, 3861, 1556,  969, 1568, 1828 },
+    {  883,  961, 1752,  769, 1468, 1810, 2081, 2346,  613, 1298 },
+    {  803,  895, 1372,  641, 1303, 1708, 1686, 1700, 1306, 1033 } },
+  { {  439, 1267, 1270, 1579,  963, 1193, 1723, 1729, 1198, 1993 },
+    {  705,  725, 1029, 1153, 1176, 1103, 1821, 1567, 1259, 1574 },
+    {  723,  859,  802, 1253,  972, 1202, 1407, 1665, 1520, 1674 },
+    {  894,  960, 1254,  887, 1052, 1607, 1344, 1349,  865, 1150 },
+    {  833, 1312, 1337, 1205,  572, 1288, 1414, 1529, 1088, 1430 },
+    {  842, 1279, 1068, 1861,  862,  688, 1861, 1630, 1039, 1381 },
+    {  766,  938, 1279, 1546, 3338, 1550, 1031, 1542, 1288,  640 },
+    {  715, 1090,  835, 1609, 1100, 1100, 1603, 1019, 1102, 1617 },
+    {  894, 1813, 1500, 1188,  789, 1194, 1491, 1919,  617, 1333 },
+    {  610, 1076, 1644, 1281, 1283,  975, 1179, 1688, 1434,  889 } },
+  { {  544,  971, 1146, 1849, 1221,  740, 1857, 1621, 1683, 2430 },
+    {  723,  705,  961, 1371, 1426,  821, 2081, 2079, 1839, 1380 },
+    {  783,  857,  703, 2145, 1419,  814, 1791, 1310, 1609, 2206 },
+    {  997, 1000, 1153,  792, 1229, 1162, 1810, 1418,  942,  979 },
+    {  901, 1226,  883, 1289,  793,  715, 1904, 1649, 1319, 3108 },
+    {  979, 1478,  782, 2216, 1454,  455, 3092, 1591, 1997, 1664 },
+    {  663, 1110, 1504, 1114, 1522, 3311,  676, 1522, 1530, 1024 },
+    {  605, 1138, 1153, 1314, 1569, 1315, 1157,  804, 1574, 1320 },
+    {  770, 1216, 1218, 1227,  869, 1384, 1232, 1375,  834, 1239 },
+    {  775, 1007,  843, 1216, 1225, 1074, 2527, 1479, 1149,  975 } },
+  { {  477,  817, 1309, 1439, 1708, 1454, 1159, 1241, 1945, 1672 },
+    {  577,  796, 1112, 1271, 1618, 1458, 1087, 1345, 1831, 1265 },
+    {  663,  776,  753, 1940, 1690, 1690, 1227, 1097, 3149, 1361 },
+    {  766, 1299, 1744, 1161, 1565, 1106, 1045, 1230, 1232,  707 },
+    {  915, 1026, 1404, 1182, 1184,  851, 1428, 2425, 1043,  789 },
+    {  883, 1456,  790, 1082, 1086,  985, 1083, 1484, 1238, 1160 },
+    {  507, 1345, 2261, 1995, 1847, 3636,  653, 1761, 2287,  933 },
+    {  553, 1193, 1470, 2057, 2059, 2059,  833,  779, 2058, 1263 },
+    {  766, 1275, 1515, 1039,  957, 1554, 1286, 1540, 1289,  705 },
+    {  499, 1378, 1496, 1385, 1850, 1850, 1044, 2465, 1515,  720 } },
+  { {  553,  930,  978, 2077, 1968, 1481, 1457,  761, 1957, 2362 },
+    {  694,  864,  905, 1720, 1670, 1621, 1429,  718, 2125, 1477 },
+    {  699,  968,  658, 3190, 2024, 1479, 1865,  750, 2060, 2320 },
+    {  733, 1308, 1296, 1062, 1576, 1322, 1062, 1112, 1172,  816 },
+    {  920,  927, 1052,  939,  947, 1156, 1152, 1073, 3056, 1268 },
+    {  723, 1534,  711, 1547, 1294,  892, 1553,  928, 1815, 1561 },
+    {  663, 1366, 1583, 2111, 1712, 3501,  522, 1155, 2130, 1133 },
+    {  614, 1731, 1188, 2343, 1944, 3733, 1287,  487, 3546, 1758 },
+    {  770, 1585, 1312,  826,  884, 2673, 1185, 1006, 1195, 1195 },
+    {  758, 1333, 1273, 1023, 1621, 1162, 1351,  833, 1479,  862 } },
+  { {  376, 1193, 1446, 1149, 1545, 1577, 1870, 1789, 1175, 1823 },
+    {  803,  633, 1136, 1058, 1350, 1323, 1598, 2247, 1072, 1252 },
+    {  614, 1048,  943,  981, 1152, 1869, 1461, 1020, 1618, 1618 },
+    { 1107, 1085, 1282,  592, 1779, 1933, 1648, 2403,  691, 1246 },
+    {  851, 1309, 1223, 1243,  895, 1593, 1792, 2317,  627, 1076 },
+    {  770, 1216, 1030, 1125,  921,  981, 1629, 1131, 1049, 1646 },
+    {  626, 1469, 1456, 1081, 1489, 3278,  981, 1232, 1498,  733 },
+    {  617, 1201,  812, 1220, 1476, 1476, 1478,  970, 1228, 1488 },
+    { 1179, 1393, 1540,  999, 1243, 1503, 1916, 1925,  414, 1614 },
+    {  943, 1088, 1490,  682, 1112, 1372, 1756, 1505,  966,  966 } },
+  { {  322, 1142, 1589, 1396, 2144, 1859, 1359, 1925, 2084, 1518 },
+    {  617,  625, 1241, 1234, 2121, 1615, 1524, 1858, 1720, 1004 },
+    {  553,  851,  786, 1299, 1452, 1560, 1372, 1561, 1967, 1713 },
+    {  770,  977, 1396,  568, 1893, 1639, 1540, 2108, 1430, 1013 },
+    {  684, 1120, 1375,  982,  930, 2719, 1638, 1643,  933,  993 },
+    {  553, 1103,  996, 1356, 1361, 1005, 1507, 1761, 1184, 1268 },
+    {  419, 1247, 1537, 1554, 1817, 3606, 1026, 1666, 1829,  923 },
+    {  439, 1139, 1101, 1257, 3710, 1922, 1205, 1040, 1931, 1529 },
+    {  979,  935, 1269,  847, 1202, 1286, 1530, 1535,  827, 1036 },
+    {  516, 1378, 1569, 1110, 1798, 1798, 1198, 2199, 1543,  712 } },
+};
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/cost.h b/drivers/webpold/enc/cost.h
new file mode 100644
index 0000000000..09b75b699d
--- /dev/null
+++ b/drivers/webpold/enc/cost.h
@@ -0,0 +1,48 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Cost tables for level and modes.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_COST_H_
+#define WEBP_ENC_COST_H_
+
+#include "./vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+extern const uint16_t VP8LevelFixedCosts[2048];   // approximate cost per level
+extern const uint16_t VP8EntropyCost[256];        // 8bit fixed-point log(p)
+
+// Cost of coding 'bit': a 0 is looked up at 'proba', a 1 at 255 - 'proba'.
+static WEBP_INLINE int VP8BitCost(int bit, uint8_t proba) {
+  return VP8EntropyCost[(bit != 0) ? 255 - proba : proba];
+}
+
+// Level cost calculations
+extern const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2];
+void VP8CalculateLevelCosts(VP8Proba* const proba);
+static WEBP_INLINE int VP8LevelCost(const uint16_t* const table, int level) {
+  // Past MAX_VARIABLE_LEVEL the variable part reuses the last table entry.
+  const int idx = (level < MAX_VARIABLE_LEVEL) ? level : MAX_VARIABLE_LEVEL;
+  return VP8LevelFixedCosts[level] + table[idx];
+}
+
+// Mode costs
+extern const uint16_t VP8FixedCostsUV[4];
+extern const uint16_t VP8FixedCostsI16[4];
+extern const uint16_t VP8FixedCostsI4[NUM_BMODES][NUM_BMODES][NUM_BMODES];
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_ENC_COST_H_ */
diff --git a/drivers/webpold/enc/filter.c b/drivers/webpold/enc/filter.c
new file mode 100644
index 0000000000..7fb78a3949
--- /dev/null
+++ b/drivers/webpold/enc/filter.c
@@ -0,0 +1,409 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Selecting filter level
+//
+// Author: somnath@google.com (Somnath Banerjee)
+
+#include "./vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// NOTE: clip1, tables and InitTables are repeated entries of dsp.c
+static uint8_t abs0[255 + 255 + 1];     // abs(i), for i in [-255, 255]
+static uint8_t abs1[255 + 255 + 1];     // abs(i)>>1, for i in [-255, 255]
+static int8_t sclip1[1020 + 1020 + 1];  // clips [-1020, 1020] to [-128, 127]
+static int8_t sclip2[112 + 112 + 1];    // clips [-112, 112] to [-16, 15]
+static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]
+
+static int tables_ok = 0;               // set to 1 once InitTables() has run
+
+// Fills the lookup tables on the first call; later calls return at once.
+static void InitTables(void) {
+  int v;
+  if (tables_ok) return;
+  for (v = -255; v <= 255; ++v) {
+    abs0[v + 255] = (v < 0) ? -v : v;
+    abs1[v + 255] = abs0[v + 255] >> 1;
+  }
+  for (v = -1020; v <= 1020; ++v) {
+    sclip1[v + 1020] = (v > 127) ? 127 : (v < -128) ? -128 : v;
+  }
+  for (v = -112; v <= 112; ++v) {
+    sclip2[v + 112] = (v > 15) ? 15 : (v < -16) ? -16 : v;
+  }
+  for (v = -255; v <= 510; ++v) {
+    clip1[v + 255] = (v > 255) ? 255 : (v < 0) ? 0 : v;
+  }
+  tables_ok = 1;
+}
+
+//------------------------------------------------------------------------------
+// Edge filtering functions
+
+// Reads the 4 pixels around the edge and rewrites the 2 adjacent to it.
+static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
+  const int p0 = p[-step], q0 = p[0];
+  const int delta = 3 * (q0 - p0) + sclip1[1020 + p[-2 * step] - p[step]];
+  const int dq = sclip2[112 + ((delta + 4) >> 3)];  // subtracted from q0
+  const int dp = sclip2[112 + ((delta + 3) >> 3)];  // added to p0
+  p[-step] = clip1[255 + p0 + dp];
+  p[0] = clip1[255 + q0 - dq];
+}
+
+// Reads the 4 pixels around the edge and rewrites all 4 of them.
+static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
+  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
+  const int delta = 3 * (q0 - p0);
+  const int dq = sclip2[112 + ((delta + 4) >> 3)];
+  const int dp = sclip2[112 + ((delta + 3) >> 3)];
+  const int half = (dq + 1) >> 1;  // smaller correction for the outer pair
+  p[-2 * step] = clip1[255 + p1 + half];
+  p[-step] = clip1[255 + p0 + dp];
+  p[0] = clip1[255 + q0 - dq];
+  p[step] = clip1[255 + q1 - half];
+}
+
+// High edge-variance: non-zero if |p1 - p0| or |q1 - q0| exceeds 'thresh'.
+static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
+  const int dp = abs0[255 + p[-2 * step] - p[-step]];  // |p1 - p0|
+  const int dq = abs0[255 + p[step] - p[0]];           // |q1 - q0|
+  return !(dp <= thresh && dq <= thresh);
+}
+
+static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
+  const int inner = abs0[255 + p[-step] - p[0]];         // |p0 - q0|
+  const int outer = abs1[255 + p[-2 * step] - p[step]];  // |p1 - q1| >> 1
+  return !(2 * inner + outer > thresh);
+}
+
+// needs_filter() test against 't', plus: neighbours differ by at most 'it'.
+static WEBP_INLINE int needs_filter2(const uint8_t* p,
+                                     int step, int t, int it) {
+  int k;
+  if (!needs_filter(p, step, t)) return 0;
+  for (k = -3; k <= 3; ++k) {   // k == 0 is the edge pair, tested above
+    if (k != 0 && abs0[255 + p[(k - 1) * step] - p[k * step]] > it) return 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simple In-loop filtering (Paragraph 15.2)
+
+// Tests and filters 16 consecutive pixels, each across rows (step = stride).
+static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
+  int col;
+  for (col = 0; col < 16; ++col) {
+    uint8_t* const px = p + col;
+    if (needs_filter(px, stride, thresh)) do_filter2(px, stride);
+  }
+}
+
+// Tests and filters 16 pixels 'stride' apart, each along its row (step = 1).
+static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
+  int row;
+  for (row = 0; row < 16; ++row) {
+    uint8_t* const px = p + row * stride;
+    if (needs_filter(px, 1, thresh)) do_filter2(px, 1);
+  }
+}
+
+// Runs SimpleVFilter16() at rows 4, 8 and 12 below 'p'.
+static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
+  int row;
+  for (row = 4; row <= 12; row += 4) {
+    SimpleVFilter16(p + row * stride, stride, thresh);
+  }
+}
+
+// Runs SimpleHFilter16() at columns 4, 8 and 12 to the right of 'p'.
+static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
+  int col;
+  for (col = 4; col <= 12; col += 4) {
+    SimpleHFilter16(p + col, stride, thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Complex In-loop filtering (Paragraph 15.3)
+
+// At each of 'size' positions: 2-pixel filter if hev() holds, else 4-pixel.
+static WEBP_INLINE void FilterLoop24(uint8_t* p,
+                                     int hstride, int vstride, int size,
+                                     int thresh, int ithresh, int hev_thresh) {
+  int n;
+  for (n = 0; n < size; ++n, p += vstride) {
+    if (!needs_filter2(p, hstride, thresh, ithresh)) continue;
+    if (hev(p, hstride, hev_thresh)) {
+      do_filter2(p, hstride);
+    } else {
+      do_filter4(p, hstride);
+    }
+  }
+}
+
+// Filters the three inner edges at rows 4, 8 and 12, 16 pixels wide each.
+static void VFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int row;
+  for (row = 4; row <= 12; row += 4) {
+    uint8_t* const edge = p + row * stride;
+    FilterLoop24(edge, stride, 1, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+// Filters the three inner edges at columns 4, 8 and 12, over 16 rows each.
+static void HFilter16i(uint8_t* p, int stride,
+                       int thresh, int ithresh, int hev_thresh) {
+  int col;
+  for (col = 4; col <= 12; col += 4) {
+    FilterLoop24(p + col, 1, stride, 16, thresh, ithresh, hev_thresh);
+  }
+}
+
+static void VFilter8i(uint8_t* u, uint8_t* v, int stride,  // edge at row 4
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
+}
+
+static void HFilter8i(uint8_t* u, uint8_t* v, int stride,  // edge at col 4
+                      int thresh, int ithresh, int hev_thresh) {
+  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
+}
+
+//------------------------------------------------------------------------------
+
+void (*VP8EncVFilter16i)(uint8_t*, int, int, int, int) = VFilter16i;
+void (*VP8EncHFilter16i)(uint8_t*, int, int, int, int) = HFilter16i;
+void (*VP8EncVFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = VFilter8i;
+void (*VP8EncHFilter8i)(uint8_t*, uint8_t*, int, int, int, int) = HFilter8i;
+
+void (*VP8EncSimpleVFilter16i)(uint8_t*, int, int) = SimpleVFilter16i;
+void (*VP8EncSimpleHFilter16i)(uint8_t*, int, int) = SimpleHFilter16i;
+
+//------------------------------------------------------------------------------
+// Paragraph 15.4: compute the inner-edge filtering strength
+
+// Interior filter limit for a given filter 'level'.
+// With sharpness > 0 the level is shifted right (by 2 when sharpness > 4,
+// else by 1) and then capped at 9 - sharpness.
+// The returned value is never below 1.
+static int GetILevel(int sharpness, int level) {
+  int ilevel = level;
+  if (sharpness > 0) {
+    const int cap = 9 - sharpness;
+    ilevel >>= (sharpness > 4) ? 2 : 1;
+    if (ilevel > cap) ilevel = cap;
+  }
+  // The floor is applied last, so it also overrides a cap that is below 1.
+  return (ilevel < 1) ? 1 : ilevel;
+}
+
+// Copies the current block from yuv_out_ into yuv_out2_ and runs the inner
+// loop filter at strength 'level' on that copy (simple or complex variant).
+static void DoFilter(const VP8EncIterator* const it, int level) {
+  const VP8Encoder* const enc = it->enc_;
+  uint8_t* const y = it->yuv_out2_ + Y_OFF;
+  uint8_t* const u = it->yuv_out2_ + U_OFF;
+  uint8_t* const v = it->yuv_out2_ + V_OFF;
+  const int inner = GetILevel(enc->config_->filter_sharpness, level);
+  const int edge = 2 * level + inner;
+
+  // All the filter calls below write through y/u/v, i.e. into yuv_out2_.
+  memcpy(y, it->yuv_out_, YUV_SIZE * sizeof(uint8_t));
+  if (enc->filter_hdr_.simple_ == 1) {   // simple: luma only
+    VP8EncSimpleHFilter16i(y, BPS, edge);
+    VP8EncSimpleVFilter16i(y, BPS, edge);
+  } else {                               // complex: luma and chroma
+    const int hev_thresh = (level < 15) ? 0 : (level < 40) ? 1 : 2;
+    VP8EncHFilter16i(y, BPS, edge, inner, hev_thresh);
+    VP8EncHFilter8i(u, v, BPS, edge, inner, hev_thresh);
+    VP8EncVFilter16i(y, BPS, edge, inner, hev_thresh);
+    VP8EncVFilter8i(u, v, BPS, edge, inner, hev_thresh);
+  }
+}
+
+//------------------------------------------------------------------------------
+// SSIM metric
+
+enum { KERNEL = 3 };
+static const double kMinValue = 1.e-10;  // minimal threshold
+
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst) {
+  dst->w   += src->w;     // number of accumulated samples
+  dst->xm  += src->xm;    // sum of x
+  dst->ym  += src->ym;    // sum of y
+  dst->xxm += src->xxm;   // sum of x * x
+  dst->xym += src->xym;   // sum of x * y
+  dst->yym += src->yym;   // sum of y * y
+}
+
+// Adds the KERNEL-wide window around (xo, yo), clipped to W x H, to 'stats'.
+static void VP8SSIMAccumulate(const uint8_t* src1, int stride1,
+                              const uint8_t* src2, int stride2,
+                              int xo, int yo, int W, int H,
+                              DistoStats* const stats) {
+  const int x0 = (xo < KERNEL) ? 0 : xo - KERNEL;
+  const int y0 = (yo < KERNEL) ? 0 : yo - KERNEL;
+  const int x1 = (xo + KERNEL >= W) ? W - 1 : xo + KERNEL;
+  const int y1 = (yo + KERNEL >= H) ? H - 1 : yo + KERNEL;
+  int i, j;
+  for (j = y0; j <= y1; ++j) {
+    const uint8_t* const row1 = src1 + j * stride1;
+    const uint8_t* const row2 = src2 + j * stride2;
+    for (i = x0; i <= x1; ++i) {
+      const int a = row1[i], b = row2[i];
+      stats->w   += 1;
+      stats->xm  += a;
+      stats->ym  += b;
+      stats->xxm += a * a;
+      stats->xym += a * b;
+      stats->yym += b * b;
+    }
+  }
+}
+
+// Computes the SSIM score from accumulated sums. C1 and C2 are the
+// stabilizing terms (0.01 * 255)^2 and (0.03 * 255)^2, scaled by w^2.
+double VP8SSIMGet(const DistoStats* const stats) {
+  const double xmxm = stats->xm * stats->xm;
+  const double ymym = stats->ym * stats->ym;
+  const double xmym = stats->xm * stats->ym;
+  const double w2 = stats->w * stats->w;
+  const double C1 = 6.5025 * w2;
+  const double C2 = 58.5225 * w2;
+  const double vx = stats->xxm * stats->w - xmxm;
+  const double vy = stats->yym * stats->w - ymym;
+  // Rounding may leave a tiny negative value here: clamp to zero.
+  const double sxx = (vx < 0.) ? 0. : vx;
+  const double syy = (vy < 0.) ? 0. : vy;
+  const double sxy = stats->xym * stats->w - xmym;
+  const double fnum = (2 * xmym + C1) * (2 * sxy + C2);
+  const double fden = (xmxm + ymym + C1) * (sxx + syy + C2);
+  // A zero denominator falls back to kMinValue.
+  return (fden != 0.) ? fnum / fden : kMinValue;
+}
+
+double VP8SSIMGetSquaredError(const DistoStats* const s) {
+  if (s->w > 0.) {  // only computed when the sample count is positive
+    const double iw2 = 1. / (s->w * s->w);
+    const double sxx = s->xxm * s->w - s->xm * s->xm;
+    const double syy = s->yym * s->w - s->ym * s->ym;
+    const double sxy = s->xym * s->w - s->xm * s->ym;
+    const double SSE = iw2 * (sxx + syy - 2. * sxy);
+    if (SSE > kMinValue) return SSE;
+  }
+  return kMinValue;  // floor of the result; also the answer when w <= 0
+}
+
+// Accumulates into 'stats' the window sums centered on each pixel of W x H.
+void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
+                            const uint8_t* src2, int stride2,
+                            int W, int H, DistoStats* const stats) {
+  int row, col;
+  for (row = 0; row < H; ++row) {
+    for (col = 0; col < W; ++col)
+      VP8SSIMAccumulate(src1, stride1, src2, stride2, col, row, W, H, stats);
+  }
+}
+
+static double GetMBSSIM(const uint8_t* yuv1, const uint8_t* yuv2) {
+  int x, y;
+  DistoStats s = { .0, .0, .0, .0, .0, .0 };  // pooled over Y, U and V
+
+  // luma: window centers cover the inner 10 x 10 area of the 16 x 16 block
+  for (x = 3; x < 13; x++) {
+    for (y = 3; y < 13; y++) {
+      VP8SSIMAccumulate(yuv1 + Y_OFF, BPS, yuv2 + Y_OFF, BPS, x, y, 16, 16, &s);
+    }
+  }
+  for (x = 1; x < 7; x++) {  // chroma: inner 6 x 6 centers of each 8 x 8
+    for (y = 1; y < 7; y++) {
+      VP8SSIMAccumulate(yuv1 + U_OFF, BPS, yuv2 + U_OFF, BPS, x, y, 8, 8, &s);
+      VP8SSIMAccumulate(yuv1 + V_OFF, BPS, yuv2 + V_OFF, BPS, x, y, 8, 8, &s);
+    }
+  }
+  return VP8SSIMGet(&s);
+}
+
+//------------------------------------------------------------------------------
+// Exposed APIs: Encoder should call the following 3 functions to adjust
+// loop filter strength
+
+// Readies the filter-strength search: builds the lookup tables and zeroes
+// every lf_stats_ entry. Does nothing when 'it' carries no lf_stats_ buffer.
+void VP8InitFilter(VP8EncIterator* const it) {
+  int seg, lvl;
+  if (!it->lf_stats_) return;
+
+  InitTables();
+  for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+    for (lvl = 0; lvl < MAX_LF_LEVELS; ++lvl) (*it->lf_stats_)[seg][lvl] = 0;
+  }
+}
+
+void VP8StoreFilterStats(VP8EncIterator* const it) {
+  int d;
+  const int s = it->mb_->segment_;
+  const int level0 = it->enc_->dqm_[s].fstrength_;  // TODO: ref_lf_delta[]
+
+  // explore levels within +/-quant of level0 (in steps of 4 if range >= 4)
+  const int delta_min = -it->enc_->dqm_[s].quant_;
+  const int delta_max = it->enc_->dqm_[s].quant_;
+  const int step_size = (delta_max - delta_min >= 4) ? 4 : 1;
+
+  if (!it->lf_stats_) return;
+
+  // NOTE: the filter is currently applied across sub-block edges only,
+  // for two reasons:
+  // 1. Filtering macroblock edges would change pixels of the left and top
+  // macroblocks, which would be hard to restore.
+  // 2. The macroblocks below and to the right are not compressed yet, so
+  // their edges cannot be filtered.
+  if (it->mb_->type_ == 1 && it->mb_->skip_) return;
+
+  // Level 0 (no filtering) is always scored, straight from yuv_out_.
+  (*it->lf_stats_)[s][0] += GetMBSSIM(it->yuv_in_, it->yuv_out_);
+
+  for (d = delta_min; d <= delta_max; d += step_size) {
+    const int level = level0 + d;
+    if (level <= 0 || level >= MAX_LF_LEVELS) {
+      continue;   // level 0 is scored above; the rest is out of range
+    }
+    DoFilter(it, level);
+    (*it->lf_stats_)[s][level] += GetMBSSIM(it->yuv_in_, it->yuv_out2_);
+  }
+}
+
+// Picks, for each segment, the filter level whose accumulated lf_stats_ score
+// is highest and stores it as the segment's fstrength_.
+void VP8AdjustFilterStrength(VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  int seg;
+  if (!it->lf_stats_) return;   // no statistics buffer: nothing to adjust
+
+  for (seg = 0; seg < NUM_MB_SEGMENTS; ++seg) {
+    // Level 0 gets a 1e-5 relative head start: any other level must beat
+    // it by at least that margin to be selected.
+    double top = 1.00001 * (*it->lf_stats_)[seg][0];
+    int lvl, winner = 0;
+    for (lvl = 1; lvl < MAX_LF_LEVELS; ++lvl) {
+      const double score = (*it->lf_stats_)[seg][lvl];
+      if (!(score > top)) continue;
+      top = score;
+      winner = lvl;
+    }
+    enc->dqm_[seg].fstrength_ = winner;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/frame.c b/drivers/webpold/enc/frame.c
new file mode 100644
index 0000000000..bdd360069b
--- /dev/null
+++ b/drivers/webpold/enc/frame.c
@@ -0,0 +1,939 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//   frame coding and analysis
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "./cost.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define SEGMENT_VISU 0
+#define DEBUG_SEARCH 0    // useful to track search convergence
+
+// On-the-fly info about the current set of residuals. Handy to avoid
+// passing zillions of params.
+typedef struct {
+  int first;              // index of the first coefficient to consider
+  int last;               // index of the last non-zero coeff (-1 if none)
+  const int16_t* coeffs;  // the coefficients, indexed up to 15
+
+  int coeff_type;         // selects the three tables below
+  ProbaArray* prob;       // enc->proba_.coeffs_[coeff_type]
+  StatsArray* stats;      // enc->proba_.stats_[coeff_type]
+  CostArray*  cost;       // enc->proba_.level_cost_[coeff_type]
+} VP8Residual;
+
+//------------------------------------------------------------------------------
+// Tables for level coding
+
+const uint8_t VP8EncBands[16 + 1] = {   // coefficient index -> band index
+  0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
+  0  // sentinel (looked up when the coefficient index reaches 16)
+};
+
+// Fixed probabilities for the extra bits of the large-level categories.
+static const uint8_t kCat3[] = { 173, 148, 140 };
+static const uint8_t kCat4[] = { 176, 155, 140, 135 };
+static const uint8_t kCat5[] = { 180, 157, 141, 134, 130 };
+static const uint8_t kCat6[] =
+    { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
+
+//------------------------------------------------------------------------------
+// Reset the statistics about: number of skips, token proba, level cost,...
+
+static void ResetStats(VP8Encoder* const enc) {
+  // refresh the level costs first, then restart the skip counter at zero
+  VP8CalculateLevelCosts(&enc->proba_);
+  enc->proba_.nb_skip_ = 0;
+}
+
+//------------------------------------------------------------------------------
+// Skip decision probability
+
+#define SKIP_PROBA_THRESHOLD 250  // value below which using skip_proba is OK.
+
+static int CalcSkipProba(uint64_t nb, uint64_t total) {  // 255 if no events
+  return (total == 0) ? 255 : (int)((total - nb) * 255 / total);
+}
+
+// Sets the skip proba from the skip count; returns the bit-cost of coding it.
+static int FinalizeSkipProba(VP8Encoder* const enc) {
+  VP8Proba* const proba = &enc->proba_;
+  const int total_mbs = enc->mb_w_ * enc->mb_h_;
+  const int skipped = proba->nb_skip_;
+  int cost = 256;   // 'use_skip_proba' bit
+
+  proba->skip_proba_ = CalcSkipProba(skipped, total_mbs);
+  proba->use_skip_proba_ = (proba->skip_proba_ < SKIP_PROBA_THRESHOLD);
+  if (!proba->use_skip_proba_) return cost;
+
+  cost += skipped * VP8BitCost(1, proba->skip_proba_);
+  cost += (total_mbs - skipped) * VP8BitCost(0, proba->skip_proba_);
+  cost += 8 * 256;   // cost of signaling the skip_proba_ itself.
+  return cost;
+}
+
+//------------------------------------------------------------------------------
+// Recording of token probabilities.
+
+static void ResetTokenStats(VP8Encoder* const enc) {
+  // wipe all the recorded token statistics
+  memset(enc->proba_.stats_, 0, sizeof(enc->proba_.stats_));
+}
+
+// Adds one event of value 'bit' to the packed counter. Returns 'bit'.
+static int Record(int bit, proba_t* const stats) {
+  proba_t packed = *stats;
+  // upper 16 bits: total count, lower 16 bits: number of 1s recorded.
+  if (packed >= 0xffff0000u) {   // the total count is about to overflow:
+    packed = ((packed + 1u) >> 1) & 0x7fff7fffu;   // halve both counters.
+  }
+  packed += 0x00010000u + bit;
+  *stats = packed;
+  return bit;
+}
+
+// We keep the table free variant around for reference, in case.
+#define USE_LEVEL_CODE_TABLE
+
+// Simulate block coding, but only record statistics. Returns 0 if the block
+// has no non-zero coeff, 1 otherwise. Note: no need to record fixed probas.
+static int RecordCoeffs(int ctx, const VP8Residual* const res) {
+  int n = res->first;
+  proba_t* s = res->stats[VP8EncBands[n]][ctx];
+  if (res->last  < 0) {
+    Record(0, s + 0);   // block is empty: only the 'no more coeff' event
+    return 0;
+  }
+  while (n <= res->last) {
+    int v;
+    Record(1, s + 0);   // at least one more non-zero coeff follows
+    while ((v = res->coeffs[n++]) == 0) {
+      Record(0, s + 1);                    // a zero coefficient...
+      s = res->stats[VP8EncBands[n]][0];   // ...switches to context 0
+    }
+    Record(1, s + 1);   // the coefficient is non-zero
+    if (!Record(2u < (unsigned int)(v + 1), s + 2)) {  // v = -1 or 1
+      s = res->stats[VP8EncBands[n]][1];   // context 1 follows a +/-1
+    } else {
+      v = abs(v);
+#if !defined(USE_LEVEL_CODE_TABLE)
+      if (!Record(v > 4, s + 3)) {
+        if (Record(v != 2, s + 4))
+          Record(v == 4, s + 5);
+      } else if (!Record(v > 10, s + 6)) {
+        Record(v > 6, s + 7);
+      } else if (!Record((v >= 3 + (8 << 2)), s + 8)) {
+        Record((v >= 3 + (8 << 1)), s + 9);
+      } else {
+        Record((v >= 3 + (8 << 3)), s + 10);
+      }
+#else
+      if (v > MAX_VARIABLE_LEVEL)
+        v = MAX_VARIABLE_LEVEL;   // larger levels share the same table entry
+
+      {
+        const int bits = VP8LevelCodes[v - 1][1];
+        int pattern = VP8LevelCodes[v - 1][0];
+        int i;
+        // 'pattern' flags which probas are used, 'bits' holds the bit values
+        for (i = 0; (pattern >>= 1) != 0; ++i) {
+          const int mask = 2 << i;
+          if (pattern & 1) Record(!!(bits & mask), s + 3 + i);
+        }
+      }
+#endif
+      s = res->stats[VP8EncBands[n]][2];   // context 2 follows a larger level
+    }
+  }
+  if (n < 16) Record(0, s + 0);   // explicit end-of-block, unless block is full
+  return 1;
+}
+
+// Returns the probability (scaled to 0..255) of coding a '0', estimated from
+// 'nb' occurrences of '1' among 'total' events. Returns 255 when nb == 0.
+static int CalcTokenProba(int nb, int total) {
+  assert(nb <= total);
+  return nb ? (255 - nb * 255 / total) : 255;
+}
+
+// Bit-cost of 'nb' ones plus 'total - nb' zeros, all coded with 'proba'.
+static int BranchCost(int nb, int total, int proba) {
+  return (total - nb) * VP8BitCost(0, proba) + nb * VP8BitCost(1, proba);
+}
+
+static int FinalizeTokenProbas(VP8Encoder* const enc) {
+  VP8Proba* const proba = &enc->proba_;
+  int has_changed = 0;   // set if any proba differs from its default value
+  int size = 0;          // total bit-cost of the proba updates (returned)
+  int t, b, c, p;
+  for (t = 0; t < NUM_TYPES; ++t) {
+    for (b = 0; b < NUM_BANDS; ++b) {
+      for (c = 0; c < NUM_CTX; ++c) {
+        for (p = 0; p < NUM_PROBAS; ++p) {
+          const proba_t stats = proba->stats_[t][b][c][p];
+          const int nb = (stats >> 0) & 0xffff;       // number of 1s recorded
+          const int total = (stats >> 16) & 0xffff;   // number of events
+          const int update_proba = VP8CoeffsUpdateProba[t][b][c][p];
+          const int old_p = VP8CoeffsProba0[t][b][c][p];
+          const int new_p = CalcTokenProba(nb, total);
+          // cost of keeping the default proba vs. signaling a new 8-bit one
+          const int old_cost = BranchCost(nb, total, old_p)
+                             + VP8BitCost(0, update_proba);
+          const int new_cost = BranchCost(nb, total, new_p)
+                             + VP8BitCost(1, update_proba)
+                             + 8 * 256;
+          const int use_new_p = (old_cost > new_cost);
+          size += VP8BitCost(use_new_p, update_proba);
+          if (use_new_p) {  // only use proba that seem meaningful enough.
+            proba->coeffs_[t][b][c][p] = new_p;
+            has_changed |= (new_p != old_p);
+            size += 8 * 256;
+          } else {
+            proba->coeffs_[t][b][c][p] = old_p;
+          }
+        }
+      }
+    }
+  }
+  proba->dirty_ = has_changed;
+  return size;
+}
+
+//------------------------------------------------------------------------------
+// helper functions for residuals struct VP8Residual.
+
+static void InitResidual(int first, int coeff_type,
+                         VP8Encoder* const enc, VP8Residual* const res) {
+  res->first = first;   // 'last' and 'coeffs' are not set here
+  res->coeff_type = coeff_type;
+  res->cost  = enc->proba_.level_cost_[coeff_type];
+  res->stats = enc->proba_.stats_[coeff_type];
+  res->prob  = enc->proba_.coeffs_[coeff_type];
+}
+
+// Attaches 'coeffs' to 'res' and locates its last non-zero entry.
+static void SetResidualCoeffs(const int16_t* const coeffs,
+                              VP8Residual* const res) {
+  int n = 15;
+  // Walk backward until a non-zero coeff is met,
+  // or until the index drops below res->first.
+  while (n >= res->first && coeffs[n] == 0) {
+    --n;
+  }
+  res->last = (n >= res->first) ? n : -1;   // -1: nothing but zeros
+  res->coeffs = coeffs;
+}
+
+//------------------------------------------------------------------------------
+// Mode costs
+
+static int GetResidualCost(int ctx, const VP8Residual* const res) {
+  int n = res->first;
+  int p0 = res->prob[VP8EncBands[n]][ctx][0];   // proba of the 'more coeffs' bit
+  const uint16_t* t = res->cost[VP8EncBands[n]][ctx];
+  int cost;
+
+  if (res->last < 0) {
+    return VP8BitCost(0, p0);   // empty block: just the 'no coeff' bit
+  }
+  cost = 0;
+  while (n <= res->last) {
+    const int v = res->coeffs[n];
+    const int b = VP8EncBands[n + 1];   // band of the next coefficient
+    ++n;
+    if (v == 0) {
+      // short-case for VP8LevelCost(t, 0) (note: VP8LevelFixedCosts[0] == 0):
+      cost += t[0];
+      t = res->cost[b][0];   // context 0 follows a zero; p0 is left unchanged
+      continue;
+    }
+    cost += VP8BitCost(1, p0);
+    if (2u >= (unsigned int)(v + 1)) {   // v = -1 or 1
+      // short-case for "VP8LevelCost(t, 1)" (256 is VP8LevelFixedCosts[1]):
+      cost += 256 + t[1];
+      p0 = res->prob[b][1][0];
+      t = res->cost[b][1];
+    } else {
+      cost += VP8LevelCost(t, abs(v));
+      p0 = res->prob[b][2][0];
+      t = res->cost[b][2];
+    }
+  }
+  if (n < 16) cost += VP8BitCost(0, p0);   // end-of-block, unless block is full
+  return cost;
+}
+
+// Returns the cost of coding the 4x4 luma sub-block 'levels', in the context
+// of the sub-block currently designated by it->i4_.
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]) {
+  const int col = it->i4_ & 3;    // i4_ is used as a raster index
+  const int row = it->i4_ >> 2;   // in a 4-wide grid of sub-blocks
+  VP8Residual res;
+
+  InitResidual(0, 3, it->enc_, &res);   // coeff type 3, first index 0
+  SetResidualCoeffs(levels, &res);
+
+  // the context is the sum of the top and left 'non-zero' flags
+  return GetResidualCost(it->top_nz_[col] + it->left_nz_[row], &res);
+}
+
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int x, y;
+  int R = 0;   // accumulated cost, returned to the caller
+
+  VP8IteratorNzToBytes(it);   // re-import the non-zero context
+
+  // DC (coeff type 1, starting at index 0)
+  InitResidual(0, 1, enc, &res);
+  SetResidualCoeffs(rd->y_dc_levels, &res);
+  R += GetResidualCost(it->top_nz_[8] + it->left_nz_[8], &res);
+
+  // AC (coeff type 0, starting at index 1)
+  InitResidual(1, 0, enc, &res);
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      R += GetResidualCost(ctx, &res);
+      it->top_nz_[x] = it->left_nz_[y] = (res.last >= 0);   // for next blocks
+    }
+  }
+  return R;
+}
+
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+  int ch, x, y;
+  int R = 0;   // accumulated cost, returned to the caller
+
+  VP8IteratorNzToBytes(it);  // re-import the non-zero context
+
+  InitResidual(0, 2, enc, &res);   // coeff type 2, starting at index 0
+  for (ch = 0; ch <= 2; ch += 2) {   // two passes: blocks 0..3, then 4..7
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        R += GetResidualCost(ctx, &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = (res.last >= 0);
+      }
+    }
+  }
+  return R;
+}
+
+//------------------------------------------------------------------------------
+// Coefficient coding
+
+static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
+  int n = res->first;
+  const uint8_t* p = res->prob[VP8EncBands[n]][ctx];
+  // NOTE(review): VP8PutBit() presumably returns the bit it just coded.
+  if (!VP8PutBit(bw, res->last >= 0, p[0])) {
+    return 0;   // empty block
+  }
+
+  while (n < 16) {
+    const int c = res->coeffs[n++];
+    const int sign = c < 0;
+    int v = sign ? -c : c;
+    if (!VP8PutBit(bw, v != 0, p[1])) {
+      p = res->prob[VP8EncBands[n]][0];   // context 0 follows a zero
+      continue;
+    }
+    if (!VP8PutBit(bw, v > 1, p[2])) {
+      p = res->prob[VP8EncBands[n]][1];   // context 1 follows a +/-1
+    } else {
+      if (!VP8PutBit(bw, v > 4, p[3])) {
+        if (VP8PutBit(bw, v != 2, p[4]))
+          VP8PutBit(bw, v == 4, p[5]);
+      } else if (!VP8PutBit(bw, v > 10, p[6])) {
+        if (!VP8PutBit(bw, v > 6, p[7])) {
+          VP8PutBit(bw, v == 6, 159);      // fixed probas from here on
+        } else {
+          VP8PutBit(bw, v >= 9, 165);
+          VP8PutBit(bw, !(v & 1), 145);
+        }
+      } else {
+        int mask;             // highest extra bit to code
+        const uint8_t* tab;   // fixed probas of the extra bits
+        if (v < 3 + (8 << 1)) {          // kCat3  (3b)
+          VP8PutBit(bw, 0, p[8]);
+          VP8PutBit(bw, 0, p[9]);
+          v -= 3 + (8 << 0);
+          mask = 1 << 2;
+          tab = kCat3;
+        } else if (v < 3 + (8 << 2)) {   // kCat4  (4b)
+          VP8PutBit(bw, 0, p[8]);
+          VP8PutBit(bw, 1, p[9]);
+          v -= 3 + (8 << 1);
+          mask = 1 << 3;
+          tab = kCat4;
+        } else if (v < 3 + (8 << 3)) {   // kCat5  (5b)
+          VP8PutBit(bw, 1, p[8]);
+          VP8PutBit(bw, 0, p[10]);
+          v -= 3 + (8 << 2);
+          mask = 1 << 4;
+          tab = kCat5;
+        } else {                         // kCat6 (11b)
+          VP8PutBit(bw, 1, p[8]);
+          VP8PutBit(bw, 1, p[10]);
+          v -= 3 + (8 << 3);
+          mask = 1 << 10;
+          tab = kCat6;
+        }
+        while (mask) {   // extra bits, most significant first
+          VP8PutBit(bw, !!(v & mask), *tab++);
+          mask >>= 1;
+        }
+      }
+      p = res->prob[VP8EncBands[n]][2];   // context 2 follows a larger level
+    }
+    VP8PutBitUniform(bw, sign);
+    if (n == 16 || !VP8PutBit(bw, n <= res->last, p[0])) {
+      return 1;   // EOB
+    }
+  }
+  return 1;
+}
+
+static void CodeResiduals(VP8BitWriter* const bw,
+                          VP8EncIterator* const it,
+                          const VP8ModeScore* const rd) {
+  int x, y, ch;
+  VP8Residual res;
+  uint64_t pos1, pos2, pos3;   // writer positions: start, after luma, after U/V
+  const int i16 = (it->mb_->type_ == 1);
+  const int segment = it->mb_->segment_;
+  VP8Encoder* const enc = it->enc_;
+
+  VP8IteratorNzToBytes(it);
+
+  pos1 = VP8BitWriterPos(bw);
+  if (i16) {
+    InitResidual(0, 1, enc, &res);   // DC block (coeff type 1)
+    SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+      PutCoeffs(bw, it->top_nz_[8] + it->left_nz_[8], &res);
+    InitResidual(1, 0, enc, &res);   // AC-only blocks (type 0, from index 1)
+  } else {
+    InitResidual(0, 3, enc, &res);   // complete 4x4 blocks (coeff type 3)
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = PutCoeffs(bw, ctx, &res);
+    }
+  }
+  pos2 = VP8BitWriterPos(bw);
+
+  // U/V
+  InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            PutCoeffs(bw, ctx, &res);
+      }
+    }
+  }
+  pos3 = VP8BitWriterPos(bw);
+  it->luma_bits_ = pos2 - pos1;
+  it->uv_bits_ = pos3 - pos2;
+  it->bit_count_[segment][i16] += it->luma_bits_;   // [0]: i4 luma, [1]: i16
+  it->bit_count_[segment][2] += it->uv_bits_;       // [2]: chroma
+  VP8IteratorBytesToNz(it);
+}
+
+// Same as CodeResiduals, but doesn't actually write anything.
+// Instead, it just records the event distribution.
+static void RecordResiduals(VP8EncIterator* const it,
+                            const VP8ModeScore* const rd) {
+  int x, y, ch;
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+
+  VP8IteratorNzToBytes(it);
+
+  if (it->mb_->type_ == 1) {   // i16x16
+    InitResidual(0, 1, enc, &res);   // DC block (coeff type 1)
+    SetResidualCoeffs(rd->y_dc_levels, &res);
+    it->top_nz_[8] = it->left_nz_[8] =
+      RecordCoeffs(it->top_nz_[8] + it->left_nz_[8], &res);
+    InitResidual(1, 0, enc, &res);   // AC-only blocks (type 0, from index 1)
+  } else {
+    InitResidual(0, 3, enc, &res);   // complete 4x4 blocks (coeff type 3)
+  }
+
+  // luma-AC
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] = RecordCoeffs(ctx, &res);
+    }
+  }
+
+  // U/V
+  InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            RecordCoeffs(ctx, &res);
+      }
+    }
+  }
+
+  VP8IteratorBytesToNz(it);   // export the updated non-zero context
+}
+
+//------------------------------------------------------------------------------
+// Token buffer
+
+#ifdef USE_TOKEN_BUFFER
+
+void VP8TBufferInit(VP8TBuffer* const b) {   // -> empty, error-free buffer
+  b->error_ = 0;
+  b->left_ = 0;            // no token slot available yet
+  b->tokens_ = NULL;
+  b->rows_ = NULL;
+  b->last_ = &b->rows_;    // the next page will be linked as the list head
+}
+
+// Appends a new token page to 'b'; returns 0 (error_ is latched) on failure.
+int VP8TBufferNewPage(VP8TBuffer* const b) {
+  VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page));
+  b->error_ |= (page == NULL);   // already in error, or out of memory
+  if (page == NULL) return 0;
+  page->next_ = NULL;   // malloc() doesn't zero memory: terminate the list
+  *b->last_ = page;
+  b->last_ = &page->next_;
+  b->left_ = MAX_NUM_TOKEN;
+  b->tokens_ = page->tokens_;
+  return 1;
+}
+
+// Frees every page owned by 'b' and resets it. A NULL 'b' is ignored.
+void VP8TBufferClear(VP8TBuffer* const b) {
+  const VP8Tokens* page;
+  const VP8Tokens* following;
+  if (b == NULL) return;
+  for (page = b->rows_; page != NULL; page = following) {
+    following = page->next_;   // fetch the link before the page goes away
+    free((void*)page);
+  }
+  VP8TBufferInit(b);
+}
+
+int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
+                  const uint8_t* const probas) {
+  VP8Tokens* page;
+  if (b->error_) return 0;
+  for (page = b->rows_; page != NULL; page = page->next_) {
+    // only the last page stops early, at b->left_
+    const int stop = (page->next_ == NULL) ? b->left_ : 0;
+    int n;
+    for (n = MAX_NUM_TOKEN - 1; n >= stop; --n) {   // highest index first
+      const int token = page->tokens_[n];   // bit 15: value, low bits: proba id
+      VP8PutBit(bw, (token >> 15) & 1, probas[token & 0x7fff]);
+    }
+  }
+  return 1;
+}
+
+#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX))
+
+static int RecordCoeffTokens(int ctx, const VP8Residual* const res,
+                             VP8TBuffer* tokens) {
+  // NOTE(review): token-buffer counterpart of PutCoeffs(). Looks unfinished:
+  // the fixed-proba bits, the extra bits and the sign are not emitted below.
+  int n = res->first;
+  int b = VP8EncBands[n];
+  if (!VP8AddToken(tokens, res->last >= 0, TOKEN_ID(b, ctx, 0))) {
+    return 0;   // empty block
+  }
+
+  while (n < 16) {
+    const int c = res->coeffs[n++];
+    const int sign = c < 0;
+    int v = sign ? -c : c;
+    const int base_id = TOKEN_ID(b, ctx, 0);   // id of proba #0 for (b, ctx)
+    if (!VP8AddToken(tokens, v != 0, base_id + 1)) {
+      b = VP8EncBands[n];
+      ctx = 0;
+      continue;
+    }
+    if (!VP8AddToken(tokens, v > 1, base_id + 2)) {
+      b = VP8EncBands[n];
+      ctx = 1;
+    } else {
+      if (!VP8AddToken(tokens, v > 4, base_id + 3)) {
+        if (VP8AddToken(tokens, v != 2, base_id + 4))
+          VP8AddToken(tokens, v == 4, base_id + 5);
+      } else if (!VP8AddToken(tokens, v > 10, base_id + 6)) {
+        if (!VP8AddToken(tokens, v > 6, base_id + 7)) {
+//          VP8AddToken(tokens, v == 6, 159);
+        } else {
+//          VP8AddToken(tokens, v >= 9, 165);
+//          VP8AddToken(tokens, !(v & 1), 145);
+        }
+      } else {
+        int mask;
+        const uint8_t* tab;   // assigned below, but only used in disabled code
+        if (v < 3 + (8 << 1)) {          // kCat3  (3b)
+          VP8AddToken(tokens, 0, base_id + 8);
+          VP8AddToken(tokens, 0, base_id + 9);
+          v -= 3 + (8 << 0);
+          mask = 1 << 2;
+          tab = kCat3;
+        } else if (v < 3 + (8 << 2)) {   // kCat4  (4b)
+          VP8AddToken(tokens, 0, base_id + 8);
+          VP8AddToken(tokens, 1, base_id + 9);
+          v -= 3 + (8 << 1);
+          mask = 1 << 3;
+          tab = kCat4;
+        } else if (v < 3 + (8 << 3)) {   // kCat5  (5b)
+          VP8AddToken(tokens, 1, base_id + 8);
+          VP8AddToken(tokens, 0, base_id + 10);
+          v -= 3 + (8 << 2);
+          mask = 1 << 4;
+          tab = kCat5;
+        } else {                         // kCat6 (11b)
+          VP8AddToken(tokens, 1, base_id + 8);
+          VP8AddToken(tokens, 1, base_id + 10);
+          v -= 3 + (8 << 3);
+          mask = 1 << 10;
+          tab = kCat6;
+        }
+        while (mask) {   // currently a no-op: the token emission is disabled
+          // VP8AddToken(tokens, !!(v & mask), *tab++);
+          mask >>= 1;
+        }
+      }
+      ctx = 2;
+    }
+    b = VP8EncBands[n];
+    // VP8PutBitUniform(bw, sign);
+    if (n == 16 || !VP8AddToken(tokens, n <= res->last, TOKEN_ID(b, ctx, 0))) {
+      return 1;   // EOB
+    }
+  }
+  return 1;
+}
+
+static void RecordTokens(VP8EncIterator* const it,
+                         const VP8ModeScore* const rd, VP8TBuffer tokens[2]) {
+  int x, y, ch;
+  VP8Residual res;
+  VP8Encoder* const enc = it->enc_;
+
+  VP8IteratorNzToBytes(it);
+  if (it->mb_->type_ == 1) {   // i16x16
+    InitResidual(0, 1, enc, &res);
+    SetResidualCoeffs(rd->y_dc_levels, &res);
+// TODO(skal): FIX ->    it->top_nz_[8] = it->left_nz_[8] =
+      RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], &res, &tokens[0]);
+    InitResidual(1, 0, enc, &res);
+  } else {
+    InitResidual(0, 3, enc, &res);
+  }
+
+  // luma-AC (tokens go to tokens[0])
+  for (y = 0; y < 4; ++y) {
+    for (x = 0; x < 4; ++x) {
+      const int ctx = it->top_nz_[x] + it->left_nz_[y];
+      SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
+      it->top_nz_[x] = it->left_nz_[y] =
+          RecordCoeffTokens(ctx, &res, &tokens[0]);
+    }
+  }
+
+  // U/V (tokens go to tokens[1])
+  InitResidual(0, 2, enc, &res);
+  for (ch = 0; ch <= 2; ch += 2) {
+    for (y = 0; y < 2; ++y) {
+      for (x = 0; x < 2; ++x) {
+        const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+        SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
+        it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
+            RecordCoeffTokens(ctx, &res, &tokens[1]);
+      }
+    }
+  }
+}
+
+#endif    // USE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// ExtraInfo map / Debug function
+
+#if SEGMENT_VISU
+static void SetBlock(uint8_t* p, int value, int size) {
+  int rows_left = size;          // fills a size x size square...
+  while (rows_left-- > 0) {
+    memset(p, value, size);
+    p += BPS;                    // ...whose rows are BPS bytes apart
+  }
+}
+#endif
+
+static void ResetSSE(VP8Encoder* const enc) {
+  enc->sse_count_ = 0;
+  memset(enc->sse_, 0, sizeof(enc->sse_));   // zero every accumulator
+}
+
+// Adds the squared error between yuv_in_ and yuv_out_ (current macroblock)
+// to the per-plane accumulators of the encoder.
+static void StoreSSE(const VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  // Note: not totally accurate at boundary. And doesn't include in-loop filter.
+  enc->sse_[0] += VP8SSE16x16(it->yuv_in_ + Y_OFF, it->yuv_out_ + Y_OFF);
+  enc->sse_[1] += VP8SSE8x8(it->yuv_in_ + U_OFF, it->yuv_out_ + U_OFF);
+  enc->sse_[2] += VP8SSE8x8(it->yuv_in_ + V_OFF, it->yuv_out_ + V_OFF);
+  enc->sse_count_ += 16 * 16;
+}
+
+static void StoreSideInfo(const VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  const VP8MBInfo* const mb = it->mb_;
+  WebPPicture* const pic = enc->pic_;
+
+  if (pic->stats != NULL) {   // statistics were requested
+    StoreSSE(it);
+    enc->block_count_[0] += (mb->type_ == 0);
+    enc->block_count_[1] += (mb->type_ == 1);
+    enc->block_count_[2] += (mb->skip_ != 0);
+  }
+
+  if (pic->extra_info != NULL) {   // one byte of info per macroblock
+    uint8_t* const info = &pic->extra_info[it->x_ + it->y_ * enc->mb_w_];
+    switch (pic->extra_info_type) {
+      case 1: *info = mb->type_; break;
+      case 2: *info = mb->segment_; break;
+      case 3: *info = enc->dqm_[mb->segment_].quant_; break;
+      case 4: *info = (mb->type_ == 1) ? it->preds_[0] : 0xff; break;
+      case 5: *info = mb->uv_mode_; break;
+      case 6: {   // coded size of the macroblock, in bytes, capped at 255
+        const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
+        *info = (b > 255) ? 255 : b; break;
+      }
+      default: *info = 0; break;
+    };
+  }
+#if SEGMENT_VISU  // visualize segments and prediction modes
+  SetBlock(it->yuv_out_ + Y_OFF, mb->segment_ * 64, 16);
+  SetBlock(it->yuv_out_ + U_OFF, it->preds_[0] * 64, 8);
+  SetBlock(it->yuv_out_ + V_OFF, mb->uv_mode_ * 64, 8);
+#endif
+}
+
+//------------------------------------------------------------------------------
+// Main loops
+//
+//  VP8EncLoop(): does the final bitstream coding.
+
+static void ResetAfterSkip(VP8EncIterator* const it) {
+  if (it->mb_->type_ != 1) {
+    *it->nz_ &= (1 << 24);  // only the dc_nz bit survives
+    return;
+  }
+  *it->nz_ = 0;             // i16x16: reset all predictors...
+  it->left_nz_[8] = 0;      // ...including the left context used for the DC
+}
+
+int VP8EncLoop(VP8Encoder* const enc) {
+  int i, s, p;
+  int ok = 1;   // return value: 1 on success, 0 on error or abort
+  VP8EncIterator it;
+  VP8ModeScore info;
+  const int dont_use_skip = !enc->proba_.use_skip_proba_;
+  const int rd_opt = enc->rd_opt_level_;
+  const int kAverageBytesPerMB = 5;     // TODO: have a kTable[quality/10]
+  const int bytes_per_parts =
+    enc->mb_w_ * enc->mb_h_ * kAverageBytesPerMB / enc->num_parts_;
+
+  // Initialize the bit-writers
+  for (p = 0; p < enc->num_parts_; ++p) {
+    VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
+  }
+
+  ResetStats(enc);
+  ResetSSE(enc);
+
+  VP8IteratorInit(enc, &it);
+  VP8InitFilter(&it);
+  do {
+    VP8IteratorImport(&it);
+    // Warning! order is important: first call VP8Decimate() and
+    // *then* decide how to code the skip decision if there's one.
+    if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) {
+      CodeResiduals(it.bw_, &it, &info);
+    } else {   // reset predictors after a skip
+      ResetAfterSkip(&it);
+    }
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (enc->use_layer_) {
+      VP8EncCodeLayerBlock(&it);
+    }
+#endif
+    StoreSideInfo(&it);
+    VP8StoreFilterStats(&it);
+    VP8IteratorExport(&it);
+    ok = VP8IteratorProgress(&it, 20);   // a zero return stops the loop
+  } while (ok && VP8IteratorNext(&it, it.yuv_out_));
+
+  if (ok) {      // Finalize the partitions, check for extra errors.
+    for (p = 0; p < enc->num_parts_; ++p) {
+      VP8BitWriterFinish(enc->parts_ + p);
+      ok &= !enc->parts_[p].error_;
+    }
+  }
+
+  if (ok) {      // All good. Finish up.
+    if (enc->pic_->stats) {           // finalize byte counters...
+      for (i = 0; i <= 2; ++i) {
+        for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+          enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3);
+        }
+      }
+    }
+    VP8AdjustFilterStrength(&it);     // ...and store filter stats.
+  } else {
+    // Something bad happened -> need to do some memory cleanup.
+    VP8EncFreeBitWriters(enc);
+  }
+
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+//  VP8StatLoop(): only collect statistics (number of skips, token usage, ...)
+//                 This is used for deciding optimal probabilities. It also
+//                 modifies the quantizer value if some target (size, PSNR)
+//                 was specified.
+
+#define kHeaderSizeEstimate (15 + 20 + 10)      // TODO: fix better
+
+// One stats pass at quality 'q': updates the probas and returns the estimated
+// size, or 0 if aborted by the progress hook. '*PSNR' is set if non-NULL.
+static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
+                       float* const PSNR, int percent_delta) {
+  VP8EncIterator it;
+  uint64_t size = 0;
+  uint64_t distortion = 0;
+  const uint64_t pixel_count = nb_mbs * 384;   // 'nb_mbs' is decremented below
+
+  // Make sure the quality parameter is inside valid bounds
+  if (q < 0.f) q = 0.f;
+  if (q > 100.f) q = 100.f;
+
+  VP8SetSegmentParams(enc, q);      // setup segment quantizations and filters
+
+  ResetStats(enc);
+  ResetTokenStats(enc);
+
+  VP8IteratorInit(enc, &it);
+  do {
+    VP8ModeScore info;
+    VP8IteratorImport(&it);
+    if (VP8Decimate(&it, &info, rd_opt)) {
+      // Just record the number of skips and act like skip_proba is not used.
+      enc->proba_.nb_skip_++;
+    }
+    RecordResiduals(&it, &info);
+    size += info.R;
+    distortion += info.D;
+    if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
+      return 0;
+  } while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
+  size += FinalizeSkipProba(enc);
+  size += FinalizeTokenProbas(enc);
+  size += enc->segment_hdr_.size_;
+  size = ((size + 1024) >> 11) + kHeaderSizeEstimate;
+
+  if (PSNR) {   // no distortion at all: report 99 dB, don't divide by zero
+    *PSNR = (distortion == 0) ? 99.f
+          : (float)(10. * log10(255. * 255. * pixel_count / distortion));
+  }
+  return (int)size;
+}
+
+// successive refinement increments.
+static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
+
+int VP8StatLoop(VP8Encoder* const enc) {
+  const int do_search =
+    (enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
+  const int fast_probe = (enc->method_ < 2 && !do_search);
+  float q = enc->config_->quality;
+  const int max_passes = enc->config_->pass;
+  const int task_percent = 20;
+  // NOTE(review): assumes config_->pass >= 1 (used as divisor) -- confirm.
+  const int percent_per_pass = (task_percent + max_passes / 2) / max_passes;
+  const int final_percent = enc->percent_ + task_percent;
+  int pass;
+  int nb_mbs;
+
+  // Fast mode: quick analysis pass over few mbs. Better than nothing.
+  nb_mbs = enc->mb_w_ * enc->mb_h_;
+  if (fast_probe && nb_mbs > 100) nb_mbs = 100;
+
+  // No target size: just do several pass without changing 'q'
+  if (!do_search) {
+    for (pass = 0; pass < max_passes; ++pass) {
+      const int rd_opt = (enc->method_ > 2);
+      if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
+        return 0;   // aborted
+      }
+    }
+  } else {
+    // search by successive refinement (steps from dqs[]) of 'q' toward target
+    for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) {
+      const int rd_opt = 1;
+      float PSNR;
+      int criterion;   // non-zero if 'q' must be increased
+      const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR,
+                                   percent_per_pass);
+#if DEBUG_SEARCH
+      printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
+#endif
+      if (!size) return 0;   // aborted
+      if (enc->config_->target_PSNR > 0) {   // PSNR target has precedence
+        criterion = (PSNR < enc->config_->target_PSNR);
+      } else {
+        criterion = (size < enc->config_->target_size);
+      }
+      // dichotomize
+      if (criterion) {
+        q += dqs[pass];
+      } else {
+        q -= dqs[pass];
+      }
+    }
+  }
+  return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/histogram.c b/drivers/webpold/enc/histogram.c
new file mode 100644
index 0000000000..ca838e064d
--- /dev/null
+++ b/drivers/webpold/enc/histogram.c
@@ -0,0 +1,406 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+#include <stdio.h>
+
+#include "./backward_references.h"
+#include "./histogram.h"
+#include "../dsp/lossless.h"
+#include "../utils/utils.h"
+
+static void HistogramClear(VP8LHistogram* const p) {
+  p->bit_cost_ = 0;   // cached estimate is meaningless once counts are wiped
+  memset(p->distance_, 0, sizeof(p->distance_));
+  memset(p->alpha_, 0, sizeof(p->alpha_));
+  memset(p->blue_, 0, sizeof(p->blue_));
+  memset(p->red_, 0, sizeof(p->red_));
+  memset(p->literal_, 0, sizeof(p->literal_));   // palette_code_bits_ is kept
+}
+
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+                            VP8LHistogram* const histo) {
+  int i;
+  for (i = 0; i < refs->size; ++i) {   // 'histo' is added to, never cleared here
+    VP8LHistogramAddSinglePixOrCopy(histo, &refs->refs[i]);
+  }
+}
+
+void VP8LHistogramCreate(VP8LHistogram* const p,
+                         const VP8LBackwardRefs* const refs,
+                         int palette_code_bits) {
+  if (palette_code_bits >= 0) {   // negative value: keep the current setting
+    p->palette_code_bits_ = palette_code_bits;
+  }
+  HistogramClear(p);                 // drop previous counts and cached bit cost
+  VP8LHistogramStoreRefs(refs, p);   // then count every token of 'refs'
+}
+
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
+  p->palette_code_bits_ = palette_code_bits;   // stored unconditionally
+  HistogramClear(p);                           // all counts and bit_cost_ set to 0
+}
+
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
+  int i;
+  VP8LHistogramSet* set;
+  VP8LHistogram* bulk;
+  // Single chunk laid out as [set][size pointers][size histograms]. Multiply
+  // in 64 bits: 'size * sizeof()' alone can wrap around a 32-bit size_t.
+  const uint64_t total_size = (uint64_t)sizeof(*set)
+      + (uint64_t)size * (sizeof(*set->histograms) + sizeof(**set->histograms));
+  uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
+  if (memory == NULL) return NULL;   // allocation refused or failed
+  set = (VP8LHistogramSet*)memory;
+  memory += sizeof(*set);
+  set->histograms = (VP8LHistogram**)memory;   // pointer table follows the header
+  memory += size * sizeof(*set->histograms);
+  bulk = (VP8LHistogram*)memory;               // histogram storage follows the table
+  set->max_size = size;
+  set->size = size;
+  for (i = 0; i < size; ++i) {
+    set->histograms[i] = bulk + i;
+    VP8LHistogramInit(set->histograms[i], cache_bits);   // zeroed counters
+  }
+  return set;
+}
+
+// -----------------------------------------------------------------------------
+
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
+                                     const PixOrCopy* const v) {
+  if (PixOrCopyIsLiteral(v)) {   // literal pixel: one count per component
+    ++histo->alpha_[PixOrCopyLiteral(v, 3)];
+    ++histo->red_[PixOrCopyLiteral(v, 2)];
+    ++histo->literal_[PixOrCopyLiteral(v, 1)];   // component 1 shares literal_[]
+    ++histo->blue_[PixOrCopyLiteral(v, 0)];
+  } else if (PixOrCopyIsCacheIdx(v)) {   // cache index: stored past the length codes
+    int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
+    ++histo->literal_[literal_ix];
+  } else {   // copy: count the length prefix code and the distance prefix code
+    int code, extra_bits_count, extra_bits_value;   // only 'code' is used below
+    PrefixEncode(PixOrCopyLength(v),
+                 &code, &extra_bits_count, &extra_bits_value);
+    ++histo->literal_[256 + code];
+    PrefixEncode(PixOrCopyDistance(v),
+                 &code, &extra_bits_count, &extra_bits_value);
+    ++histo->distance_[code];
+  }
+}
+
+
+
+static double BitsEntropy(const int* const array, int n) {   // bit estimate for n counts
+  double retval = 0.;
+  int sum = 0;        // total of all counts
+  int nonzeros = 0;   // number of entries with a non-zero count
+  int max_val = 0;    // largest single count
+  int i;
+  double mix;         // weight given to min_limit in the final blend
+  for (i = 0; i < n; ++i) {
+    if (array[i] != 0) {
+      sum += array[i];
+      ++nonzeros;
+      retval -= VP8LFastSLog2(array[i]);
+      if (max_val < array[i]) {
+        max_val = array[i];
+      }
+    }
+  }
+  retval += VP8LFastSLog2(sum);
+
+  if (nonzeros < 5) {
+    if (nonzeros <= 1) {
+      return 0;   // at most one symbol in use: estimated as free
+    }
+    // Two symbols, they will be 0 and 1 in a Huffman code.
+    // Let's mix in a bit of entropy to favor good clustering when
+    // distributions of these are combined.
+    if (nonzeros == 2) {
+      return 0.99 * sum + 0.01 * retval;
+    }
+    // No matter what the entropy says, we cannot be better than min_limit
+    // with Huffman coding. I am mixing a bit of entropy into the
+    // min_limit since it produces much better (~0.5 %) compression results
+    // perhaps because of better entropy clustering.
+    if (nonzeros == 3) {
+      mix = 0.95;
+    } else {
+      mix = 0.7;  // nonzeros == 4.
+    }
+  } else {
+    mix = 0.627;
+  }
+
+  {
+    double min_limit = 2 * sum - max_val;
+    min_limit = mix * min_limit + (1.0 - mix) * retval;
+    return (retval < min_limit) ? min_limit : retval;   // never below the blended floor
+  }
+}
+
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
+  double retval = BitsEntropy(&p->literal_[0], VP8LHistogramNumCodes(p))
+                + BitsEntropy(&p->red_[0], 256)
+                + BitsEntropy(&p->blue_[0], 256)
+                + BitsEntropy(&p->alpha_[0], 256)
+                + BitsEntropy(&p->distance_[0], NUM_DISTANCE_CODES);
+  // Compute the extra bits cost.
+  int i;
+  for (i = 2; i < NUM_LENGTH_CODES - 2; ++i) {   // length code i + 2 costs (i >> 1)
+    retval +=
+        (i >> 1) * p->literal_[256 + i + 2];
+  }
+  for (i = 2; i < NUM_DISTANCE_CODES - 2; ++i) {   // same weighting for distances
+    retval += (i >> 1) * p->distance_[i + 2];
+  }
+  return retval;
+}
+
+
+// Returns the cost to encode the rle-encoded entropy code.
+// The constants in this function are experimental.
+static double HuffmanCost(const int* const population, int length) {
+  // Small bias because Huffman code length is typically not stored in
+  // full length.
+  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
+  static const double kSmallBias = 9.1;
+  double retval = kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
+  int streak = 0;   // length of the current run of equal population values
+  int i = 0;
+  for (; i < length - 1; ++i) {
+    ++streak;
+    if (population[i] == population[i + 1]) {
+      continue;   // the run goes on
+    }
+ last_streak_hack:
+    // population[i] points now to the symbol in the streak of same values.
+    if (streak > 3) {   // long run: fixed cost plus a small per-entry cost
+      if (population[i] == 0) {
+        retval += 1.5625 + 0.234375 * streak;
+      } else {
+        retval += 2.578125 + 0.703125 * streak;
+      }
+    } else {   // short run: flat per-entry cost
+      if (population[i] == 0) {
+        retval += 1.796875 * streak;
+      } else {
+        retval += 3.28125 * streak;
+      }
+    }
+    streak = 0;
+  }
+  if (i == length - 1) {   // final run not costed yet: jump into the loop body
+    ++streak;              // once; the ++i that follows then ends the loop
+    goto last_streak_hack;
+  }
+  return retval;
+}
+
+// Estimates the Huffman dictionary + other block overhead size.
+static double HistogramEstimateBitsHeader(const VP8LHistogram* const p) {
+  return HuffmanCost(&p->alpha_[0], 256) +   // one HuffmanCost() per histogram
+         HuffmanCost(&p->red_[0], 256) +
+         HuffmanCost(&p->literal_[0], VP8LHistogramNumCodes(p)) +
+         HuffmanCost(&p->blue_[0], 256) +
+         HuffmanCost(&p->distance_[0], NUM_DISTANCE_CODES);
+}
+
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
+  return HistogramEstimateBitsHeader(p) + VP8LHistogramEstimateBitsBulk(p);   // header + payload
+}
+
+static void HistogramBuildImage(int xsize, int histo_bits,
+                                const VP8LBackwardRefs* const backward_refs,
+                                VP8LHistogramSet* const image) {
+  int i;
+  int x = 0, y = 0;   // pixel position where the current token starts
+  const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);   // tiles per row
+  VP8LHistogram** const histograms = image->histograms;
+  assert(histo_bits > 0);
+  for (i = 0; i < backward_refs->size; ++i) {
+    const PixOrCopy* const v = &backward_refs->refs[i];
+    const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);   // tile index
+    VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
+    x += PixOrCopyLength(v);   // advance by the number of pixels the token covers
+    while (x >= xsize) {       // wrap to the following row(s)
+      x -= xsize;
+      ++y;
+    }
+  }
+}
+
+static uint32_t MyRand(uint32_t *seed) {
+  // Multiplicative step (x16807, wrapping in 32 bits). A zero state would
+  // stay zero forever, so it is replaced by 1.
+  const uint32_t next = *seed * 16807U;
+  *seed = (next == 0) ? 1 : next;
+  return *seed;
+}
+
+static int HistogramCombine(const VP8LHistogramSet* const in,
+                            VP8LHistogramSet* const out, int num_pairs) {
+  int ok = 0;   // return value: 1 on success, 0 if the scratch malloc failed
+  int i, iter;
+  uint32_t seed = 0;
+  int tries_with_no_success = 0;   // consecutive outer iterations without a merge
+  const int min_cluster_size = 2;
+  int out_size = in->size;         // number of live histograms in 'out'
+  const int outer_iters = in->size * 3;
+  VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
+  VP8LHistogram* cur_combo = histos + 0;    // trial merged histogram
+  VP8LHistogram* best_combo = histos + 1;   // best merged histogram so far
+  if (histos == NULL) goto End;
+
+  // Copy histograms from in[] to out[].
+  assert(in->size <= out->size);
+  for (i = 0; i < in->size; ++i) {
+    in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]);
+    *out->histograms[i] = *in->histograms[i];   // copy carries the fresh bit_cost_
+  }
+
+  // Collapse similar histograms in 'out'.
+  for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
+    // We pick the best pair to be combined out of 'num_pairs' pairs.
+    double best_cost_diff = 0.;   // only strictly negative diffs (savings) qualify
+    int best_idx1 = 0, best_idx2 = 1;
+    int j;
+    seed += iter;
+    for (j = 0; j < num_pairs; ++j) {
+      double curr_cost_diff;
+      // Choose two histograms at random and try to combine them.
+      const uint32_t idx1 = MyRand(&seed) % out_size;
+      const uint32_t tmp = ((j & 7) + 1) % (out_size - 1);
+      const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1);
+      const uint32_t idx2 = (idx1 + diff + 1) % out_size;
+      if (idx1 == idx2) {
+        continue;
+      }
+      *cur_combo = *out->histograms[idx1];
+      VP8LHistogramAdd(cur_combo, out->histograms[idx2]);
+      cur_combo->bit_cost_ = VP8LHistogramEstimateBits(cur_combo);
+      // Calculate cost reduction on combining.
+      curr_cost_diff = cur_combo->bit_cost_
+                     - out->histograms[idx1]->bit_cost_
+                     - out->histograms[idx2]->bit_cost_;
+      if (best_cost_diff > curr_cost_diff) {    // found a better pair?
+        {     // swap cur/best combo histograms
+          VP8LHistogram* const tmp_histo = cur_combo;
+          cur_combo = best_combo;
+          best_combo = tmp_histo;
+        }
+        best_cost_diff = curr_cost_diff;
+        best_idx1 = idx1;
+        best_idx2 = idx2;
+      }
+    }
+
+    if (best_cost_diff < 0.0) {   // merging the best pair saves bits: commit it
+      *out->histograms[best_idx1] = *best_combo;
+      // swap best_idx2 slot with last one (which is now unused)
+      --out_size;
+      if (best_idx2 != out_size) {
+        out->histograms[best_idx2] = out->histograms[out_size];
+        out->histograms[out_size] = NULL;   // just for sanity check.
+      }
+      tries_with_no_success = 0;
+    }
+    if (++tries_with_no_success >= 50) {   // give up after 50 fruitless iterations
+      break;
+    }
+  }
+  out->size = out_size;   // publish the reduced count
+  ok = 1;
+
+ End:
+  free(histos);   // free(NULL) is a no-op on the failure path
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Returns how much the estimated bit cost of 'candidate' grows when
+// 'square_histogram' is merged into it (candidate->bit_cost_ must be current).
+// TODO(skal): we don't really need to copy the histogram and Add(). Instead
+// we just need VP8LDualHistogramEstimateBits(A, B) estimation function.
+static double HistogramDistance(const VP8LHistogram* const square_histogram,
+                                const VP8LHistogram* const candidate) {
+  const double previous_bit_cost = candidate->bit_cost_;   // cached, not recomputed
+  double new_bit_cost;
+  VP8LHistogram modified_histo;   // scratch copy; 'candidate' itself is not modified
+  modified_histo = *candidate;
+  VP8LHistogramAdd(&modified_histo, square_histogram);
+  new_bit_cost = VP8LHistogramEstimateBits(&modified_histo);
+
+  return new_bit_cost - previous_bit_cost;
+}
+
+// Find the best 'out' histogram for each of the 'in' histograms.
+// Note: we assume that out[]->bit_cost_ is already up-to-date.
+static void HistogramRemap(const VP8LHistogramSet* const in,
+                           const VP8LHistogramSet* const out,
+                           uint16_t* const symbols) {
+  int i;
+  for (i = 0; i < in->size; ++i) {
+    int best_out = 0;   // index of the cheapest 'out' histogram found so far
+    double best_bits = HistogramDistance(in->histograms[i], out->histograms[0]);
+    int k;
+    for (k = 1; k < out->size; ++k) {
+      const double cur_bits =
+          HistogramDistance(in->histograms[i], out->histograms[k]);
+      if (cur_bits < best_bits) {   // strict '<': ties keep the lowest index
+        best_bits = cur_bits;
+        best_out = k;
+      }
+    }
+    symbols[i] = best_out;
+  }
+
+  // Recompute each out based on raw and symbols.
+  for (i = 0; i < out->size; ++i) {
+    HistogramClear(out->histograms[i]);   // also zeroes bit_cost_, not refreshed below
+  }
+  for (i = 0; i < in->size; ++i) {
+    VP8LHistogramAdd(out->histograms[symbols[i]], in->histograms[i]);
+  }
+}
+
+int VP8LGetHistoImageSymbols(int xsize, int ysize,
+                             const VP8LBackwardRefs* const refs,
+                             int quality, int histo_bits, int cache_bits,
+                             VP8LHistogramSet* const image_in,
+                             uint16_t* const histogram_symbols) {
+  int ok = 0;   // return value: 1 on success, 0 on allocation failure
+  const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
+  const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
+  const int num_histo_pairs = 10 + quality / 2;  // For HistogramCombine().
+  const int histo_image_raw_size = histo_xsize * histo_ysize;
+  VP8LHistogramSet* const image_out =   // raw per-tile histograms (temporary)
+      VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
+  if (image_out == NULL) return 0;
+
+  // Build histogram image.
+  HistogramBuildImage(xsize, histo_bits, refs, image_out);
+  // Collapse similar histograms.
+  if (!HistogramCombine(image_out, image_in, num_histo_pairs)) {   // raw -> image_in
+    goto Error;
+  }
+  // Find the optimal map from original histograms to the final ones.
+  HistogramRemap(image_out, image_in, histogram_symbols);
+  ok = 1;
+
+Error:
+  free(image_out);   // the set is a single chunk, see VP8LAllocateHistogramSet()
+  return ok;
+}
diff --git a/drivers/webpold/enc/histogram.h b/drivers/webpold/enc/histogram.h
new file mode 100644
index 0000000000..5b5de25539
--- /dev/null
+++ b/drivers/webpold/enc/histogram.h
@@ -0,0 +1,115 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Models the histograms of literal and distance codes.
+
+#ifndef WEBP_ENC_HISTOGRAM_H_
+#define WEBP_ENC_HISTOGRAM_H_
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "./backward_references.h"
+#include "../format_constants.h"
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// A simple container for histograms of data.
+typedef struct {
+  // literal_ contains green literal, palette-code and
+  // copy-length-prefix histogram
+  int literal_[PIX_OR_COPY_CODES_MAX];
+  int red_[256];     // counts of the red literal component
+  int blue_[256];    // counts of the blue literal component
+  int alpha_[256];   // counts of the alpha literal component
+  // Backward reference prefix-code histogram.
+  int distance_[NUM_DISTANCE_CODES];
+  int palette_code_bits_;   // if > 0, adds (1 << bits) codes to the literal_ range
+  double bit_cost_;   // cached value of VP8LHistogramEstimateBits(this)
+} VP8LHistogram;
+
+// Collection of histograms with fixed capacity, allocated as one
+// big memory chunk. Can be destroyed by simply calling 'free()'.
+typedef struct {
+  int size;         // number of slots currently in use
+  int max_size;     // maximum capacity
+  VP8LHistogram** histograms;   // pointer table stored inside the same chunk
+} VP8LHistogramSet;
+
+// Create the histogram.
+//
+// The input data is the PixOrCopy data, which models the literals, stop
+// codes and backward references (both distances and lengths).  Also: if
+// palette_code_bits is >= 0, initialize the histogram with this value.
+void VP8LHistogramCreate(VP8LHistogram* const p,
+                         const VP8LBackwardRefs* const refs,
+                         int palette_code_bits);
+
+// Set the palette_code_bits and reset the stats.
+void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits);
+
+// Collect all the references into a histogram (without reset)
+void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
+                            VP8LHistogram* const histo);
+
+// Allocate an array of pointer to histograms, allocated and initialized
+// using 'cache_bits'. Return NULL in case of memory error.
+VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits);
+
+// Accumulate a token 'v' into a histogram.
+void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
+                                     const PixOrCopy* const v);
+
+// Estimate how many bits the combined entropy of literals and distance
+// approximately maps to.
+double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
+
+// This function estimates the cost in bits excluding the bits needed to
+// represent the entropy code itself.
+double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
+
+static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
+                                         const VP8LHistogram* const a) {
+  int i;   // p += a, count by count; bit_cost_ and palette_code_bits_ untouched
+  for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
+    p->literal_[i] += a->literal_[i];
+  }
+  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
+    p->distance_[i] += a->distance_[i];
+  }
+  for (i = 0; i < 256; ++i) {   // the three 256-entry tables share one loop
+    p->red_[i] += a->red_[i];
+    p->blue_[i] += a->blue_[i];
+    p->alpha_[i] += a->alpha_[i];
+  }
+}
+
+static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
+  return 256 + NUM_LENGTH_CODES +   // number of literal_[] entries in use
+      ((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);
+}
+
+// Builds the histogram image.
+int VP8LGetHistoImageSymbols(int xsize, int ysize,
+                             const VP8LBackwardRefs* const refs,
+                             int quality, int histogram_bits, int cache_bits,
+                             VP8LHistogramSet* const image_in,
+                             uint16_t* const histogram_symbols);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif  // WEBP_ENC_HISTOGRAM_H_
diff --git a/drivers/webpold/enc/iterator.c b/drivers/webpold/enc/iterator.c
new file mode 100644
index 0000000000..86e473bcf0
--- /dev/null
+++ b/drivers/webpold/enc/iterator.c
@@ -0,0 +1,422 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// VP8Iterator: block iterator
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <string.h>
+
+#include "./vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// VP8Iterator
+//------------------------------------------------------------------------------
+
+static void InitLeft(VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  enc->y_left_[-1] = enc->u_left_[-1] = enc->v_left_[-1] =
+      (it->y_ > 0) ? 129 : 127;   // slot [-1]: 127 on the first row, 129 below
+  memset(enc->y_left_, 129, 16);  // left samples default to 129
+  memset(enc->u_left_, 129, 8);
+  memset(enc->v_left_, 129, 8);
+  it->left_nz_[8] = 0;   // slot 8 is not set by VP8IteratorNzToBytes()
+}
+
+static void InitTop(VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  const size_t top_size = enc->mb_w_ * 16;
+  memset(enc->y_top_, 127, 2 * top_size);   // 2x: presumably uv_top_ follows y_top_ (confirm)
+  memset(enc->nz_, 0, enc->mb_w_ * sizeof(*enc->nz_));   // one nz_ entry per mb column
+}
+
+void VP8IteratorReset(VP8EncIterator* const it) {
+  VP8Encoder* const enc = it->enc_;
+  it->x_ = 0;   // rewind to macroblock (0, 0)
+  it->y_ = 0;
+  it->y_offset_ = 0;
+  it->uv_offset_ = 0;
+  it->mb_ = enc->mb_info_;
+  it->preds_ = enc->preds_;
+  it->nz_ = enc->nz_;
+  it->bw_ = &enc->parts_[0];
+  it->done_ = enc->mb_w_* enc->mb_h_;   // macroblocks left, counted down by VP8IteratorNext()
+  InitTop(it);
+  InitLeft(it);
+  memset(it->bit_count_, 0, sizeof(it->bit_count_));
+  it->do_trellis_ = 0;
+}
+
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it) {
+  it->enc_ = enc;   // must be set first: VP8IteratorReset() reads it->enc_
+  it->y_stride_  = enc->pic_->y_stride;
+  it->uv_stride_ = enc->pic_->uv_stride;
+  // TODO(later): for multithreading, these should be owned by 'it'.
+  it->yuv_in_   = enc->yuv_in_;
+  it->yuv_out_  = enc->yuv_out_;
+  it->yuv_out2_ = enc->yuv_out2_;
+  it->yuv_p_    = enc->yuv_p_;
+  it->lf_stats_ = enc->lf_stats_;
+  it->percent0_ = enc->percent_;   // progress baseline used by VP8IteratorProgress()
+  VP8IteratorReset(it);
+}
+
+int VP8IteratorProgress(const VP8EncIterator* const it, int delta) {
+  VP8Encoder* const enc = it->enc_;
+  if (delta && enc->pic_->progress_hook) {   // report only if there is a hook
+    const int percent = (enc->mb_h_ <= 1)    // guard the division below
+                      ? it->percent0_
+                      : it->percent0_ + delta * it->y_ / (enc->mb_h_ - 1);
+    return WebPReportProgress(enc->pic_, percent, &enc->percent_);
+  }
+  return 1;   // nothing reported
+}
+
+//------------------------------------------------------------------------------
+// Import the source samples into the cache. Takes care of replicating
+// boundary pixels if necessary.
+
+static void ImportBlock(const uint8_t* src, int src_stride,
+                        uint8_t* dst, int w, int h, int size) {
+  int i;   // fills a size x size block (row stride BPS) from a w x h source
+  for (i = 0; i < h; ++i) {
+    memcpy(dst, src, w);
+    if (w < size) {
+      memset(dst + w, dst[w - 1], size - w);   // replicate the last valid column
+    }
+    dst += BPS;
+    src += src_stride;
+  }
+  for (i = h; i < size; ++i) {
+    memcpy(dst, dst - BPS, size);   // replicate the previous row downwards
+    dst += BPS;
+  }
+}
+
+void VP8IteratorImport(const VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  const int x = it->x_, y = it->y_;   // macroblock coordinates
+  const WebPPicture* const pic = enc->pic_;
+  const uint8_t* const ysrc = pic->y + (y * pic->y_stride + x) * 16;
+  const uint8_t* const usrc = pic->u + (y * pic->uv_stride + x) * 8;
+  const uint8_t* const vsrc = pic->v + (y * pic->uv_stride + x) * 8;
+  uint8_t* const ydst = it->yuv_in_ + Y_OFF;
+  uint8_t* const udst = it->yuv_in_ + U_OFF;
+  uint8_t* const vdst = it->yuv_in_ + V_OFF;
+  int w = (pic->width - x * 16);    // picture columns left from this macroblock
+  int h = (pic->height - y * 16);   // picture rows left from this macroblock
+
+  if (w > 16) w = 16;   // clip to one macroblock
+  if (h > 16) h = 16;
+
+  // Luma plane
+  ImportBlock(ysrc, pic->y_stride, ydst, w, h, 16);
+
+  {   // U/V planes
+    const int uv_w = (w + 1) >> 1;   // half size, rounded up
+    const int uv_h = (h + 1) >> 1;
+    ImportBlock(usrc, pic->uv_stride, udst, uv_w, uv_h, 8);
+    ImportBlock(vsrc, pic->uv_stride, vdst, uv_w, uv_h, 8);
+  }
+}
+
+//------------------------------------------------------------------------------
+// Copy back the compressed samples into user space if requested.
+
+static void ExportBlock(const uint8_t* src, uint8_t* dst, int dst_stride,
+                        int w, int h) {
+  // Copies 'h' rows of 'w' bytes; 'src' rows are BPS apart.
+  int row;
+  for (row = 0; row < h; ++row) {
+    memcpy(dst + row * dst_stride, src + row * BPS, w);
+  }
+}
+
+void VP8IteratorExport(const VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  if (enc->config_->show_compressed) {   // otherwise this function does nothing
+    const int x = it->x_, y = it->y_;
+    const uint8_t* const ysrc = it->yuv_out_ + Y_OFF;
+    const uint8_t* const usrc = it->yuv_out_ + U_OFF;
+    const uint8_t* const vsrc = it->yuv_out_ + V_OFF;
+    const WebPPicture* const pic = enc->pic_;
+    uint8_t* const ydst = pic->y + (y * pic->y_stride + x) * 16;   // overwrites the picture
+    uint8_t* const udst = pic->u + (y * pic->uv_stride + x) * 8;
+    uint8_t* const vdst = pic->v + (y * pic->uv_stride + x) * 8;
+    int w = (pic->width - x * 16);
+    int h = (pic->height - y * 16);
+
+    if (w > 16) w = 16;   // clip to one macroblock
+    if (h > 16) h = 16;
+
+    // Luma plane
+    ExportBlock(ysrc, ydst, pic->y_stride, w, h);
+
+    {   // U/V planes
+      const int uv_w = (w + 1) >> 1;   // half size, rounded up
+      const int uv_h = (h + 1) >> 1;
+      ExportBlock(usrc, udst, pic->uv_stride, uv_w, uv_h);
+      ExportBlock(vsrc, vdst, pic->uv_stride, uv_w, uv_h);
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// Non-zero contexts setup/teardown
+
+// Nz bits:
+//  0  1  2  3  Y
+//  4  5  6  7
+//  8  9 10 11
+// 12 13 14 15
+// 16 17        U
+// 18 19
+// 20 21        V
+// 22 23
+// 24           DC-intra16
+
+// Convert packed context to byte array
+#define BIT(nz, n) (!!((nz) & (1 << (n))))
+
+void VP8IteratorNzToBytes(VP8EncIterator* const it) {
+  const int tnz = it->nz_[0], lnz = it->nz_[-1];   // packed bits: this slot / previous slot
+  int* const top_nz = it->top_nz_;
+  int* const left_nz = it->left_nz_;
+
+  // Top-Y
+  top_nz[0] = BIT(tnz, 12);   // bits 12..15: bottom row of the Y bit layout
+  top_nz[1] = BIT(tnz, 13);
+  top_nz[2] = BIT(tnz, 14);
+  top_nz[3] = BIT(tnz, 15);
+  // Top-U
+  top_nz[4] = BIT(tnz, 18);
+  top_nz[5] = BIT(tnz, 19);
+  // Top-V
+  top_nz[6] = BIT(tnz, 22);
+  top_nz[7] = BIT(tnz, 23);
+  // DC
+  top_nz[8] = BIT(tnz, 24);
+
+  // left-Y
+  left_nz[0] = BIT(lnz,  3);   // bits 3, 7, 11, 15: right column of the Y bit layout
+  left_nz[1] = BIT(lnz,  7);
+  left_nz[2] = BIT(lnz, 11);
+  left_nz[3] = BIT(lnz, 15);
+  // left-U
+  left_nz[4] = BIT(lnz, 17);
+  left_nz[5] = BIT(lnz, 19);
+  // left-V
+  left_nz[6] = BIT(lnz, 21);
+  left_nz[7] = BIT(lnz, 23);
+  // left-DC is special, iterated separately
+}
+
+void VP8IteratorBytesToNz(VP8EncIterator* const it) {
+  uint32_t nz = 0;   // packed result, same bit positions as VP8IteratorNzToBytes()
+  const int* const top_nz = it->top_nz_;
+  const int* const left_nz = it->left_nz_;
+  // top
+  nz |= (top_nz[0] << 12) | (top_nz[1] << 13);
+  nz |= (top_nz[2] << 14) | (top_nz[3] << 15);
+  nz |= (top_nz[4] << 18) | (top_nz[5] << 19);
+  nz |= (top_nz[6] << 22) | (top_nz[7] << 23);
+  nz |= (top_nz[8] << 24);  // we propagate the _top_ bit, esp. for intra4
+  // left
+  nz |= (left_nz[0] << 3) | (left_nz[1] << 7);
+  nz |= (left_nz[2] << 11);   // left_nz[3], [5] and [7] are not packed here
+  nz |= (left_nz[4] << 17) | (left_nz[6] << 21);
+
+  *it->nz_ = nz;   // overwrites the current slot
+}
+
+#undef BIT
+
+//------------------------------------------------------------------------------
+// Advance to the next position, doing the bookkeeping.
+
+int VP8IteratorNext(VP8EncIterator* const it,
+                    const uint8_t* const block_to_save) {
+  VP8Encoder* const enc = it->enc_;
+  if (block_to_save) {   // NULL: skip saving boundary samples
+    const int x = it->x_, y = it->y_;
+    const uint8_t* const ysrc = block_to_save + Y_OFF;
+    const uint8_t* const usrc = block_to_save + U_OFF;
+    if (x < enc->mb_w_ - 1) {   // left
+      int i;
+      for (i = 0; i < 16; ++i) {
+        enc->y_left_[i] = ysrc[15 + i * BPS];   // right-most luma column
+      }
+      for (i = 0; i < 8; ++i) {
+        enc->u_left_[i] = usrc[7 + i * BPS];
+        enc->v_left_[i] = usrc[15 + i * BPS];   // V read 8 columns after U, same row
+      }
+      // top-left (before 'top'!)
+      enc->y_left_[-1] = enc->y_top_[x * 16 + 15];
+      enc->u_left_[-1] = enc->uv_top_[x * 16 + 0 + 7];
+      enc->v_left_[-1] = enc->uv_top_[x * 16 + 8 + 7];
+    }
+    if (y < enc->mb_h_ - 1) {  // top
+      memcpy(enc->y_top_ + x * 16, ysrc + 15 * BPS, 16);     // last luma row
+      memcpy(enc->uv_top_ + x * 16, usrc + 7 * BPS, 8 + 8);  // last U and V rows
+    }
+  }
+
+  it->mb_++;
+  it->preds_ += 4;
+  it->nz_++;
+  it->x_++;
+  if (it->x_ == enc->mb_w_) {   // end of row: wrap to the start of the next one
+    it->x_ = 0;
+    it->y_++;
+    it->bw_ = &enc->parts_[it->y_ & (enc->num_parts_ - 1)];   // assumes power-of-two num_parts_
+    it->preds_ = enc->preds_ + it->y_ * 4 * enc->preds_w_;
+    it->nz_ = enc->nz_;
+    InitLeft(it);
+  }
+  return (0 < --it->done_);   // non-zero while macroblocks remain
+}
+
+//------------------------------------------------------------------------------
+// Helper function to set mode properties
+
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode) {
+  uint8_t* preds = it->preds_;
+  int y;
+  for (y = 0; y < 4; ++y) {   // fill a 4x4 area of preds with 'mode'
+    memset(preds, mode, 4);
+    preds += it->enc_->preds_w_;
+  }
+  it->mb_->type_ = 1;   // VP8SetIntra4Mode() stores 0 here instead
+}
+
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes) {
+  uint8_t* preds = it->preds_;
+  int y;
+  for (y = 4; y > 0; --y) {   // 4 rows of 4 modes: 16 entries read from 'modes'
+    memcpy(preds, modes, 4 * sizeof(*modes));
+    preds += it->enc_->preds_w_;
+    modes += 4;
+  }
+  it->mb_->type_ = 0;   // VP8SetIntra16Mode() stores 1 here instead
+}
+
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode) {
+  it->mb_->uv_mode_ = mode;   // written to the current macroblock's info record
+}
+
+void VP8SetSkip(const VP8EncIterator* const it, int skip) {
+  it->mb_->skip_ = skip;   // written to the current macroblock's info record
+}
+
+void VP8SetSegment(const VP8EncIterator* const it, int segment) {
+  it->mb_->segment_ = segment;   // written to the current macroblock's info record
+}
+
+//------------------------------------------------------------------------------
+// Intra4x4 sub-blocks iteration
+//
+//  We store and update the boundary samples into an array of 37 pixels. They
+//  are updated as we iterate and reconstruct each intra4x4 block in turn.
+//  The position of the samples has the following snake pattern:
+//
+// 16|17 18 19 20|21 22 23 24|25 26 27 28|29 30 31 32|33 34 35 36  <- Top-right
+// --+-----------+-----------+-----------+-----------+
+// 15|         19|         23|         27|         31|
+// 14|         18|         22|         26|         30|
+// 13|         17|         21|         25|         29|
+// 12|13 14 15 16|17 18 19 20|21 22 23 24|25 26 27 28|
+// --+-----------+-----------+-----------+-----------+
+// 11|         15|         19|         23|         27|
+// 10|         14|         18|         22|         26|
+//  9|         13|         17|         21|         25|
+//  8| 9 10 11 12|13 14 15 16|17 18 19 20|21 22 23 24|
+// --+-----------+-----------+-----------+-----------+
+//  7|         11|         15|         19|         23|
+//  6|         10|         14|         18|         22|
+//  5|          9|         13|         17|         21|
+//  4| 5  6  7  8| 9 10 11 12|13 14 15 16|17 18 19 20|
+// --+-----------+-----------+-----------+-----------+
+//  3|          7|         11|         15|         19|
+//  2|          6|         10|         14|         18|
+//  1|          5|          9|         13|         17|
+//  0| 1  2  3  4| 5  6  7  8| 9 10 11 12|13 14 15 16|
+// --+-----------+-----------+-----------+-----------+
+
+// Array to record the position of the top sample to pass to the prediction
+// functions in dsp.c.
+static const uint8_t VP8TopLeftI4[16] = {
+  17, 21, 25, 29,
+  13, 17, 21, 25,
+  9,  13, 17, 21,
+  5,   9, 13, 17
+};
+
+void VP8IteratorStartI4(VP8EncIterator* const it) {
+  const VP8Encoder* const enc = it->enc_;
+  int i;
+
+  it->i4_ = 0;    // first 4x4 sub-block
+  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[0];
+
+  // Import the boundary samples
+  for (i = 0; i < 17; ++i) {    // left
+    it->i4_boundary_[i] = enc->y_left_[15 - i];   // reversed; i == 16 reads y_left_[-1]
+  }
+  for (i = 0; i < 16; ++i) {    // top
+    it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i];
+  }
+  // top-right samples have a special case on the far right of the picture
+  if (it->x_ < enc->mb_w_ - 1) {
+    for (i = 16; i < 16 + 4; ++i) {   // first 4 top samples of the next macroblock
+      it->i4_boundary_[17 + i] = enc->y_top_[it->x_ * 16 + i];
+    }
+  } else {    // else, replicate the last valid pixel four times
+    for (i = 16; i < 16 + 4; ++i) {
+      it->i4_boundary_[17 + i] = it->i4_boundary_[17 + 15];
+    }
+  }
+  VP8IteratorNzToBytes(it);  // import the non-zero context
+}
+
+int VP8IteratorRotateI4(VP8EncIterator* const it,
+                        const uint8_t* const yuv_out) {
+  const uint8_t* const blk = yuv_out + VP8Scan[it->i4_];   // current 4x4 sub-block
+  uint8_t* const top = it->i4_top_;
+  int i;
+
+  // Update the cache with 7 fresh samples
+  for (i = 0; i <= 3; ++i) {
+    top[-4 + i] = blk[i + 3 * BPS];   // store future top samples
+  }
+  if ((it->i4_ & 3) != 3) {  // if not on the right sub-blocks #3, #7, #11, #15
+    for (i = 0; i <= 2; ++i) {        // store future left samples
+      top[i] = blk[3 + (2 - i) * BPS];   // right column, rows 2 down to 0
+    }
+  } else {  // else replicate top-right samples, as says the specs.
+    for (i = 0; i <= 3; ++i) {
+      top[i] = top[i + 4];
+    }
+  }
+  // move pointers to next sub-block
+  ++it->i4_;
+  if (it->i4_ == 16) {    // we're done
+    return 0;   // i4_top_ is left unchanged in this case
+  }
+
+  it->i4_top_ = it->i4_boundary_ + VP8TopLeftI4[it->i4_];
+  return 1;   // another sub-block remains
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webp/enc/layer.c b/drivers/webpold/enc/layer.c
index 423127df63..423127df63 100644
--- a/drivers/webp/enc/layer.c
+++ b/drivers/webpold/enc/layer.c
diff --git a/drivers/webpold/enc/picture.c b/drivers/webpold/enc/picture.c
new file mode 100644
index 0000000000..44eed06083
--- /dev/null
+++ b/drivers/webpold/enc/picture.c
@@ -0,0 +1,1041 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// WebPPicture utils: colorspace conversion, crop, ...
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "../utils/rescaler.h"
+#include "../utils/utils.h"
+#include "../dsp/dsp.h"
+#include "../dsp/yuv.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define HALVE(x) (((x) + 1) >> 1)
+#define IS_YUV_CSP(csp, YUV_CSP) (((csp) & WEBP_CSP_UV_MASK) == (YUV_CSP))
+
+static const union {
+  uint32_t argb;
+  uint8_t  bytes[4];
+} test_endian = { 0xff000000u };   // probe: where does the top byte land?
+#define ALPHA_IS_LAST (test_endian.bytes[3] == 0xff)   // i.e. little-endian
+
+//------------------------------------------------------------------------------
+// WebPPicture
+//------------------------------------------------------------------------------
+
+int WebPPictureAlloc(WebPPicture* picture) {
+  if (picture != NULL) {   // a NULL 'picture' is a no-op that reports success
+    const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
+    const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT;
+    const int width = picture->width;
+    const int height = picture->height;
+    // Returns 0 on bad dimensions/colorspace or failed allocation, 1 otherwise.
+    if (!picture->use_argb) {
+      const int y_stride = width;
+      const int uv_width = HALVE(width);
+      const int uv_height = HALVE(height);
+      const int uv_stride = uv_width;
+      int uv0_stride = 0;
+      int a_width, a_stride;
+      uint64_t y_size, uv_size, uv0_size, a_size, total_size;
+      uint8_t* mem;
+
+      // U/V
+      switch (uv_csp) {
+        case WEBP_YUV420:
+          break;
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+        case WEBP_YUV400:    // for now, we'll just reset the U/V samples
+          break;
+        case WEBP_YUV422:
+          uv0_stride = uv_width;
+          break;
+        case WEBP_YUV444:
+          uv0_stride = width;
+          break;
+#endif
+        default:
+          return 0;
+      }
+      uv0_size = (uint64_t)height * uv0_stride;   // 64b: no 'int' overflow
+
+      // alpha
+      a_width = has_alpha ? width : 0;
+      a_stride = a_width;
+      y_size = (uint64_t)y_stride * height;
+      uv_size = (uint64_t)uv_stride * uv_height;
+      a_size =  (uint64_t)a_stride * height;
+
+      total_size = y_size + a_size + 2 * uv_size + 2 * uv0_size;
+
+      // Security and validation checks
+      if (width <= 0 || height <= 0 ||         // luma/alpha param error
+          uv_width < 0 || uv_height < 0) {     // u/v param error
+        return 0;
+      }
+      // Clear previous buffer and allocate a new one.
+      WebPPictureFree(picture);   // erase previous buffer
+      mem = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+      if (mem == NULL) return 0;
+
+      // From now on, we're in the clear, we can no longer fail...
+      picture->memory_ = (void*)mem;
+      picture->y_stride  = y_stride;
+      picture->uv_stride = uv_stride;
+      picture->a_stride  = a_stride;
+      picture->uv0_stride = uv0_stride;
+      // TODO(skal): we could align the y/u/v planes and adjust stride.
+      picture->y = mem;   // single allocation, carved up as Y, U, V, [A], [U0, V0]
+      mem += y_size;
+
+      picture->u = mem;
+      mem += uv_size;
+      picture->v = mem;
+      mem += uv_size;
+
+      if (a_size) {
+        picture->a = mem;
+        mem += a_size;
+      }
+      if (uv0_size) {
+        picture->u0 = mem;
+        mem += uv0_size;
+        picture->v0 = mem;
+        mem += uv0_size;
+      }
+    } else {
+      void* memory;
+      const uint64_t argb_size = (uint64_t)width * height;
+      if (width <= 0 || height <= 0) {
+        return 0;
+      }
+      // Clear previous buffer and allocate a new one.
+      WebPPictureFree(picture);   // erase previous buffer
+      memory = WebPSafeMalloc(argb_size, sizeof(*picture->argb));
+      if (memory == NULL) return 0;
+
+      // TODO(skal): align plane to cache line?
+      picture->memory_argb_ = memory;
+      picture->argb = (uint32_t*)memory;
+      picture->argb_stride = width;   // stride counted in pixels, not bytes
+    }
+  }
+  return 1;
+}
+
+// Remove reference to the ARGB buffer (doesn't free anything).
+static void PictureResetARGB(WebPPicture* const picture) {
+  picture->argb_stride = 0;
+  picture->argb = NULL;
+  picture->memory_argb_ = NULL;   // reference dropped only: no free() here
+}
+
+// Remove reference to the YUVA buffer (doesn't free anything).
+static void PictureResetYUVA(WebPPicture* const picture) {
+  // Strides first, then the plane pointers, then the backing allocation.
+  picture->y_stride = picture->uv_stride = picture->a_stride = 0;
+  picture->uv0_stride = 0;
+  picture->y = picture->u = picture->v = NULL;
+  picture->a = picture->u0 = picture->v0 = NULL;
+  picture->memory_ = NULL;
+}
+
+// Grab the 'specs' (writer, *opaque, width, height...) from 'src' and copy them
+// into 'dst'. Mark 'dst' as not owning any memory.
+static void WebPPictureGrabSpecs(const WebPPicture* const src,
+                                 WebPPicture* const dst) {
+  assert(src != NULL && dst != NULL);
+  *dst = *src;              // plain struct copy of every field...
+  PictureResetARGB(dst);    // ...after which both buffer references are
+  PictureResetYUVA(dst);    // cleared, so 'dst' points at nothing of 'src'
+}
+
+// Allocate a new argb buffer, discarding any existing one and preserving
+// the other YUV(A) buffer.
+static int PictureAllocARGB(WebPPicture* const picture) {
+  WebPPicture tmp;
+  free(picture->memory_argb_);   // discard the existing ARGB buffer, if any
+  PictureResetARGB(picture);
+  picture->use_argb = 1;   // NOTE(review): stays set even if allocation fails
+  WebPPictureGrabSpecs(picture, &tmp);   // 'tmp': same specs, no buffers
+  if (!WebPPictureAlloc(&tmp)) {         // use_argb is set: ARGB branch only
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  picture->memory_argb_ = tmp.memory_argb_;   // adopt the new buffer
+  picture->argb = tmp.argb;
+  picture->argb_stride = tmp.argb_stride;
+  return 1;
+}
+
+// Release memory owned by 'picture' (both YUV and ARGB buffers).
+void WebPPictureFree(WebPPicture* picture) {
+  if (picture == NULL) return;    // nothing to release
+  free(picture->memory_argb_);
+  free(picture->memory_);
+  // Clear the plane pointers and strides that referred to the freed buffers.
+  PictureResetARGB(picture);
+  PictureResetYUVA(picture);
+}
+
+//------------------------------------------------------------------------------
+// Picture copying
+
+// Not worth moving to dsp/enc.c (only used here).
+static void CopyPlane(const uint8_t* src, int src_stride,
+                      uint8_t* dst, int dst_stride, int width, int height) {
+  int row;
+  // Copy 'height' rows of 'width' bytes, advancing each pointer by its stride.
+  for (row = 0; row < height; ++row, src += src_stride, dst += dst_stride) {
+    memcpy(dst, src, width);
+  }
+}
+
+// Adjust top-left corner to chroma sample position.
+static void SnapTopLeftPosition(const WebPPicture* const pic,
+                                int* const left, int* const top) {
+  int is_420, is_422;
+  if (pic->use_argb) return;   // only YUV pictures get snapped
+  is_420 = IS_YUV_CSP(pic->colorspace, WEBP_YUV420);
+  is_422 = IS_YUV_CSP(pic->colorspace, WEBP_YUV422);
+  // Round down to an even coordinate: 'left' for 4:2:0 and 4:2:2, 'top' for
+  // 4:2:0 only. Other colorspaces keep the corner as given.
+  if (is_420 || is_422) *left &= ~1;
+  if (is_420 && !is_422) *top &= ~1;
+}
+
+// Adjust top-left corner and verify that the sub-rectangle is valid.
+static int AdjustAndCheckRectangle(const WebPPicture* const pic,
+                                   int* const left, int* const top,
+                                   int width, int height) {
+  SnapTopLeftPosition(pic, left, top);   // may round *left / *top down
+  if ((*left) < 0 || (*top) < 0) return 0;
+  if (width <= 0 || height <= 0) return 0;
+  if (width > pic->width - (*left)) return 0;    // room left, not left + width:
+  if (height > pic->height - (*top)) return 0;   // the sum could overflow 'int'
+  return 1;   // rectangle lies fully inside 'pic'
+}
+
+int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) {
+  if (src == NULL || dst == NULL) return 0;
+  if (src == dst) return 1;   // copying onto itself: nothing to do
+  // NOTE(review): whatever buffer 'dst' referenced before is not freed here.
+  WebPPictureGrabSpecs(src, dst);        // specs only: 'dst' has no buffer yet
+  if (!WebPPictureAlloc(dst)) return 0;  // new buffer(s) sized from the specs
+
+  if (!src->use_argb) {
+    CopyPlane(src->y, src->y_stride,
+              dst->y, dst->y_stride, dst->width, dst->height);
+    CopyPlane(src->u, src->uv_stride,
+              dst->u, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+    CopyPlane(src->v, src->uv_stride,
+              dst->v, dst->uv_stride, HALVE(dst->width), HALVE(dst->height));
+    if (dst->a != NULL)  {   // set by the allocation when the alpha bit is on
+      CopyPlane(src->a, src->a_stride,
+                dst->a, dst->a_stride, dst->width, dst->height);
+    }
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (dst->u0 != NULL)  {
+      int uv0_width = src->width;
+      if (IS_YUV_CSP(dst->colorspace, WEBP_YUV422)) {
+        uv0_width = HALVE(uv0_width);
+      }
+      CopyPlane(src->u0, src->uv0_stride,
+                dst->u0, dst->uv0_stride, uv0_width, dst->height);
+      CopyPlane(src->v0, src->uv0_stride,
+                dst->v0, dst->uv0_stride, uv0_width, dst->height);
+    }
+#endif
+  } else {
+    CopyPlane((const uint8_t*)src->argb, 4 * src->argb_stride,   // 4 bytes per
+              (uint8_t*)dst->argb, 4 * dst->argb_stride,         // ARGB pixel
+              4 * dst->width, dst->height);
+  }
+  return 1;
+}
+
+int WebPPictureIsView(const WebPPicture* picture) {
+  const void* owned;
+  if (picture == NULL) return 0;
+  // A view has no backing allocation for the representation in use.
+  owned = picture->use_argb ? picture->memory_argb_ : picture->memory_;
+  return (owned == NULL);
+}
+
+int WebPPictureView(const WebPPicture* src,
+                    int left, int top, int width, int height,
+                    WebPPicture* dst) {
+  if (src == NULL || dst == NULL) return 0;
+  // Returns 1 on success, 0 on NULL arguments or an invalid rectangle.
+  // verify rectangle position.
+  if (!AdjustAndCheckRectangle(src, &left, &top, width, height)) return 0;
+
+  if (src != dst) {  // beware of aliasing! We don't want to leak 'memory_'.
+    WebPPictureGrabSpecs(src, dst);
+  }
+  dst->width = width;     // the view spans the sub-rectangle only...
+  dst->height = height;   // ...while the strides remain those of 'src'
+  if (!src->use_argb) {   // plane pointers below point into 'src': no copy
+    dst->y = src->y + top * src->y_stride + left;
+    dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1);
+    dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
+    if (src->a != NULL) {
+      dst->a = src->a + top * src->a_stride + left;
+    }
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (src->u0 != NULL) {
+      const int left_pos =
+          IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left;
+      dst->u0 = src->u0 + top * src->uv0_stride + left_pos;
+      dst->v0 = src->v0 + top * src->uv0_stride + left_pos;
+    }
+#endif
+  } else {
+    dst->argb = src->argb + top * src->argb_stride + left;   // pixel units
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Picture cropping
+
+int WebPPictureCrop(WebPPicture* pic,
+                    int left, int top, int width, int height) {
+  WebPPicture tmp;
+  // Returns 1 on success; every 0 return happens before 'pic' is modified.
+  if (pic == NULL) return 0;
+  if (!AdjustAndCheckRectangle(pic, &left, &top, width, height)) return 0;
+
+  WebPPictureGrabSpecs(pic, &tmp);   // crop into a separate picture first
+  tmp.width = width;
+  tmp.height = height;
+  if (!WebPPictureAlloc(&tmp)) return 0;
+
+  if (!pic->use_argb) {
+    const int y_offset = top * pic->y_stride + left;
+    const int uv_offset = (top / 2) * pic->uv_stride + left / 2;
+    CopyPlane(pic->y + y_offset, pic->y_stride,
+              tmp.y, tmp.y_stride, width, height);
+    CopyPlane(pic->u + uv_offset, pic->uv_stride,
+              tmp.u, tmp.uv_stride, HALVE(width), HALVE(height));
+    CopyPlane(pic->v + uv_offset, pic->uv_stride,
+              tmp.v, tmp.uv_stride, HALVE(width), HALVE(height));
+
+    if (tmp.a != NULL) {
+      const int a_offset = top * pic->a_stride + left;
+      CopyPlane(pic->a + a_offset, pic->a_stride,
+                tmp.a, tmp.a_stride, width, height);
+    }
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (tmp.u0 != NULL) {
+      int w = width;
+      int left_pos = left;
+      if (IS_YUV_CSP(tmp.colorspace, WEBP_YUV422)) {
+        w = HALVE(w);
+        left_pos = HALVE(left_pos);
+      }
+      CopyPlane(pic->u0 + top * pic->uv0_stride + left_pos, pic->uv0_stride,
+                tmp.u0, tmp.uv0_stride, w, height);
+      CopyPlane(pic->v0 + top * pic->uv0_stride + left_pos, pic->uv0_stride,
+                tmp.v0, tmp.uv0_stride, w, height);
+    }
+#endif
+  } else {
+    const uint8_t* const src =
+        (const uint8_t*)(pic->argb + top * pic->argb_stride + left);
+    CopyPlane(src, pic->argb_stride * 4,   // byte strides: 4 bytes per pixel
+              (uint8_t*)tmp.argb, tmp.argb_stride * 4,
+              width * 4, height);
+  }
+  WebPPictureFree(pic);   // release the old buffers...
+  *pic = tmp;             // ...and let 'pic' take over those of 'tmp'
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simple picture rescaler
+
+static void RescalePlane(const uint8_t* src,
+                         int src_width, int src_height, int src_stride,
+                         uint8_t* dst,
+                         int dst_width, int dst_height, int dst_stride,
+                         int32_t* const work,
+                         int num_channels) {
+  WebPRescaler rescaler;
+  int y = 0;   // first source row not yet passed to the rescaler
+  WebPRescalerInit(&rescaler, src_width, src_height,
+                   dst, dst_width, dst_height, dst_stride,
+                   num_channels,
+                   src_width, dst_width,
+                   src_height, dst_height,
+                   work);
+  // 'work' has to hold 2 * dst_width * num_channels entries; start from zero.
+  memset(work, 0, 2 * dst_width * num_channels * sizeof(*work));
+  while (y < src_height) {
+    y += WebPRescalerImport(&rescaler, src_height - y,   // presumably returns
+                            src + y * src_stride, src_stride);   // rows taken
+    WebPRescalerExport(&rescaler);
+  }
+}
+
+int WebPPictureRescale(WebPPicture* pic, int width, int height) {
+  WebPPicture tmp;
+  int prev_width, prev_height;
+  int32_t* work;   // scratch buffer handed to RescalePlane()
+
+  if (pic == NULL) return 0;
+  prev_width = pic->width;
+  prev_height = pic->height;
+  // Empty source: nothing to scale, and the ratios below would divide by 0.
+  if (prev_width <= 0 || prev_height <= 0) return 0;
+  // A zero width/height is derived from the other one, keeping the ratio.
+  if (width == 0) {
+    width = (prev_width * height + prev_height / 2) / prev_height;
+  }
+  if (height == 0) {
+    height = (prev_height * width + prev_width / 2) / prev_width;
+  }
+  if (width <= 0 || height <= 0) return 0;   // still nonsensical: give up
+
+  WebPPictureGrabSpecs(pic, &tmp);   // rescale into a separate picture first
+  tmp.width = width;
+  tmp.height = height;
+  if (!WebPPictureAlloc(&tmp)) return 0;
+
+  if (!pic->use_argb) {
+    work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work));
+    if (work == NULL) {
+      WebPPictureFree(&tmp);
+      return 0;
+    }
+
+    RescalePlane(pic->y, prev_width, prev_height, pic->y_stride,
+                 tmp.y, width, height, tmp.y_stride, work, 1);
+    RescalePlane(pic->u,
+                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+                 tmp.u,
+                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+    RescalePlane(pic->v,
+                 HALVE(prev_width), HALVE(prev_height), pic->uv_stride,
+                 tmp.v,
+                 HALVE(width), HALVE(height), tmp.uv_stride, work, 1);
+
+    if (tmp.a != NULL) {
+      RescalePlane(pic->a, prev_width, prev_height, pic->a_stride,
+                   tmp.a, width, height, tmp.a_stride, work, 1);
+    }
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    if (tmp.u0 != NULL) {
+      const int s = IS_YUV_CSP(tmp.colorspace, WEBP_YUV422) ? 2 : 1;
+      RescalePlane(
+          pic->u0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
+          tmp.u0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1);
+      RescalePlane(
+          pic->v0, (prev_width + s / 2) / s, prev_height, pic->uv0_stride,
+          tmp.v0, (width + s / 2) / s, height, tmp.uv0_stride, work, 1);
+    }
+#endif
+  } else {
+    work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work));
+    if (work == NULL) {
+      WebPPictureFree(&tmp);
+      return 0;
+    }
+
+    RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height,
+                 pic->argb_stride * 4,
+                 (uint8_t*)tmp.argb, width, height,
+                 tmp.argb_stride * 4,
+                 work, 4);   // 4 interleaved channels, byte strides
+
+  }
+  WebPPictureFree(pic);   // release the old buffers...
+  free(work);
+  *pic = tmp;             // ...and let 'pic' take over the rescaled ones
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// WebPMemoryWriter: Write-to-memory
+
+void WebPMemoryWriterInit(WebPMemoryWriter* writer) {
+  // Start empty: no buffer, no capacity, no bytes written.
+  writer->max_size = 0;
+  writer->size = 0;
+  writer->mem = NULL;
+}
+
+int WebPMemoryWrite(const uint8_t* data, size_t data_size,
+                    const WebPPicture* picture) {
+  WebPMemoryWriter* const w = (WebPMemoryWriter*)picture->custom_ptr;
+  uint64_t next_size;   // 64b, so that size + data_size cannot wrap
+  if (w == NULL) {      // no writer attached: report success, store nothing
+    return 1;
+  }
+  next_size = (uint64_t)w->size + data_size;
+  if (next_size > w->max_size) {
+    uint8_t* new_mem;
+    uint64_t next_max_size = 2ULL * w->max_size;   // grow by doubling...
+    if (next_max_size < next_size) next_max_size = next_size;   // ...or more
+    if (next_max_size < 8192ULL) next_max_size = 8192ULL;   // 8192 at least
+    new_mem = (uint8_t*)WebPSafeMalloc(next_max_size, 1);
+    if (new_mem == NULL) {
+      return 0;   // the existing buffer and its content are left as they were
+    }
+    if (w->size > 0) {
+      memcpy(new_mem, w->mem, w->size);
+    }
+    free(w->mem);
+    w->mem = new_mem;
+    // down-cast is ok, thanks to WebPSafeMalloc
+    w->max_size = (size_t)next_max_size;
+  }
+  if (data_size > 0) {
+    memcpy(w->mem + w->size, data, data_size);   // append after current bytes
+    w->size += data_size;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Detection of non-trivial transparency
+
+// Returns true if alpha[] has non-0xff values.
+static int CheckNonOpaque(const uint8_t* alpha, int width, int height,
+                          int x_step, int y_step) {
+  int row, x;
+  if (alpha == NULL) return 0;   // no alpha samples at all: reported as opaque
+  for (row = 0; row < height; ++row, alpha += y_step) {
+    const int row_end = width * x_step;   // offset just past the last sample
+    for (x = 0; x < row_end; x += x_step) {
+      if (alpha[x] != 0xff) return 1;  // TODO(skal): check 4/8 bytes at a time.
+    }
+  }
+  return 0;
+}
+
+// Checking for the presence of non-opaque alpha.
+int WebPPictureHasTransparency(const WebPPicture* picture) {
+  const uint32_t* row;
+  int i, j;
+  if (picture == NULL) return 0;
+  if (!picture->use_argb) {   // YUV(A): scan the separate alpha plane, if any
+    return CheckNonOpaque(picture->a, picture->width, picture->height,
+                          1, picture->a_stride);
+  }
+  row = picture->argb;
+  if (row == NULL) return 0;
+  for (j = 0; j < picture->height; ++j, row += picture->argb_stride) {
+    for (i = 0; i < picture->width; ++i) {
+      // Alpha is the top byte: anything below 0xff000000 has alpha != 0xff.
+      if (row[i] < 0xff000000u) return 1;
+    }
+  }
+  return 0;
+}
+
+//------------------------------------------------------------------------------
+// RGB -> YUV conversion
+
+// TODO: we can do better than simply 2x2 averaging on U/V samples.
+#define SUM4(ptr) ((ptr)[0] + (ptr)[step] + \
+                   (ptr)[rgb_stride] + (ptr)[rgb_stride + step])
+#define SUM2H(ptr) (2 * (ptr)[0] + 2 * (ptr)[step])
+#define SUM2V(ptr) (2 * (ptr)[0] + 2 * (ptr)[rgb_stride])
+#define SUM1(ptr)  (4 * (ptr)[0])
+#define RGB_TO_UV(x, y, SUM) {                           \
+  const int src = (2 * (step * (x) + (y) * rgb_stride)); \
+  const int dst = (x) + (y) * picture->uv_stride;        \
+  const int r = SUM(r_ptr + src);                        \
+  const int g = SUM(g_ptr + src);                        \
+  const int b = SUM(b_ptr + src);                        \
+  picture->u[dst] = VP8RGBToU(r, g, b);                  \
+  picture->v[dst] = VP8RGBToV(r, g, b);                  \
+}
+
+#define RGB_TO_UV0(x_in, x_out, y, SUM) {                \
+  const int src = (step * (x_in) + (y) * rgb_stride);    \
+  const int dst = (x_out) + (y) * picture->uv0_stride;   \
+  const int r = SUM(r_ptr + src);                        \
+  const int g = SUM(g_ptr + src);                        \
+  const int b = SUM(b_ptr + src);                        \
+  picture->u0[dst] = VP8RGBToU(r, g, b);                 \
+  picture->v0[dst] = VP8RGBToV(r, g, b);                 \
+}
+
+static void MakeGray(WebPPicture* const picture) {
+  const int chroma_w = HALVE(picture->width);
+  int row = HALVE(picture->height);
+  while (row-- > 0) {   // row order is irrelevant: each fill is independent
+    const int off = row * picture->uv_stride;
+    memset(picture->u + off, 128, chroma_w);
+    memset(picture->v + off, 128, chroma_w);
+  }
+}
+
+static int ImportYUVAFromRGBA(const uint8_t* const r_ptr,
+                              const uint8_t* const g_ptr,
+                              const uint8_t* const b_ptr,
+                              const uint8_t* const a_ptr,
+                              int step,         // bytes per pixel
+                              int rgb_stride,   // bytes per scanline
+                              WebPPicture* const picture) {
+  const WebPEncCSP uv_csp = picture->colorspace & WEBP_CSP_UV_MASK;
+  int x, y;
+  const int width = picture->width;
+  const int height = picture->height;
+  const int has_alpha = CheckNonOpaque(a_ptr, width, height, step, rgb_stride);
+  // The alpha bit is set only if a sample != 0xff was found (never if NULL).
+  picture->colorspace = uv_csp;
+  picture->use_argb = 0;
+  if (has_alpha) {
+    picture->colorspace |= WEBP_CSP_ALPHA_BIT;
+  }
+  if (!WebPPictureAlloc(picture)) return 0;   // the only failure path
+
+  // Import luma plane
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const int offset = step * x + y * rgb_stride;
+      picture->y[x + y * picture->y_stride] =
+          VP8RGBToY(r_ptr[offset], g_ptr[offset], b_ptr[offset]);
+    }
+  }
+
+  // Downsample U/V plane
+  if (uv_csp != WEBP_YUV400) {
+    for (y = 0; y < (height >> 1); ++y) {
+      for (x = 0; x < (width >> 1); ++x) {
+        RGB_TO_UV(x, y, SUM4);    // full 2x2 block
+      }
+      if (width & 1) {
+        RGB_TO_UV(x, y, SUM2V);   // odd width: last column, vertical pair
+      }
+    }
+    if (height & 1) {             // odd height: 'y' now indexes the last row
+      for (x = 0; x < (width >> 1); ++x) {
+        RGB_TO_UV(x, y, SUM2H);   // horizontal pair
+      }
+      if (width & 1) {
+        RGB_TO_UV(x, y, SUM1);    // bottom-right corner: a single sample
+      }
+    }
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    // Store original U/V samples too
+    if (uv_csp == WEBP_YUV422) {
+      for (y = 0; y < height; ++y) {
+        for (x = 0; x < (width >> 1); ++x) {
+          RGB_TO_UV0(2 * x, x, y, SUM2H);
+        }
+        if (width & 1) {
+          RGB_TO_UV0(2 * x, x, y, SUM1);
+        }
+      }
+    } else if (uv_csp == WEBP_YUV444) {
+      for (y = 0; y < height; ++y) {
+        for (x = 0; x < width; ++x) {
+          RGB_TO_UV0(x, x, y, SUM1);
+        }
+      }
+    }
+#endif
+  } else {
+    MakeGray(picture);   // YUV400: U and V are filled with 128
+  }
+
+  if (has_alpha) {
+    assert(step >= 4);   // an alpha sample implies at least 4 bytes per pixel
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        picture->a[x + y * picture->a_stride] =
+            a_ptr[step * x + y * rgb_stride];
+      }
+    }
+  }
+  return 1;
+}
+
+static int Import(WebPPicture* const picture,
+                  const uint8_t* const rgb, int rgb_stride,
+                  int step, int swap_rb, int import_alpha) {
+  const uint8_t* const r_ptr = rgb + (swap_rb ? 2 : 0);   // swap_rb: B,G,R order
+  const uint8_t* const g_ptr = rgb + 1;
+  const uint8_t* const b_ptr = rgb + (swap_rb ? 0 : 2);
+  const uint8_t* const a_ptr = import_alpha ? rgb + 3 : NULL;
+  const int width = picture->width;
+  const int height = picture->height;
+  // 'step' is the byte distance between pixels, 'rgb_stride' between rows.
+  if (!picture->use_argb) {
+    return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride,
+                              picture);
+  }
+  if (import_alpha) {
+    picture->colorspace |= WEBP_CSP_ALPHA_BIT;
+  } else {
+    picture->colorspace &= ~WEBP_CSP_ALPHA_BIT;
+  }
+  if (!WebPPictureAlloc(picture)) return 0;
+
+  if (!import_alpha) {
+    int x, y;
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const int offset = step * x + y * rgb_stride;
+        const uint32_t argb =
+            0xff000000u |             // no alpha imported: fully opaque
+            (r_ptr[offset] << 16) |
+            (g_ptr[offset] <<  8) |
+            (b_ptr[offset]);
+        picture->argb[x + y * picture->argb_stride] = argb;
+      }
+    }
+  } else {
+    int x, y;
+    assert(step >= 4);
+    for (y = 0; y < height; ++y) {
+      for (x = 0; x < width; ++x) {
+        const int offset = step * x + y * rgb_stride;
+        const uint32_t argb = ((uint32_t)a_ptr[offset] << 24) |   // unsigned:
+                              (r_ptr[offset] << 16) |   // shifting a promoted
+                              (g_ptr[offset] <<  8) |   // 'int' into the sign
+                              (b_ptr[offset]);          // bit is undefined
+        picture->argb[x + y * picture->argb_stride] = argb;
+      }
+    }
+  }
+  return 1;
+}
+#undef SUM4
+#undef SUM2V
+#undef SUM2H
+#undef SUM1
+#undef RGB_TO_UV
+
+int WebPPictureImportRGB(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {
+  return Import(picture, rgb, rgb_stride, 3, 0, 0);   // step 3, no alpha
+}
+
+int WebPPictureImportBGR(WebPPicture* picture,
+                         const uint8_t* rgb, int rgb_stride) {
+  return Import(picture, rgb, rgb_stride, 3, 1, 0);   // step 3, R/B swapped
+}
+
+int WebPPictureImportRGBA(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return Import(picture, rgba, rgba_stride, 4, 0, 1);   // step 4, with alpha
+}
+
+int WebPPictureImportBGRA(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return Import(picture, rgba, rgba_stride, 4, 1, 1);   // swapped, with alpha
+}
+
+int WebPPictureImportRGBX(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return Import(picture, rgba, rgba_stride, 4, 0, 0);   // 4th byte is ignored
+}
+
+int WebPPictureImportBGRX(WebPPicture* picture,
+                          const uint8_t* rgba, int rgba_stride) {
+  return Import(picture, rgba, rgba_stride, 4, 1, 0);   // swapped, 4th ignored
+}
+
+//------------------------------------------------------------------------------
+// Automatic YUV <-> ARGB conversions.
+
+int WebPPictureYUVAToARGB(WebPPicture* picture) {
+  if (picture == NULL) return 0;
+  if (picture->memory_ == NULL || picture->y == NULL ||
+      picture->u == NULL || picture->v == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((picture->colorspace & WEBP_CSP_ALPHA_BIT) && picture->a == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  }
+  if ((picture->colorspace & WEBP_CSP_UV_MASK) != WEBP_YUV420) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  }
+  // Allocate a new argb buffer (discarding the previous one).
+  if (!PictureAllocARGB(picture)) return 0;
+
+  // Convert
+  {
+    int y;
+    const int width = picture->width;
+    const int height = picture->height;
+    const int argb_stride = 4 * picture->argb_stride;   // in bytes
+    uint8_t* dst = (uint8_t*)picture->argb;
+    const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
+    WebPUpsampleLinePairFunc upsample = WebPGetLinePairConverter(ALPHA_IS_LAST);
+
+    // First row, with replicated top samples.
+    upsample(NULL, cur_y, cur_u, cur_v, cur_u, cur_v, NULL, dst, width);
+    cur_y += picture->y_stride;
+    dst += argb_stride;
+    // Center rows.
+    for (y = 1; y + 1 < height; y += 2) {
+      const uint8_t* const top_u = cur_u;
+      const uint8_t* const top_v = cur_v;
+      cur_u += picture->uv_stride;
+      cur_v += picture->uv_stride;
+      upsample(cur_y, cur_y + picture->y_stride, top_u, top_v, cur_u, cur_v,
+               dst, dst + argb_stride, width);
+      cur_y += 2 * picture->y_stride;
+      dst += 2 * argb_stride;
+    }
+    // Last row (if needed), with replicated bottom samples.
+    if (height > 1 && !(height & 1)) {
+      upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, width);
+    }
+    // Insert alpha values if needed, in replacement for the default 0xff ones.
+    if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
+      for (y = 0; y < height; ++y) {
+        uint32_t* const dst = picture->argb + y * picture->argb_stride;
+        const uint8_t* const src = picture->a + y * picture->a_stride;
+        int x;
+        for (x = 0; x < width; ++x) {
+          // Cast first: 'src[x] << 24' would shift an 'int' into its sign bit.
+          dst[x] = (dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
+        }
+      }
+    }
+  }
+  return 1;
+}
+
+int WebPPictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace) {
+  if (picture == NULL) return 0;
+  if (picture->argb == NULL) {
+    return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER);
+  } else {
+    const uint8_t* const argb = (const uint8_t*)picture->argb;
+    const uint8_t* const r = ALPHA_IS_LAST ? argb + 2 : argb + 1;
+    const uint8_t* const g = ALPHA_IS_LAST ? argb + 1 : argb + 2;
+    const uint8_t* const b = ALPHA_IS_LAST ? argb + 0 : argb + 3;
+    const uint8_t* const a = ALPHA_IS_LAST ? argb + 3 : argb + 0;
+    // Convert into a detached copy ('tmp' references no buffer), so that a
+    // failed allocation leaves 'picture' and its current YUV buffer untouched.
+    WebPPicture tmp;
+    WebPPictureGrabSpecs(picture, &tmp);
+    tmp.use_argb = 0;
+    tmp.colorspace = colorspace & WEBP_CSP_UV_MASK;
+    if (!ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, &tmp)) {
+      return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    }
+    free(picture->memory_);   // success: the previous YUV buffer is obsolete
+    tmp.argb = picture->argb;   // keep the ARGB buffer alongside the new YUV
+    tmp.argb_stride = picture->argb_stride;
+    tmp.memory_argb_ = picture->memory_argb_;
+    *picture = tmp;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Helper: clean up fully transparent area to help compressibility.
+
+#define SIZE 8
+#define SIZE2 (SIZE / 2)
+static int is_transparent_area(const uint8_t* ptr, int stride, int size) {
+  // Returns 1 when every byte of the size x size square at 'ptr' is zero.
+  int rows_left, col;
+  for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+    for (col = 0; col < size; ++col) {
+      if (ptr[col] != 0) {
+        return 0;   // a non-zero sample: the area is not fully transparent
+      }
+    }
+  }
+  return 1;
+}
+
+static WEBP_INLINE void flatten(uint8_t* ptr, int v, int stride, int size) {
+  // Fill the size x size square at 'ptr' with the value 'v', row by row.
+  int rows_left;
+  for (rows_left = size; rows_left > 0; --rows_left, ptr += stride) {
+    memset(ptr, v, size);
+  }
+}
+
+void WebPCleanupTransparentArea(WebPPicture* pic) {
+  int x, y, w, h;
+  const uint8_t* a_ptr;
+  int values[3] = { 0 };   // Y, U, V values used to fill the current run
+
+  if (pic == NULL) return;
+
+  a_ptr = pic->a;
+  if (a_ptr == NULL) return;    // nothing to do
+
+  w = pic->width / SIZE;    // number of whole SIZE x SIZE blocks per row...
+  h = pic->height / SIZE;   // ...and per column
+  for (y = 0; y < h; ++y) {
+    int need_reset = 1;   // 1: the next transparent block starts a new run
+    for (x = 0; x < w; ++x) {
+      const int off_a = (y * pic->a_stride + x) * SIZE;
+      const int off_y = (y * pic->y_stride + x) * SIZE;
+      const int off_uv = (y * pic->uv_stride + x) * SIZE2;
+      if (is_transparent_area(a_ptr + off_a, pic->a_stride, SIZE)) {
+        if (need_reset) {   // take the block's top-left sample as fill value
+          values[0] = pic->y[off_y];
+          values[1] = pic->u[off_uv];
+          values[2] = pic->v[off_uv];
+          need_reset = 0;
+        }
+        flatten(pic->y + off_y, values[0], pic->y_stride, SIZE);
+        flatten(pic->u + off_uv, values[1], pic->uv_stride, SIZE2);
+        flatten(pic->v + off_uv, values[2], pic->uv_stride, SIZE2);
+      } else {
+        need_reset = 1;
+      }
+    }
+    // ignore the left-overs on right/bottom
+  }
+}
+
+#undef SIZE
+#undef SIZE2
+
+
+//------------------------------------------------------------------------------
+// Distortion
+
+// Max value returned in case of exact similarity.
+static const double kMinDistortion_dB = 99.;
+
+int WebPPictureDistortion(const WebPPicture* pic1, const WebPPicture* pic2,
+                          int type, float result[5]) {
+  int c;
+  DistoStats stats[5];   // [0..2]: Y, U, V  [3]: alpha  [4]: sum of [0..3]
+  int has_alpha;
+  // Returns 0 if the pictures are missing, mismatched or not YUV(A), else 1.
+  if (pic1 == NULL || pic2 == NULL ||
+      pic1->width != pic2->width || pic1->height != pic2->height ||
+      pic1->y == NULL || pic2->y == NULL ||
+      pic1->u == NULL || pic2->u == NULL ||
+      pic1->v == NULL || pic2->v == NULL ||
+      result == NULL) {
+    return 0;
+  }
+  // TODO(skal): provide distortion for ARGB too.
+  if (pic1->use_argb == 1 || pic1->use_argb != pic2->use_argb) {
+    return 0;
+  }
+
+  has_alpha = !!(pic1->colorspace & WEBP_CSP_ALPHA_BIT);
+  if (has_alpha != !!(pic2->colorspace & WEBP_CSP_ALPHA_BIT) ||
+      (has_alpha && (pic1->a == NULL || pic2->a == NULL))) {
+    return 0;
+  }
+
+  memset(stats, 0, sizeof(stats));
+  VP8SSIMAccumulatePlane(pic1->y, pic1->y_stride,
+                         pic2->y, pic2->y_stride,
+                         pic1->width, pic1->height, &stats[0]);
+  VP8SSIMAccumulatePlane(pic1->u, pic1->uv_stride,
+                         pic2->u, pic2->uv_stride,
+                         (pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
+                         &stats[1]);
+  VP8SSIMAccumulatePlane(pic1->v, pic1->uv_stride,
+                         pic2->v, pic2->uv_stride,
+                         (pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
+                         &stats[2]);
+  if (has_alpha) {   // otherwise stats[3] keeps its zeroed state
+    VP8SSIMAccumulatePlane(pic1->a, pic1->a_stride,
+                           pic2->a, pic2->a_stride,
+                           pic1->width, pic1->height, &stats[3]);
+  }
+  for (c = 0; c <= 4; ++c) {
+    if (type == 1) {   // score derived from VP8SSIMGet()
+      const double v = VP8SSIMGet(&stats[c]);
+      result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
+                                   : kMinDistortion_dB);
+    } else {           // any other 'type': derived from the squared error
+      const double v = VP8SSIMGetSquaredError(&stats[c]);
+      result[c] = (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
+                                   : kMinDistortion_dB);
+    }
+    // Accumulate forward
+    if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Simplest high-level calls:
+
+typedef int (*Importer)(WebPPicture* const, const uint8_t* const, int);
+
+static size_t Encode(const uint8_t* rgba, int width, int height, int stride,
+                     Importer import, float quality_factor, int lossless,
+                     uint8_t** output) {
+  WebPPicture pic;
+  WebPConfig config;
+  WebPMemoryWriter wrt;
+  int ok;
+  if (output == NULL) return 0;   // nowhere to return the encoded bytes
+  if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality_factor) ||
+      !WebPPictureInit(&pic)) {
+    return 0;  // shouldn't happen, except if system installation is broken
+  }
+
+  config.lossless = !!lossless;
+  pic.use_argb = !!lossless;   // lossless input is imported as ARGB
+  pic.width = width;
+  pic.height = height;
+  pic.writer = WebPMemoryWrite;   // encoder output is appended to 'wrt'
+  pic.custom_ptr = &wrt;
+  WebPMemoryWriterInit(&wrt);
+
+  ok = import(&pic, rgba, stride) && WebPEncode(&config, &pic);
+  WebPPictureFree(&pic);   // the picture buffers are no longer needed
+  if (!ok) {
+    free(wrt.mem);
+    *output = NULL;
+    return 0;
+  }
+  *output = wrt.mem;   // handed to the caller, together with its size
+  return wrt.size;
+}
+
+#define ENCODE_FUNC(NAME, IMPORTER)                                     \
+size_t NAME(const uint8_t* in, int w, int h, int bps, float q,          \
+            uint8_t** out) {                                            \
+  return Encode(in, w, h, bps, IMPORTER, q, 0, out);                    \
+}
+
+ENCODE_FUNC(WebPEncodeRGB, WebPPictureImportRGB);
+ENCODE_FUNC(WebPEncodeBGR, WebPPictureImportBGR);
+ENCODE_FUNC(WebPEncodeRGBA, WebPPictureImportRGBA);
+ENCODE_FUNC(WebPEncodeBGRA, WebPPictureImportBGRA);
+
+#undef ENCODE_FUNC
+
+#define LOSSLESS_DEFAULT_QUALITY 70.
+#define LOSSLESS_ENCODE_FUNC(NAME, IMPORTER)                                 \
+size_t NAME(const uint8_t* in, int w, int h, int bps, uint8_t** out) {       \
+  return Encode(in, w, h, bps, IMPORTER, LOSSLESS_DEFAULT_QUALITY, 1, out);  \
+}
+
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGB, WebPPictureImportRGB);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGR, WebPPictureImportBGR);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessRGBA, WebPPictureImportRGBA);
+LOSSLESS_ENCODE_FUNC(WebPEncodeLosslessBGRA, WebPPictureImportBGRA);
+
+#undef LOSSLESS_ENCODE_FUNC
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/quant.c b/drivers/webpold/enc/quant.c
new file mode 100644
index 0000000000..ea153849c8
--- /dev/null
+++ b/drivers/webpold/enc/quant.c
@@ -0,0 +1,930 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//   Quantization
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "./cost.h"
+
+#define DO_TRELLIS_I4  1
+#define DO_TRELLIS_I16 1   // not a huge gain, but ok at low bitrate.
+#define DO_TRELLIS_UV  0   // disable trellis for UV. Risky. Not worth.
+#define USE_TDISTO 1
+
+#define MID_ALPHA 64      // neutral value for susceptibility
+#define MIN_ALPHA 30      // lowest usable value for susceptibility
+#define MAX_ALPHA 100     // highest meaningful value for susceptibility
+
+#define SNS_TO_DQ 0.9     // Scaling constant between the sns value and the QP
+                          // power-law modulation. Must be strictly less than 1.
+
+#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+
+static WEBP_INLINE int clip(int v, int m, int M) {   // clamps v to [m, M]
+  return (v >= m) ? ((v <= M) ? v : M) : m;
+}
+
+static const uint8_t kZigzag[16] = {   // maps scan position n -> coeff index j
+  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+static const uint8_t kDcTable[128] = {   // DC steps, by quantizer index 0..127
+  4,     5,   6,   7,   8,   9,  10,  10,
+  11,   12,  13,  14,  15,  16,  17,  17,
+  18,   19,  20,  20,  21,  21,  22,  22,
+  23,   23,  24,  25,  25,  26,  27,  28,
+  29,   30,  31,  32,  33,  34,  35,  36,
+  37,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  46,  47,  48,  49,  50,
+  51,   52,  53,  54,  55,  56,  57,  58,
+  59,   60,  61,  62,  63,  64,  65,  66,
+  67,   68,  69,  70,  71,  72,  73,  74,
+  75,   76,  76,  77,  78,  79,  80,  81,
+  82,   83,  84,  85,  86,  87,  88,  89,
+  91,   93,  95,  96,  98, 100, 101, 102,
+  104, 106, 108, 110, 112, 114, 116, 118,
+  122, 124, 126, 128, 130, 132, 134, 136,
+  138, 140, 143, 145, 148, 151, 154, 157
+};
+
+static const uint16_t kAcTable[128] = {   // AC steps (y1_ and uv_ matrices)
+  4,     5,   6,   7,   8,   9,  10,  11,
+  12,   13,  14,  15,  16,  17,  18,  19,
+  20,   21,  22,  23,  24,  25,  26,  27,
+  28,   29,  30,  31,  32,  33,  34,  35,
+  36,   37,  38,  39,  40,  41,  42,  43,
+  44,   45,  46,  47,  48,  49,  50,  51,
+  52,   53,  54,  55,  56,  57,  58,  60,
+  62,   64,  66,  68,  70,  72,  74,  76,
+  78,   80,  82,  84,  86,  88,  90,  92,
+  94,   96,  98, 100, 102, 104, 106, 108,
+  110, 112, 114, 116, 119, 122, 125, 128,
+  131, 134, 137, 140, 143, 146, 149, 152,
+  155, 158, 161, 164, 167, 170, 173, 177,
+  181, 185, 189, 193, 197, 201, 205, 209,
+  213, 217, 221, 225, 229, 234, 239, 245,
+  249, 254, 259, 264, 269, 274, 279, 284
+};
+
+static const uint16_t kAcTable2[128] = {   // AC steps for the y2_ matrix
+  8,     8,   9,  10,  12,  13,  15,  17,
+  18,   20,  21,  23,  24,  26,  27,  29,
+  31,   32,  34,  35,  37,  38,  40,  41,
+  43,   44,  46,  48,  49,  51,  52,  54,
+  55,   57,  58,  60,  62,  63,  65,  66,
+  68,   69,  71,  72,  74,  75,  77,  79,
+  80,   82,  83,  85,  86,  88,  89,  93,
+  96,   99, 102, 105, 108, 111, 114, 117,
+  120, 124, 127, 130, 133, 136, 139, 142,
+  145, 148, 151, 155, 158, 161, 164, 167,
+  170, 173, 176, 179, 184, 189, 193, 198,
+  203, 207, 212, 217, 221, 226, 230, 235,
+  240, 244, 249, 254, 258, 263, 268, 274,
+  280, 286, 292, 299, 305, 311, 317, 323,
+  330, 336, 342, 348, 354, 362, 370, 379,
+  385, 393, 401, 409, 416, 424, 432, 440
+};
+
+static const uint16_t kCoeffThresh[16] = {   // only used in commented-out code
+  0,  10, 20, 30,
+  10, 20, 30, 30,
+  20, 30, 30, 30,
+  30, 30, 30, 30
+};
+
+// TODO(skal): tune more. Coeff thresholding?
+static const uint8_t kBiasMatrices[3][16] = {  // [3] = [luma-ac,luma-dc,chroma]
+  { 96, 96, 96, 96,   // type 0: ExpandMatrix() uses it for the y1_ matrix
+    96, 96, 96, 96,
+    96, 96, 96, 96,
+    96, 96, 96, 96 },
+  { 96, 96, 96, 96,   // type 1: ExpandMatrix() uses it for the y2_ matrix
+    96, 96, 96, 96,
+    96, 96, 96, 96,
+    96, 96, 96, 96 },
+  { 96, 96, 96, 96,   // type 2: ExpandMatrix() uses it for the uv_ matrix
+    96, 96, 96, 96,
+    96, 96, 96, 96,
+    96, 96, 96, 96 }
+};
+
+// Sharpening by (slightly) raising the hi-frequency coeffs (only for trellis).
+// Hack-ish but helpful for mid-bitrate range. Use with care.
+static const uint8_t kFreqSharpening[16] = {   // scaled by q_[] into sharpen_[]
+  0,  30, 60, 90,
+  30, 60, 90, 90,
+  60, 90, 90, 90,
+  90, 90, 90, 90
+};
+
+//------------------------------------------------------------------------------
+// Initialize quantization parameters in VP8Matrix
+
+// Expands q_[0..1] into the full matrix 'm'. Returns the average quantizer.
+static int ExpandMatrix(VP8Matrix* const m, int type) {
+  int n;
+  int total = 0;
+  // Only q_[0] (DC) and q_[1] (AC) are set by the caller: replicate the AC step.
+  for (n = 15; n >= 2; --n) m->q_[n] = m->q_[1];
+  for (n = 0; n < 16; ++n) {
+    const int pos = kZigzag[n];
+    const int step = m->q_[pos];
+    const int bias = kBiasMatrices[type][pos];
+    m->iq_[pos] = (1 << QFIX) / step;
+    m->bias_[pos] = BIAS(bias);
+    // TODO(skal): tune kCoeffThresh[]
+    m->zthresh_[pos] = ((256 /*+ kCoeffThresh[pos]*/ - bias) * step + 127) >> 8;
+    m->sharpen_[pos] = (kFreqSharpening[pos] * step) >> 11;
+    total += step;
+  }
+  return (total + 8) >> 4;
+}
+
+static void SetupMatrices(VP8Encoder* enc) {
+  // Fills the quantization matrices and lambdas of each segment in use.
+  // tlambda_ is forced to zero unless method_ >= 4.
+  const int tscale = (enc->method_ >= 4) ? enc->config_->sns_strength : 0;
+  const int count = enc->segment_hdr_.num_segments_;
+  int s;
+  for (s = 0; s < count; ++s) {
+    VP8SegmentInfo* const seg = &enc->dqm_[s];
+    const int base = seg->quant_;
+    int avg_y1, avg_y2, avg_uv;    // average quantizer of each matrix
+
+    // Step sizes for DC (q_[0]) and AC (q_[1]), from the clipped indices.
+    // Note that the U/V DC index is capped at 117 instead of 127.
+    seg->y1_.q_[0] = kDcTable[clip(base + enc->dq_y1_dc_, 0, 127)];
+    seg->y1_.q_[1] = kAcTable[clip(base, 0, 127)];
+    seg->y2_.q_[0] = kDcTable[clip(base + enc->dq_y2_dc_, 0, 127)] * 2;
+    seg->y2_.q_[1] = kAcTable2[clip(base + enc->dq_y2_ac_, 0, 127)];
+    seg->uv_.q_[0] = kDcTable[clip(base + enc->dq_uv_dc_, 0, 117)];
+    seg->uv_.q_[1] = kAcTable[clip(base + enc->dq_uv_ac_, 0, 127)];
+
+    avg_y1 = ExpandMatrix(&seg->y1_, 0);
+    avg_y2 = ExpandMatrix(&seg->y2_, 1);
+    avg_uv = ExpandMatrix(&seg->uv_, 2);
+
+    // TODO: Switch to kLambda*[] tables?
+    // Rate-distortion multipliers, all derived from the average quantizers.
+    seg->tlambda_            = (tscale * avg_y1) >> 5;
+    seg->lambda_mode_        = (1 * avg_y1 * avg_y1) >> 7;
+    seg->lambda_i4_          = (3 * avg_y1 * avg_y1) >> 7;
+    seg->lambda_i16_         = (3 * avg_y2 * avg_y2);
+    seg->lambda_uv_          = (3 * avg_uv * avg_uv) >> 6;
+    seg->lambda_trellis_i4_  = (7 * avg_y1 * avg_y1) >> 3;
+    seg->lambda_trellis_i16_ = (avg_y2 * avg_y2) >> 2;
+    seg->lambda_trellis_uv_  = (avg_uv * avg_uv) << 1;
+  }
+}
+
+//------------------------------------------------------------------------------
+// Initialize filtering parameters
+
+// Very small filter-strength values have close to no visual effect. So we can
+// save a little decoding-CPU by turning filtering off for these.
+#define FSTRENGTH_CUTOFF 3
+
+static void SetupFilterStrength(VP8Encoder* const enc) {
+  const int strength = enc->config_->filter_strength;
+  int s;
+  for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
+    // Segments with lower quantizer will be less filtered. TODO: tune (wrt SNS)
+    VP8SegmentInfo* const seg = &enc->dqm_[s];
+    const int f = (strength * 256 * seg->quant_ / 128) / (256 + seg->beta_);
+    seg->fstrength_ = (f > 63) ? 63 : (f >= FSTRENGTH_CUTOFF) ? f : 0;
+  }
+  // We record the initial strength (mainly for the case of 1-segment only).
+  enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
+  enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
+  enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
+}
+
+//------------------------------------------------------------------------------
+
+// Note: if you change the values below, remember that the max range
+// allowed by the syntax for DQ_UV is [-16,16].
+#define MAX_DQ_UV (6)
+#define MIN_DQ_UV (-4)
+
+// Maps the user-facing quality 'q' to the internal compression factor with a
+// piecewise-linear function: jpeg-like "good" q=75 lands on our middle c=0.5.
+static double QualityToCompression(double q) {
+  const double ratio = q / 100.;
+  if (ratio < 0.75) return ratio * (2. / 3.);
+  return 2. * ratio - 1.;
+}
+// Sets per-segment quantizers, U/V deltas, matrices and filter strengths.
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
+  int i;
+  int dq_uv_ac, dq_uv_dc;
+  const int num_segments = enc->config_->segments;
+  const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
+  const double c_base = QualityToCompression(quality);
+  for (i = 0; i < num_segments; ++i) {
+    // The file size roughly scales as pow(quantizer, 3.). Actually, the
+    // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
+    // in the mid-quant range. So we scale the compressibility inversely to
+    // this power-law: quant ~= compression ^ 1/3. This law holds well for
+    // low quant. Finer modelling for high-quant would make use of kAcTable[]
+    // more explicitly.
+    // Additionally, we modulate the base exponent 1/3 to accommodate for the
+    // quantization susceptibility and allow denser segments to be quantized
+    // more.
+    const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.;
+    const double c = pow(c_base, expn);
+    const int q = (int)(127. * (1. - c));
+    assert(expn > 0.);
+    enc->dqm_[i].quant_ = clip(q, 0, 127);
+  }
+
+  // purely indicative in the bitstream (except for the 1-segment case)
+  enc->base_quant_ = enc->dqm_[0].quant_;
+
+  // fill-in values for the unused segments (required by the syntax)
+  for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
+    enc->dqm_[i].quant_ = enc->base_quant_;
+  }
+
+  // uv_alpha_ is normally spread around ~60. The useful range is
+  // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
+  // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
+  dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
+                                          / (MAX_ALPHA - MIN_ALPHA);
+  // we rescale by the user-defined strength of adaptation
+  dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
+  // and make it safe.
+  dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
+  // We also boost the dc-uv-quant a little, based on sns-strength, since
+  // U/V channels are quite more reactive to high quants (flat DC-blocks
+  // tend to appear, and are unpleasant).
+  dq_uv_dc = -4 * enc->config_->sns_strength / 100;
+  dq_uv_dc = clip(dq_uv_dc, -15, 15);   // 4bit-signed max allowed
+
+  enc->dq_y1_dc_ = 0;       // TODO(skal): dq-lum
+  enc->dq_y2_dc_ = 0;
+  enc->dq_y2_ac_ = 0;
+  enc->dq_uv_dc_ = dq_uv_dc;
+  enc->dq_uv_ac_ = dq_uv_ac;
+
+  SetupMatrices(enc);       // consumes the quant_ and dq_* values set above
+
+  SetupFilterStrength(enc);   // initialize segments' filtering, eventually
+}
+
+//------------------------------------------------------------------------------
+// Form the predictions in cache
+
+// Offsets into it->yuv_p_. Must be ordered as {DC_PRED, TM_PRED, V_PRED, H_PRED}
+const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
+const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
+
+// Same for 4x4 blocks; indexed by mode, from B_DC_PRED to B_HU_PRED.
+const int VP8I4ModeOffsets[NUM_BMODES] = {
+  I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
+};
+
+void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
+  // NULL is passed for 'top' when it->y_ is zero, for 'left' when it->x_ is.
+  const uint8_t* const top_row = it->y_ ? it->enc_->y_top_ + it->x_ * 16 : NULL;
+  const uint8_t* const left_col = it->x_ ? it->enc_->y_left_ : NULL;
+  VP8EncPredLuma16(it->yuv_p_, left_col, top_row);
+}
+
+void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
+  // NULL is passed for 'top' when it->y_ is zero, for 'left' when it->x_ is.
+  const uint8_t* const top_row = it->y_ ? it->enc_->uv_top_ + it->x_ * 16 : NULL;
+  const uint8_t* const left_col = it->x_ ? it->enc_->u_left_ : NULL;
+  VP8EncPredChroma8(it->yuv_p_, left_col, top_row);
+}
+
+void VP8MakeIntra4Preds(const VP8EncIterator* const it) {   // fills it->yuv_p_
+  VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
+}
+
+//------------------------------------------------------------------------------
+// Quantize
+
+// Layout of the 4x4 blocks inside the work area (one character per block):
+// +----+
+// |YYYY| 0
+// |YYYY| 4
+// |YYYY| 8
+// |YYYY| 12
+// +----+
+// |UUVV| 16
+// |UUVV| 20
+// +----+
+
+const int VP8Scan[16 + 4 + 4] = {
+  // Luma
+  0 +  0 * BPS,  4 +  0 * BPS, 8 +  0 * BPS, 12 +  0 * BPS,
+  0 +  4 * BPS,  4 +  4 * BPS, 8 +  4 * BPS, 12 +  4 * BPS,
+  0 +  8 * BPS,  4 +  8 * BPS, 8 +  8 * BPS, 12 +  8 * BPS,
+  0 + 12 * BPS,  4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
+  // Chroma: callers add these to a U_OFF-based pointer (V is 8 bytes right).
+  0 + 0 * BPS,   4 + 0 * BPS, 0 + 4 * BPS,  4 + 4 * BPS,    // U
+  8 + 0 * BPS,  12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS     // V
+};
+
+//------------------------------------------------------------------------------
+// Distortion measurement
+
+static const uint16_t kWeightY[16] = {   // weights passed to VP8TDisto*()
+  38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
+};
+
+static const uint16_t kWeightTrellis[16] = {   // per-coeff error weights
+#if USE_TDISTO == 0
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+#else
+  30, 27, 19, 11,
+  27, 24, 17, 10,
+  19, 17, 12,  8,
+  11, 10,  8,  6
+#endif
+};
+
+// Resets the scalar fields of a score: zero distortion/rate/non-zero bits and
+// a total score of MAX_COST. (The level arrays are left untouched.)
+static void InitScore(VP8ModeScore* const rd) {
+  rd->score = MAX_COST;
+  rd->nz = 0;
+  rd->R  = 0;
+  rd->D  = rd->SD = 0;
+}
+
+static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+  dst->score = src->score;
+  dst->nz = src->nz;      // plain copy: the nz bits are replaced, not merged.
+  dst->R  = src->R;
+  dst->SD = src->SD;
+  dst->D  = src->D;
+}
+
+static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
+  dst->score += src->score;
+  dst->nz |= src->nz;     // unlike CopyScore(), non-zero bits are OR-ed in.
+  dst->R  += src->R;
+  dst->SD += src->SD;
+  dst->D  += src->D;
+}
+
+//------------------------------------------------------------------------------
+// Performs trellis-optimized quantization.
+
+// Trellis
+
+typedef struct {
+  int prev;        // best predecessor, as a delta in [-MIN_DELTA, MAX_DELTA]
+  int level;       // quantized level tested at this node (level0 + delta)
+  int sign;        // sign of coeff_i (1 if the original coeff is negative)
+  score_t cost;    // accumulated bit cost; MAX_COST marks a dead node
+  score_t error;   // distortion = sum of (|coeff_i| - level_i * Q_i)^2
+  int ctx;         // context (only depends on 'level'. Could be spared.)
+} Node;
+
+// If a coefficient was quantized to a value Q (using a neutral bias),
+// we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
+// We don't test negative values though.
+#define MIN_DELTA 0   // how much lower level to try
+#define MAX_DELTA 1   // how much higher
+#define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
+#define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])   // n may be -1 (source)
+
+static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
+  // TODO: incorporate the "* 256" in the tables?
+  rd->score = 256 * (rd->D + rd->SD) + lambda * rd->R;
+}
+
+static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
+                                          score_t distortion) {
+  return 256 * distortion + lambda * rate;   // same weighting as SetRDScore()
+}
+// Trellis-quantizes in[] into out[]; returns 1 if a non-zero level is kept.
+static int TrellisQuantizeBlock(const VP8EncIterator* const it,
+                                int16_t in[16], int16_t out[16],
+                                int ctx0, int coeff_type,
+                                const VP8Matrix* const mtx,
+                                int lambda) {
+  ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type];
+  CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
+  const int first = (coeff_type == 0) ? 1 : 0;   // type 0 skips position 0
+  Node nodes[17][NUM_NODES];
+  int best_path[3] = {-1, -1, -1};   // store best-last/best-level/best-previous
+  score_t best_score;
+  int best_node;
+  int last = first - 1;
+  int n, m, p, nz;
+
+  {
+    score_t cost;
+    score_t max_error;
+    const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
+    const int last_proba = last_costs[VP8EncBands[first]][ctx0][0];
+
+    // compute maximal distortion.
+    max_error = 0;
+    for (n = first; n < 16; ++n) {
+      const int j  = kZigzag[n];
+      const int err = in[j] * in[j];
+      max_error += kWeightTrellis[j] * err;
+      if (err > thresh) last = n;
+    }
+    // we don't need to go inspect up to n = 16 coeffs. We can just go up
+    // to last + 1 (inclusive) without losing much.
+    if (last < 15) ++last;
+
+    // compute 'skip' score. This is the max score one can do.
+    cost = VP8BitCost(0, last_proba);
+    best_score = RDScoreTrellis(lambda, cost, max_error);
+
+    // initialize source node.
+    n = first - 1;
+    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
+      NODE(n, m).cost = 0;
+      NODE(n, m).error = max_error;
+      NODE(n, m).ctx = ctx0;
+    }
+  }
+
+  // traverse trellis.
+  for (n = first; n <= last; ++n) {
+    const int j  = kZigzag[n];
+    const int Q  = mtx->q_[j];
+    const int iQ = mtx->iq_[j];
+    const int B = BIAS(0x00);     // neutral bias
+    // note: it's important to take sign of the _original_ coeff,
+    // so we don't have to consider level < 0 afterward.
+    const int sign = (in[j] < 0);
+    int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
+    int level0;
+    if (coeff0 > 2047) coeff0 = 2047;
+
+    level0 = QUANTDIV(coeff0, iQ, B);
+    // test all alternate level values around level0.
+    for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
+      Node* const cur = &NODE(n, m);
+      int delta_error, new_error;
+      score_t cur_score = MAX_COST;
+      int level = level0 + m;
+      int last_proba;
+
+      cur->sign = sign;
+      cur->level = level;
+      cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
+      if (level >= 2048 || level < 0) {   // node is dead?
+        cur->cost = MAX_COST;
+        continue;
+      }
+      last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];
+
+      // Compute delta_error = how much coding this level will
+      // subtract as distortion to max_error
+      new_error = coeff0 - level * Q;
+      delta_error =
+        kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);
+
+      // Inspect all possible non-dead predecessors. Retain only the best one.
+      for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
+        const Node* const prev = &NODE(n - 1, p);
+        const int prev_ctx = prev->ctx;
+        const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
+        const score_t total_error = prev->error - delta_error;
+        score_t cost, base_cost, score;
+
+        if (prev->cost >= MAX_COST) {   // dead node?
+          continue;
+        }
+
+        // Base cost of both terminal/non-terminal
+        base_cost = prev->cost + VP8LevelCost(tcost, level);
+
+        // Examine node assuming it's a non-terminal one.
+        cost = base_cost;
+        if (level && n < 15) {
+          cost += VP8BitCost(1, last_proba);
+        }
+        score = RDScoreTrellis(lambda, cost, total_error);
+        if (score < cur_score) {
+          cur_score = score;
+          cur->cost  = cost;
+          cur->error = total_error;
+          cur->prev  = p;
+        }
+
+        // Now, record best terminal node (and thus best entry in the graph).
+        if (level) {
+          cost = base_cost;
+          if (n < 15) cost += VP8BitCost(0, last_proba);
+          score = RDScoreTrellis(lambda, cost, total_error);
+          if (score < best_score) {
+            best_score = score;
+            best_path[0] = n;   // best eob position
+            best_path[1] = m;   // best level
+            best_path[2] = p;   // best predecessor
+          }
+        }
+      }
+    }
+  }
+
+  // Fresh start: both arrays are cleared, then rebuilt from the best path.
+  memset(in + first, 0, (16 - first) * sizeof(*in));
+  memset(out + first, 0, (16 - first) * sizeof(*out));
+  if (best_path[0] == -1) {
+    return 0;   // skip!
+  }
+
+  // Unwind the best path.
+  // Note: best-prev on terminal node is not necessarily equal to the
+  // best_prev for non-terminal. So we patch best_path[2] in.
+  n = best_path[0];
+  best_node = best_path[1];
+  NODE(n, best_node).prev = best_path[2];   // force best-prev for terminal
+  nz = 0;
+
+  for (; n >= first; --n) {
+    const Node* const node = &NODE(n, best_node);
+    const int j = kZigzag[n];
+    out[n] = node->sign ? -node->level : node->level;
+    nz |= (node->level != 0);
+    in[j] = out[n] * mtx->q_[j];   // in[] now holds the dequantized coeff
+    best_node = node->prev;
+  }
+  return nz;
+}
+
+#undef NODE
+
+//------------------------------------------------------------------------------
+// Performs: difference, transform, quantize, back-transform, add
+// all at once. Output is the reconstructed block in *yuv_out, and the
+// quantized levels in *levels.
+// Returns the non-zero flags: bit n for luma block n, bit 24 for the DC block.
+static int ReconstructIntra16(VP8EncIterator* const it,
+                              VP8ModeScore* const rd,
+                              uint8_t* const yuv_out,
+                              int mode) {
+  const VP8Encoder* const enc = it->enc_;
+  const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
+  const uint8_t* const src = it->yuv_in_ + Y_OFF;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  int nz = 0;
+  int n;
+  int16_t tmp[16][16], dc_tmp[16];
+
+  for (n = 0; n < 16; ++n) {
+    VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
+  }
+  VP8FTransformWHT(tmp[0], dc_tmp);
+  nz |= VP8EncQuantizeBlock(dc_tmp, rd->y_dc_levels, 0, &dqm->y2_) << 24;
+
+  if (DO_TRELLIS_I16 && it->do_trellis_) {
+    int x, y;
+    VP8IteratorNzToBytes(it);
+    for (y = 0, n = 0; y < 4; ++y) {
+      for (x = 0; x < 4; ++x, ++n) {
+        const int ctx = it->top_nz_[x] + it->left_nz_[y];
+        const int non_zero =
+           TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,
+                                &dqm->y1_, dqm->lambda_trellis_i16_);
+        it->top_nz_[x] = it->left_nz_[y] = non_zero;
+        nz |= non_zero << n;
+      }
+    }
+  } else {
+    for (n = 0; n < 16; ++n) {
+      nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n;
+    }
+  }
+
+  // Transform back
+  VP8ITransformWHT(dc_tmp, tmp[0]);
+  for (n = 0; n < 16; n += 2) {   // presumably 2 blocks per call - TODO confirm
+    VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
+  }
+
+  return nz;
+}
+
+static int ReconstructIntra4(VP8EncIterator* const it,
+                             int16_t levels[16],
+                             const uint8_t* const src,
+                             uint8_t* const yuv_out,
+                             int mode) {
+  const VP8SegmentInfo* const seg = &it->enc_->dqm_[it->mb_->segment_];
+  const uint8_t* const pred = it->yuv_p_ + VP8I4ModeOffsets[mode];
+  int16_t coeffs[16];
+  int has_nz;
+  // Difference + transform, quantize, then back-transform + add into yuv_out.
+  VP8FTransform(src, pred, coeffs);
+  if (!(DO_TRELLIS_I4 && it->do_trellis_)) {
+    has_nz = VP8EncQuantizeBlock(coeffs, levels, 0, &seg->y1_);
+  } else {
+    const int col = it->i4_ & 3;
+    const int row = it->i4_ >> 2;
+    has_nz = TrellisQuantizeBlock(it, coeffs, levels,
+                                  it->top_nz_[col] + it->left_nz_[row], 3,
+                                  &seg->y1_, seg->lambda_trellis_i4_);
+  }
+  VP8ITransform(pred, coeffs, yuv_out, 0);
+  return has_nz;
+}
+// Returns the non-zero flags of the 8 chroma blocks, stored in bits 16..23.
+static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
+                         uint8_t* const yuv_out, int mode) {
+  const VP8Encoder* const enc = it->enc_;
+  const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
+  const uint8_t* const src = it->yuv_in_ + U_OFF;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  int nz = 0;
+  int n;
+  int16_t tmp[8][16];
+
+  for (n = 0; n < 8; ++n) {
+    VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]);
+  }
+  if (DO_TRELLIS_UV && it->do_trellis_) {   // compiled out: DO_TRELLIS_UV is 0
+    int ch, x, y;
+    for (ch = 0, n = 0; ch <= 2; ch += 2) {
+      for (y = 0; y < 2; ++y) {
+        for (x = 0; x < 2; ++x, ++n) {
+          const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
+          const int non_zero =
+            TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2,
+                                 &dqm->uv_, dqm->lambda_trellis_uv_);
+          it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
+          nz |= non_zero << n;
+        }
+      }
+    }
+  } else {
+    for (n = 0; n < 8; ++n) {
+      nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n;
+    }
+  }
+
+  for (n = 0; n < 8; n += 2) {   // presumably 2 blocks per call - TODO confirm
+    VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
+  }
+  return (nz << 16);
+}
+
+//------------------------------------------------------------------------------
+// RD-opt decision. Reconstruct each mode, evaluate distortion and bit-cost.
+// Pick the mode with the lowest RD-score = lambda * Rate + 256 * Distortion.
+
+static void SwapPtr(uint8_t** a, uint8_t** b) {
+  uint8_t* const old_b = *b;   // exchange the two pointers through a temporary
+  *b = *a;
+  *a = old_b;
+}
+
+static void SwapOut(VP8EncIterator* const it) {   // exchanges the two buffers
+  SwapPtr(&it->yuv_out_, &it->yuv_out2_);
+}
+// Tries the 4 intra16 modes and keeps the one with the lowest score in *rd.
+static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const VP8Encoder* const enc = it->enc_;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const int lambda = dqm->lambda_i16_;
+  const int tlambda = dqm->tlambda_;
+  const uint8_t* const src = it->yuv_in_ + Y_OFF;
+  VP8ModeScore rd16;
+  int mode;
+
+  rd->mode_i16 = -1;
+  for (mode = 0; mode < 4; ++mode) {
+    uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF;  // scratch buffer
+    int nz;
+
+    // Reconstruct
+    nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
+
+    // Measure RD-score
+    rd16.D = VP8SSE16x16(src, tmp_dst);
+    rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
+            : 0;
+    rd16.R = VP8GetCostLuma16(it, &rd16);
+    rd16.R += VP8FixedCostsI16[mode];
+
+    // Since we always examine Intra16 first, we can overwrite *rd directly.
+    SetRDScore(lambda, &rd16);
+    if (mode == 0 || rd16.score < rd->score) {
+      CopyScore(rd, &rd16);
+      rd->mode_i16 = mode;
+      rd->nz = nz;
+      memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
+      memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
+      SwapOut(it);   // the scratch buffer just written becomes yuv_out_
+    }
+  }
+  SetRDScore(dqm->lambda_mode_, rd);   // finalize score for mode decision.
+  VP8SetIntra16Mode(it, rd->mode_i16);
+}
+
+//------------------------------------------------------------------------------
+
+// Returns the cost array selected by the top and left prediction modes.
+static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
+                                     const uint8_t modes[16]) {
+  const int idx = it->i4_;
+  const int stride = it->enc_->preds_w_;
+  const int top = (idx >> 2) ? modes[idx - 4] : it->preds_[-stride + (idx & 3)];
+  const int left = (idx & 3) ? modes[idx - 1] : it->preds_[(idx >> 2) * stride - 1];
+  return VP8FixedCostsI4[top][left];
+}
+// Tries intra4x4 modes block by block; returns 1 if it beats the score in *rd.
+static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const VP8Encoder* const enc = it->enc_;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const int lambda = dqm->lambda_i4_;
+  const int tlambda = dqm->tlambda_;
+  const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
+  uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
+  int total_header_bits = 0;
+  VP8ModeScore rd_best;
+
+  if (enc->max_i4_header_bits_ == 0) {
+    return 0;
+  }
+
+  InitScore(&rd_best);
+  rd_best.score = 211;  // '211' is the value of VP8BitCost(0, 145)
+  VP8IteratorStartI4(it);
+  do {
+    VP8ModeScore rd_i4;
+    int mode;
+    int best_mode = -1;
+    const uint8_t* const src = src0 + VP8Scan[it->i4_];
+    const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
+    uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
+    uint8_t* tmp_dst = it->yuv_p_ + I4TMP;    // scratch buffer.
+
+    InitScore(&rd_i4);
+    VP8MakeIntra4Preds(it);
+    for (mode = 0; mode < NUM_BMODES; ++mode) {
+      VP8ModeScore rd_tmp;
+      int16_t tmp_levels[16];
+
+      // Reconstruct
+      rd_tmp.nz =
+          ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
+
+      // Compute RD-score
+      rd_tmp.D = VP8SSE4x4(src, tmp_dst);
+      rd_tmp.SD =
+          tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
+                  : 0;
+      rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
+      rd_tmp.R += mode_costs[mode];
+
+      SetRDScore(lambda, &rd_tmp);
+      if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
+        CopyScore(&rd_i4, &rd_tmp);
+        best_mode = mode;
+        SwapPtr(&tmp_dst, &best_block);   // best_block now holds the samples
+        memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
+      }
+    }
+    SetRDScore(dqm->lambda_mode_, &rd_i4);
+    AddScore(&rd_best, &rd_i4);
+    total_header_bits += mode_costs[best_mode];
+    if (rd_best.score >= rd->score ||
+        total_header_bits > enc->max_i4_header_bits_) {
+      return 0;   // give up: *rd keeps the score it had on entry
+    }
+    // Copy selected samples if not in the right place already.
+    if (best_block != best_blocks + VP8Scan[it->i4_])
+      VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
+    rd->modes_i4[it->i4_] = best_mode;
+    it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
+  } while (VP8IteratorRotateI4(it, best_blocks));
+
+  // finalize state
+  CopyScore(rd, &rd_best);
+  VP8SetIntra4Mode(it, rd->modes_i4);
+  SwapOut(it);
+  memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
+  return 1;   // select intra4x4 over intra16x16
+}
+
+//------------------------------------------------------------------------------
+// Tries the 4 chroma modes, keeps the best one and adds its score to *rd.
+static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const VP8Encoder* const enc = it->enc_;
+  const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
+  const int lambda = dqm->lambda_uv_;
+  const uint8_t* const src = it->yuv_in_ + U_OFF;
+  uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF;  // scratch buffer
+  uint8_t* const dst0 = it->yuv_out_ + U_OFF;
+  VP8ModeScore rd_best;
+  int mode;
+
+  rd->mode_uv = -1;
+  InitScore(&rd_best);
+  for (mode = 0; mode < 4; ++mode) {
+    VP8ModeScore rd_uv;
+
+    // Reconstruct
+    rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
+
+    // Compute RD-score
+    rd_uv.D  = VP8SSE16x8(src, tmp_dst);
+    rd_uv.SD = 0;    // TODO: should we call TDisto? it tends to flatten areas.
+    rd_uv.R  = VP8GetCostUV(it, &rd_uv);
+    rd_uv.R += VP8FixedCostsUV[mode];
+
+    SetRDScore(lambda, &rd_uv);
+    if (mode == 0 || rd_uv.score < rd_best.score) {
+      CopyScore(&rd_best, &rd_uv);
+      rd->mode_uv = mode;
+      memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
+      memcpy(dst0, tmp_dst, UV_SIZE);   //  TODO: SwapUVOut() ?
+    }
+  }
+  VP8SetIntraUVMode(it, rd->mode_uv);
+  AddScore(rd, &rd_best);   // rd->nz also gets the chroma non-zero bits
+}
+
+//------------------------------------------------------------------------------
+// Final reconstruction and quantization.
+// Reconstructs and quantizes using the modes already stored in the iterator.
+static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
+  const VP8Encoder* const enc = it->enc_;
+  const int i16 = (it->mb_->type_ == 1);
+  int nz = 0;
+
+  if (i16) {
+    nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
+  } else {
+    VP8IteratorStartI4(it);
+    do {
+      const int mode =
+          it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
+      const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
+      uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
+      VP8MakeIntra4Preds(it);
+      nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
+                              src, dst, mode) << it->i4_;
+    } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
+  }
+
+  nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
+  rd->nz = nz;   // luma and chroma non-zero flags combined
+}
+
+//------------------------------------------------------------------------------
+// Entry point
+// Returns 1 if the macroblock has no non-zero flag left in rd->nz (skipped).
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
+  int is_skipped;
+
+  InitScore(rd);
+
+  // We can perform predictions for Luma16x16 and Chroma8x8 already.
+  // Luma4x4 predictions needs to be done as-we-go.
+  VP8MakeLuma16Preds(it);
+  VP8MakeChroma8Preds(it);
+
+  // for rd_opt = 2, we perform trellis-quant on the final decision only.
+  // for rd_opt > 2, we use it for every scoring (=much slower).
+  if (rd_opt > 0) {
+    it->do_trellis_ = (rd_opt > 2);
+    PickBestIntra16(it, rd);
+    if (it->enc_->method_ >= 2) {
+      PickBestIntra4(it, rd);   // return value (i4 selected or not) is unused
+    }
+    PickBestUV(it, rd);
+    if (rd_opt == 2) {
+      it->do_trellis_ = 1;
+      SimpleQuantize(it, rd);
+    }
+  } else {
+    // TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
+    it->do_trellis_ = (it->enc_->method_ == 2);
+    SimpleQuantize(it, rd);
+  }
+  is_skipped = (rd->nz == 0);
+  VP8SetSkip(it, is_skipped);
+  return is_skipped;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/syntax.c b/drivers/webpold/enc/syntax.c
new file mode 100644
index 0000000000..4221436ff9
--- /dev/null
+++ b/drivers/webpold/enc/syntax.c
@@ -0,0 +1,437 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Header syntax writing
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "../format_constants.h"
+#include "./vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Helper functions
+
+// TODO(later): Move to webp/format_constants.h?
+static void PutLE24(uint8_t* const data, uint32_t val) {  // writes data[0..2]
+  data[0] = (val >>  0) & 0xff;   // least-significant byte first
+  data[1] = (val >>  8) & 0xff;
+  data[2] = (val >> 16) & 0xff;   // bits 24..31 of 'val' are not stored
+}
+
+static void PutLE32(uint8_t* const data, uint32_t val) {
+  PutLE24(data, val);
+  data[3] = (val >> 24) & 0xff;
+}
+
+static int IsVP8XNeeded(const VP8Encoder* const enc) {
+  return !!enc->has_alpha_;  // Currently the only case when VP8X is needed.
+                             // This could change in the future.
+}
+
+static int PutPaddingByte(const WebPPicture* const pic) {
+  // Emits one zero byte through pic->writer; returns 1 on success, 0 on error.
+  const uint8_t pad_byte[1] = { 0 };
+  return !!pic->writer(pad_byte, 1, pic);
+}
+
+//------------------------------------------------------------------------------
+// Writers for header's various pieces (in order of appearance)
+
+static WebPEncodingError PutRIFFHeader(const VP8Encoder* const enc,
+                                       size_t riff_size) {
+  const WebPPicture* const pic = enc->pic_;
+  uint8_t riff[RIFF_HEADER_SIZE] = {
+    'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P'
+  };
+  assert(riff_size == (uint32_t)riff_size);
+  PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
+  if (!pic->writer(riff, sizeof(riff), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
+  const WebPPicture* const pic = enc->pic_;
+  uint8_t vp8x[CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE] = {
+    'V', 'P', '8', 'X'    // chunk tag; the remaining bytes start out as zero
+  };
+  uint32_t flags = 0;
+
+  assert(IsVP8XNeeded(enc));
+  assert(pic->width >= 1 && pic->height >= 1);
+  assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
+
+  if (enc->has_alpha_) {
+    flags |= ALPHA_FLAG_BIT;
+  }
+
+  PutLE32(vp8x + TAG_SIZE,              VP8X_CHUNK_SIZE);   // payload size
+  PutLE32(vp8x + CHUNK_HEADER_SIZE,     flags);             // feature flags
+  PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);    // width - 1 (24b)
+  PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);   // height - 1 (24b)
+  if(!pic->writer(vp8x, sizeof(vp8x), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutAlphaChunk(const VP8Encoder* const enc) {
+  const WebPPicture* const pic = enc->pic_;
+  uint8_t alpha_chunk_hdr[CHUNK_HEADER_SIZE] = {
+    'A', 'L', 'P', 'H'
+  };
+
+  assert(enc->has_alpha_);
+
+  // Alpha chunk header.
+  PutLE32(alpha_chunk_hdr + TAG_SIZE, enc->alpha_data_size_);
+  if (!pic->writer(alpha_chunk_hdr, sizeof(alpha_chunk_hdr), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+
+  // Alpha chunk data.
+  if (!pic->writer(enc->alpha_data_, enc->alpha_data_size_, pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+
+  // Padding.
+  if ((enc->alpha_data_size_ & 1) && !PutPaddingByte(pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8Header(const WebPPicture* const pic,
+                                      size_t vp8_size) {
+  uint8_t vp8_chunk_hdr[CHUNK_HEADER_SIZE] = {
+    'V', 'P', '8', ' '
+  };
+  assert(vp8_size == (uint32_t)vp8_size);
+  PutLE32(vp8_chunk_hdr + TAG_SIZE, (uint32_t)vp8_size);
+  if (!pic->writer(vp8_chunk_hdr, sizeof(vp8_chunk_hdr), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+static WebPEncodingError PutVP8FrameHeader(const WebPPicture* const pic,
+                                           int profile, size_t size0) {
+  uint8_t vp8_frm_hdr[VP8_FRAME_HEADER_SIZE];
+  uint32_t bits;
+
+  if (size0 >= VP8_MAX_PARTITION0_SIZE) {  // partition #0 is too big to fit
+    return VP8_ENC_ERROR_PARTITION0_OVERFLOW;
+  }
+
+  // Paragraph 9.1.
+  bits = 0                         // keyframe (1b)
+       | (profile << 1)            // profile (3b)
+       | (1 << 4)                  // visible (1b)
+       | ((uint32_t)size0 << 5);   // partition length (19b)
+  vp8_frm_hdr[0] = (bits >>  0) & 0xff;
+  vp8_frm_hdr[1] = (bits >>  8) & 0xff;
+  vp8_frm_hdr[2] = (bits >> 16) & 0xff;
+  // signature
+  vp8_frm_hdr[3] = (VP8_SIGNATURE >> 16) & 0xff;
+  vp8_frm_hdr[4] = (VP8_SIGNATURE >>  8) & 0xff;
+  vp8_frm_hdr[5] = (VP8_SIGNATURE >>  0) & 0xff;
+  // dimensions
+  vp8_frm_hdr[6] = pic->width & 0xff;
+  vp8_frm_hdr[7] = pic->width >> 8;
+  vp8_frm_hdr[8] = pic->height & 0xff;
+  vp8_frm_hdr[9] = pic->height >> 8;
+
+  if (!pic->writer(vp8_frm_hdr, sizeof(vp8_frm_hdr), pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+  return VP8_ENC_OK;
+}
+
+// WebP Headers.
+static int PutWebPHeaders(const VP8Encoder* const enc, size_t size0,
+                          size_t vp8_size, size_t riff_size) {
+  WebPPicture* const pic = enc->pic_;
+  WebPEncodingError err = VP8_ENC_OK;
+
+  // RIFF header.
+  err = PutRIFFHeader(enc, riff_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // VP8X.
+  if (IsVP8XNeeded(enc)) {
+    err = PutVP8XHeader(enc);
+    if (err != VP8_ENC_OK) goto Error;
+  }
+
+  // Alpha.
+  if (enc->has_alpha_) {
+    err = PutAlphaChunk(enc);
+    if (err != VP8_ENC_OK) goto Error;
+  }
+
+  // VP8 header.
+  err = PutVP8Header(pic, vp8_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // VP8 frame header.
+  err = PutVP8FrameHeader(pic, enc->profile_, size0);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // All OK.
+  return 1;
+
+  // Error.
+ Error:
+  return WebPEncodingSetError(pic, err);
+}
+
+// Segmentation header (everything past the first flag needs num_segments_ > 1)
+static void PutSegmentHeader(VP8BitWriter* const bw,
+                             const VP8Encoder* const enc) {
+  const VP8SegmentHeader* const hdr = &enc->segment_hdr_;
+  const VP8Proba* const proba = &enc->proba_;
+  if (VP8PutBitUniform(bw, (hdr->num_segments_ > 1))) {
+    // We always 'update' the quant and filter strength values
+    const int update_data = 1;
+    int s;
+    VP8PutBitUniform(bw, hdr->update_map_);
+    if (VP8PutBitUniform(bw, update_data)) {
+      // we always use absolute values, not relative ones
+      VP8PutBitUniform(bw, 1);   // (segment_feature_mode = 1. Paragraph 9.3.)
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {   // per-segment quantizer
+        VP8PutSignedValue(bw, enc->dqm_[s].quant_, 7);
+      }
+      for (s = 0; s < NUM_MB_SEGMENTS; ++s) {   // per-segment filter strength
+        VP8PutSignedValue(bw, enc->dqm_[s].fstrength_, 6);
+      }
+    }
+    if (hdr->update_map_) {
+      for (s = 0; s < 3; ++s) {   // segments_[0..2]: sent only when != 255
+        if (VP8PutBitUniform(bw, (proba->segments_[s] != 255u))) {
+          VP8PutValue(bw, proba->segments_[s], 8);
+        }
+      }
+    }
+  }
+}
+
+// Filtering parameters header
+static void PutFilterHeader(VP8BitWriter* const bw,
+                            const VP8FilterHeader* const hdr) {
+  const int use_lf_delta = (hdr->i4x4_lf_delta_ != 0);
+  VP8PutBitUniform(bw, hdr->simple_);
+  VP8PutValue(bw, hdr->level_, 6);
+  VP8PutValue(bw, hdr->sharpness_, 3);
+  if (VP8PutBitUniform(bw, use_lf_delta)) {
+    // '0' is the default value for i4x4_lf_delta_ at frame #0.
+    const int need_update = (hdr->i4x4_lf_delta_ != 0);
+    if (VP8PutBitUniform(bw, need_update)) {
+      // we don't use ref_lf_delta => emit four 0 bits
+      VP8PutValue(bw, 0, 4);
+      // we use mode_lf_delta for i4x4
+      VP8PutSignedValue(bw, hdr->i4x4_lf_delta_, 6);
+      VP8PutValue(bw, 0, 3);    // all others unused
+    }
+  }
+}
+
+// Nominal quantization parameters
+static void PutQuant(VP8BitWriter* const bw,
+                     const VP8Encoder* const enc) {
+  VP8PutValue(bw, enc->base_quant_, 7);
+  VP8PutSignedValue(bw, enc->dq_y1_dc_, 4);
+  VP8PutSignedValue(bw, enc->dq_y2_dc_, 4);
+  VP8PutSignedValue(bw, enc->dq_y2_ac_, 4);
+  VP8PutSignedValue(bw, enc->dq_uv_dc_, 4);
+  VP8PutSignedValue(bw, enc->dq_uv_ac_, 4);
+}
+
+// Partition sizes: 3 bytes (little-endian) for each partition but the last
+static int EmitPartitionsSize(const VP8Encoder* const enc,
+                              WebPPicture* const pic) {
+  uint8_t buf[3 * (MAX_NUM_PARTITIONS - 1)];
+  int p;
+  for (p = 0; p < enc->num_parts_ - 1; ++p) {   // last partition is skipped
+    const size_t part_size = VP8BitWriterSize(enc->parts_ + p);
+    if (part_size >= VP8_MAX_PARTITION_SIZE) {
+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_PARTITION_OVERFLOW);
+    }
+    buf[3 * p + 0] = (part_size >>  0) & 0xff;
+    buf[3 * p + 1] = (part_size >>  8) & 0xff;
+    buf[3 * p + 2] = (part_size >> 16) & 0xff;
+  }
+  return p ? pic->writer(buf, 3 * p, pic) : 1;   // nothing to write if p == 0
+}
+
+//------------------------------------------------------------------------------
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+
+#define KTRAILER_SIZE 8
+// Appends the KTRAILER_SIZE-byte trailer to enc->bw_. Returns 1 on success.
+static int WriteExtensions(VP8Encoder* const enc) {
+  uint8_t buffer[KTRAILER_SIZE] = { 0 };   // bytes 4..6 are never set below
+  VP8BitWriter* const bw = &enc->bw_;
+  WebPPicture* const pic = enc->pic_;
+
+  // Layer (bytes 0..3)
+  PutLE24(buffer + 0, enc->layer_data_size_);
+  buffer[3] = enc->pic_->colorspace & WEBP_CSP_UV_MASK;
+  if (enc->layer_data_size_ > 0) {
+    assert(enc->use_layer_);
+    // append layer data to last partition
+    if (!VP8BitWriterAppend(&enc->parts_[enc->num_parts_ - 1],
+                            enc->layer_data_, enc->layer_data_size_)) {
+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY);
+    }
+  }
+  // Bytes 4..6 stay zero: without the initializer they would be stack garbage.
+  buffer[KTRAILER_SIZE - 1] = 0x01;  // marker
+  if (!VP8BitWriterAppend(bw, buffer, KTRAILER_SIZE)) {
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY);
+  }
+  return 1;
+}
+
+#endif    /* WEBP_EXPERIMENTAL_FEATURES */
+
+//------------------------------------------------------------------------------
+
+static size_t GeneratePartition0(VP8Encoder* const enc) {
+  VP8BitWriter* const bw = &enc->bw_;
+  const int mb_size = enc->mb_w_ * enc->mb_h_;
+  uint64_t pos1, pos2, pos3;   // writer positions, used for the stats below
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  const int need_extensions = enc->use_layer_;
+#endif
+
+  pos1 = VP8BitWriterPos(bw);   // NOTE(review): sampled before the Init below
+  VP8BitWriterInit(bw, mb_size * 7 / 8);        // ~7 bits per macroblock
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  VP8PutBitUniform(bw, need_extensions);   // extensions
+#else
+  VP8PutBitUniform(bw, 0);   // colorspace
+#endif
+  VP8PutBitUniform(bw, 0);   // clamp type
+
+  PutSegmentHeader(bw, enc);
+  PutFilterHeader(bw, &enc->filter_hdr_);
+  VP8PutValue(bw, enc->config_->partitions, 2);
+  PutQuant(bw, enc);
+  VP8PutBitUniform(bw, 0);   // no proba update
+  VP8WriteProbas(bw, &enc->proba_);
+  pos2 = VP8BitWriterPos(bw);   // header fields end here, intra modes follow
+  VP8CodeIntraModes(enc);
+  VP8BitWriterFinish(bw);
+
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  if (need_extensions && !WriteExtensions(enc)) {
+    return 0;
+  }
+#endif
+
+  pos3 = VP8BitWriterPos(bw);
+
+  if (enc->pic_->stats) {   // statistics are optional
+    enc->pic_->stats->header_bytes[0] = (int)((pos2 - pos1 + 7) >> 3);
+    enc->pic_->stats->header_bytes[1] = (int)((pos3 - pos2 + 7) >> 3);
+    enc->pic_->stats->alpha_data_size = (int)enc->alpha_data_size_;
+    enc->pic_->stats->layer_data_size = (int)enc->layer_data_size_;
+  }
+  return !bw->error_;   // a flag despite the size_t type: 1 = ok, 0 = error
+}
+
+void VP8EncFreeBitWriters(VP8Encoder* const enc) {
+  int p;
+  VP8BitWriterWipeOut(&enc->bw_);
+  for (p = 0; p < enc->num_parts_; ++p) {
+    VP8BitWriterWipeOut(enc->parts_ + p);
+  }
+}
+
+int VP8EncWrite(VP8Encoder* const enc) {
+  WebPPicture* const pic = enc->pic_;
+  VP8BitWriter* const bw = &enc->bw_;
+  const int task_percent = 19;
+  const int percent_per_part = task_percent / enc->num_parts_;   // truncates
+  const int final_percent = enc->percent_ + task_percent;
+  int ok = 0;
+  size_t vp8_size, pad, riff_size;
+  int p;
+
+  // Partition #0 with header and partition sizes
+  ok = !!GeneratePartition0(enc);   // if 0, every write below is skipped
+
+  // Compute VP8 size
+  vp8_size = VP8_FRAME_HEADER_SIZE +
+             VP8BitWriterSize(bw) +
+             3 * (enc->num_parts_ - 1);   // see EmitPartitionsSize()
+  for (p = 0; p < enc->num_parts_; ++p) {
+    vp8_size += VP8BitWriterSize(enc->parts_ + p);
+  }
+  pad = vp8_size & 1;   // 1 if a trailing byte is needed to reach even size
+  vp8_size += pad;
+
+  // Compute RIFF size
+  // At the minimum it is: "WEBPVP8 nnnn" + VP8 data size.
+  riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8_size;
+  if (IsVP8XNeeded(enc)) {  // Add size for: VP8X header + data.
+    riff_size += CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+  }
+  if (enc->has_alpha_) {  // Add size for: ALPH header + data.
+    const uint32_t padded_alpha_size = enc->alpha_data_size_ +
+                                       (enc->alpha_data_size_ & 1);
+    riff_size += CHUNK_HEADER_SIZE + padded_alpha_size;
+  }
+  // Sanity check. NOTE(review): this early exit skips the WipeOut calls below.
+  if (riff_size > 0xfffffffeU) {
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_FILE_TOO_BIG);
+  }
+
+  // Emit headers and partition #0
+  {
+    const uint8_t* const part0 = VP8BitWriterBuf(bw);
+    const size_t size0 = VP8BitWriterSize(bw);
+    ok = ok && PutWebPHeaders(enc, size0, vp8_size, riff_size)
+            && pic->writer(part0, size0, pic)
+            && EmitPartitionsSize(enc, pic);
+    VP8BitWriterWipeOut(bw);    // will free the internal buffer.
+  }
+
+  // Token partitions
+  for (p = 0; p < enc->num_parts_; ++p) {
+    const uint8_t* const buf = VP8BitWriterBuf(enc->parts_ + p);
+    const size_t size = VP8BitWriterSize(enc->parts_ + p);
+    if (size)
+      ok = ok && pic->writer(buf, size, pic);
+    VP8BitWriterWipeOut(enc->parts_ + p);    // will free the internal buffer.
+    ok = ok && WebPReportProgress(pic, enc->percent_ + percent_per_part,
+                                  &enc->percent_);
+  }
+
+  // Padding byte
+  if (ok && pad) {
+    ok = PutPaddingByte(pic);
+  }
+
+  enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size);   // set even if !ok
+  ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_);
+  return ok;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/tree.c b/drivers/webpold/enc/tree.c
new file mode 100644
index 0000000000..8b25e5e488
--- /dev/null
+++ b/drivers/webpold/enc/tree.c
@@ -0,0 +1,510 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Token probabilities
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./vp8enci.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Default probabilities
+
+// Paragraph 13.5
+const uint8_t
+  VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  // generated using vp8_default_coef_probs() in entropy.c:129
+  { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
+      { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
+      { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
+      { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
+      { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
+    },
+    { { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
+      { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
+      { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
+    },
+    { { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
+      { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
+      { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
+      { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
+      { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
+      { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
+      { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
+      { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
+      { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
+    },
+    { { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
+      { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
+      { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
+    },
+    { { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
+      { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
+      { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
+    },
+    { { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
+      { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
+      { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
+      { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
+      { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
+    },
+    { { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
+      { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
+      { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
+      { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
+      { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
+    },
+    { { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
+      { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
+      { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
+    },
+    { { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
+      { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
+      { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
+      { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
+      { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
+    },
+    { { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
+      { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
+      { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
+      { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    },
+    { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  },
+  { { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
+      { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
+      { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
+    },
+    { { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
+      { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
+      { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
+    },
+    { { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
+      { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
+      { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
+    },
+    { { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
+      { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
+      { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
+    },
+    { { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
+      { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
+      { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
+    },
+    { { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
+      { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
+      { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
+    },
+    { { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
+      { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
+      { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
+    },
+    { { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
+      { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
+    }
+  }
+};
+
+void VP8DefaultProbas(VP8Encoder* const enc) {
+  VP8Proba* const probas = &enc->proba_;
+  probas->use_skip_proba_ = 0;
+  memset(probas->segments_, 255u, sizeof(probas->segments_));   // all = 255
+  memcpy(probas->coeffs_, VP8CoeffsProba0, sizeof(VP8CoeffsProba0));
+  // Note: we could hard-code the level_costs_ corresponding to VP8CoeffsProba0,
+  // but that's ~11k of static data. Better call VP8CalculateLevelCosts() later.
+  probas->dirty_ = 1;   // presumably marks the level costs as stale -- confirm
+}
+
+// Paragraph 11.5.  900bytes.
+static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
+  { { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
+    { 152, 179, 64, 126, 170, 118, 46, 70, 95 },
+    { 175, 69, 143, 80, 85, 82, 72, 155, 103 },
+    { 56, 58, 10, 171, 218, 189, 17, 13, 152 },
+    { 114, 26, 17, 163, 44, 195, 21, 10, 173 },
+    { 121, 24, 80, 195, 26, 62, 44, 64, 85 },
+    { 144, 71, 10, 38, 171, 213, 144, 34, 26 },
+    { 170, 46, 55, 19, 136, 160, 33, 206, 71 },
+    { 63, 20, 8, 114, 114, 208, 12, 9, 226 },
+    { 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
+  { { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
+    { 72, 187, 100, 130, 157, 111, 32, 75, 80 },
+    { 66, 102, 167, 99, 74, 62, 40, 234, 128 },
+    { 41, 53, 9, 178, 241, 141, 26, 8, 107 },
+    { 74, 43, 26, 146, 73, 166, 49, 23, 157 },
+    { 65, 38, 105, 160, 51, 52, 31, 115, 128 },
+    { 104, 79, 12, 27, 217, 255, 87, 17, 7 },
+    { 87, 68, 71, 44, 114, 51, 15, 186, 23 },
+    { 47, 41, 14, 110, 182, 183, 21, 17, 194 },
+    { 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
+  { { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
+    { 43, 97, 183, 117, 85, 38, 35, 179, 61 },
+    { 39, 53, 200, 87, 26, 21, 43, 232, 171 },
+    { 56, 34, 51, 104, 114, 102, 29, 93, 77 },
+    { 39, 28, 85, 171, 58, 165, 90, 98, 64 },
+    { 34, 22, 116, 206, 23, 34, 43, 166, 73 },
+    { 107, 54, 32, 26, 51, 1, 81, 43, 31 },
+    { 68, 25, 106, 22, 64, 171, 36, 225, 114 },
+    { 34, 19, 21, 102, 132, 188, 16, 76, 124 },
+    { 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
+  { { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
+    { 60, 148, 31, 172, 219, 228, 21, 18, 111 },
+    { 112, 113, 77, 85, 179, 255, 38, 120, 114 },
+    { 40, 42, 1, 196, 245, 209, 10, 25, 109 },
+    { 88, 43, 29, 140, 166, 213, 37, 43, 154 },
+    { 61, 63, 30, 155, 67, 45, 68, 1, 209 },
+    { 100, 80, 8, 43, 154, 1, 51, 26, 71 },
+    { 142, 78, 78, 16, 255, 128, 34, 197, 171 },
+    { 41, 40, 5, 102, 211, 183, 4, 1, 221 },
+    { 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
+  { { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
+    { 67, 87, 58, 169, 82, 115, 26, 59, 179 },
+    { 63, 59, 90, 180, 59, 166, 93, 73, 154 },
+    { 40, 40, 21, 116, 143, 209, 34, 39, 175 },
+    { 47, 15, 16, 183, 34, 223, 49, 45, 183 },
+    { 46, 17, 33, 183, 6, 98, 15, 32, 183 },
+    { 57, 46, 22, 24, 128, 1, 54, 17, 37 },
+    { 65, 32, 73, 115, 28, 128, 23, 128, 205 },
+    { 40, 3, 9, 115, 51, 192, 18, 6, 223 },
+    { 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
+  { { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
+    { 64, 90, 70, 205, 40, 41, 23, 26, 57 },
+    { 54, 57, 112, 184, 5, 41, 38, 166, 213 },
+    { 30, 34, 26, 133, 152, 116, 10, 32, 134 },
+    { 39, 19, 53, 221, 26, 114, 32, 73, 255 },
+    { 31, 9, 65, 234, 2, 15, 1, 118, 73 },
+    { 75, 32, 12, 51, 192, 255, 160, 43, 51 },
+    { 88, 31, 35, 67, 102, 85, 55, 186, 85 },
+    { 56, 21, 23, 111, 59, 205, 45, 37, 192 },
+    { 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
+  { { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
+    { 95, 84, 53, 89, 128, 100, 113, 101, 45 },
+    { 75, 79, 123, 47, 51, 128, 81, 171, 1 },
+    { 57, 17, 5, 71, 102, 57, 53, 41, 49 },
+    { 38, 33, 13, 121, 57, 73, 26, 1, 85 },
+    { 41, 10, 67, 138, 77, 110, 90, 47, 114 },
+    { 115, 21, 2, 10, 102, 255, 166, 23, 6 },
+    { 101, 29, 16, 10, 85, 128, 101, 196, 26 },
+    { 57, 18, 10, 102, 102, 213, 34, 20, 43 },
+    { 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
+  { { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
+    { 69, 60, 71, 38, 73, 119, 28, 222, 37 },
+    { 68, 45, 128, 34, 1, 47, 11, 245, 171 },
+    { 62, 17, 19, 70, 146, 85, 55, 62, 70 },
+    { 37, 43, 37, 154, 100, 163, 85, 160, 1 },
+    { 63, 9, 92, 136, 28, 64, 32, 201, 85 },
+    { 75, 15, 9, 9, 64, 255, 184, 119, 16 },
+    { 86, 6, 28, 5, 64, 255, 25, 248, 1 },
+    { 56, 8, 17, 132, 137, 255, 55, 116, 128 },
+    { 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
+  { { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
+    { 51, 103, 44, 131, 131, 123, 31, 6, 158 },
+    { 86, 40, 64, 135, 148, 224, 45, 183, 128 },
+    { 22, 26, 17, 131, 240, 154, 14, 1, 209 },
+    { 45, 16, 21, 91, 64, 222, 7, 1, 197 },
+    { 56, 21, 39, 155, 60, 138, 23, 102, 213 },
+    { 83, 12, 13, 54, 192, 255, 68, 47, 28 },
+    { 85, 26, 85, 85, 128, 128, 32, 146, 171 },
+    { 18, 11, 7, 63, 144, 171, 4, 4, 246 },
+    { 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
+  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
+    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
+    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
+    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
+    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
+    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
+    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
+    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
+    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
+    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
+};
+
+static int PutI4Mode(VP8BitWriter* const bw, int mode,
+                     const uint8_t* const prob) {   // reads prob[0..8]
+  if (VP8PutBit(bw, mode != B_DC_PRED, prob[0])) {
+    if (VP8PutBit(bw, mode != B_TM_PRED, prob[1])) {
+      if (VP8PutBit(bw, mode != B_VE_PRED, prob[2])) {
+        if (!VP8PutBit(bw, mode >= B_LD_PRED, prob[3])) {
+          if (VP8PutBit(bw, mode != B_HE_PRED, prob[4])) {
+            VP8PutBit(bw, mode != B_RD_PRED, prob[5]);
+          }
+        } else {
+          if (VP8PutBit(bw, mode != B_LD_PRED, prob[6])) {
+            if (VP8PutBit(bw, mode != B_VL_PRED, prob[7])) {
+              VP8PutBit(bw, mode != B_HD_PRED, prob[8]);
+            }
+          }
+        }
+      }
+    }
+  }
+  return mode;   // returned unchanged, so callers can chain it as context
+}
+
+static void PutI16Mode(VP8BitWriter* const bw, int mode) {
+  if (VP8PutBit(bw, (mode == TM_PRED || mode == H_PRED), 156)) {
+    VP8PutBit(bw, mode == TM_PRED, 128);    // TM or HE
+  } else {
+    VP8PutBit(bw, mode == V_PRED, 163);     // VE or DC
+  }
+}
+
+static void PutUVMode(VP8BitWriter* const bw, int uv_mode) {
+  if (VP8PutBit(bw, uv_mode != DC_PRED, 142)) {
+    if (VP8PutBit(bw, uv_mode != V_PRED, 114)) {
+      VP8PutBit(bw, uv_mode != H_PRED, 183);    // else: TM_PRED
+    }
+  }
+}
+
+static void PutSegment(VP8BitWriter* const bw, int s, const uint8_t* p) {
+  if (VP8PutBit(bw, s >= 2, p[0])) p += 1;   // then p[1] below is the old p[2]
+  VP8PutBit(bw, s & 1, p[1]);   // low bit of 's' (assumes VP8PutBit returns bit)
+}
+
+void VP8CodeIntraModes(VP8Encoder* const enc) {
+  VP8BitWriter* const bw = &enc->bw_;
+  VP8EncIterator it;
+  VP8IteratorInit(enc, &it);
+  do {   // one pass of this body per iterator position
+    const VP8MBInfo* mb = it.mb_;
+    const uint8_t* preds = it.preds_;
+    if (enc->segment_hdr_.update_map_) {
+      PutSegment(bw, mb->segment_, enc->proba_.segments_);
+    }
+    if (enc->proba_.use_skip_proba_) {
+      VP8PutBit(bw, mb->skip_, enc->proba_.skip_proba_);
+    }
+    if (VP8PutBit(bw, (mb->type_ != 0), 145)) {  // i16x16
+      PutI16Mode(bw, preds[0]);
+    } else {
+      const int preds_w = enc->preds_w_;
+      const uint8_t* top_pred = preds - preds_w;   // row of modes just above
+      int x, y;
+      for (y = 0; y < 4; ++y) {
+        int left = preds[-1];   // mode stored left of this row's first entry
+        for (x = 0; x < 4; ++x) {
+          const uint8_t* const probas = kBModesProba[top_pred[x]][left];
+          left = PutI4Mode(bw, preds[x], probas);   // becomes next 'left'
+        }
+        top_pred = preds;   // the row just coded is the next row's 'top'
+        preds += preds_w;
+      }
+    }
+    PutUVMode(bw, mb->uv_mode_);
+  } while (VP8IteratorNext(&it, 0));
+}
+
+//------------------------------------------------------------------------------
+// Paragraph 13
+
+const uint8_t
+    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
+  { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
+      { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  },
+  { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    },
+    { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
+      { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
+    }
+  }
+};
+
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas) {
+  int type, band, ctx, n;
+  for (type = 0; type < NUM_TYPES; ++type) {
+    for (band = 0; band < NUM_BANDS; ++band) {
+      for (ctx = 0; ctx < NUM_CTX; ++ctx) {
+        const uint8_t* const cur = probas->coeffs_[type][band][ctx];
+        const uint8_t* const ref = VP8CoeffsProba0[type][band][ctx];
+        const uint8_t* const upd = VP8CoeffsUpdateProba[type][band][ctx];
+        for (n = 0; n < NUM_PROBAS; ++n) {
+          if (VP8PutBit(bw, cur[n] != ref[n], upd[n])) {  // update flag
+            VP8PutValue(bw, cur[n], 8);                   // new proba
+          }
+        }
+      }
+    }
+  }
+  if (!VP8PutBitUniform(bw, probas->use_skip_proba_)) return;
+  VP8PutValue(bw, probas->skip_proba_, 8);
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/vp8enci.h b/drivers/webpold/enc/vp8enci.h
new file mode 100644
index 0000000000..936e1c18ce
--- /dev/null
+++ b/drivers/webpold/enc/vp8enci.h
@@ -0,0 +1,525 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: internal header.
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_ENC_VP8ENCI_H_
+#define WEBP_ENC_VP8ENCI_H_
+
+#include <string.h>     // for memcpy()
+#include "../encode.h"
+#include "../dsp/dsp.h"
+#include "../utils/bit_writer.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Various defines and enums
+
+// version numbers
+#define ENC_MAJ_VERSION 0
+#define ENC_MIN_VERSION 2
+#define ENC_REV_VERSION 0
+
+// size of histogram used by CollectHistogram.
+#define MAX_COEFF_THRESH   64
+
+// intra prediction modes
+enum { B_DC_PRED = 0,   // 4x4 modes
+       B_TM_PRED = 1,
+       B_VE_PRED = 2,
+       B_HE_PRED = 3,
+       B_RD_PRED = 4,
+       B_VR_PRED = 5,
+       B_LD_PRED = 6,
+       B_VL_PRED = 7,
+       B_HD_PRED = 8,
+       B_HU_PRED = 9,
+       NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED,  // = 10
+
+       // Luma16 or UV modes: aliases of the first four 4x4 mode values
+       DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
+       H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
+     };
+
+enum { NUM_MB_SEGMENTS = 4,
+       MAX_NUM_PARTITIONS = 8,
+       NUM_TYPES = 4,   // 0: i16-AC,  1: i16-DC,  2:chroma-AC,  3:i4-AC
+       NUM_BANDS = 8,
+       NUM_CTX = 3,
+       NUM_PROBAS = 11,
+       MAX_LF_LEVELS = 64,      // Number of loop filter levels (0..63)
+       MAX_VARIABLE_LEVEL = 67  // last (inclusive) level with variable cost
+     };
+
+// YUV-cache parameters. Cache is 16-pixels wide.
+// The original or reconstructed samples can be accessed using VP8Scan[]
+// The predicted blocks can be accessed using offsets to yuv_p_ and
+// the arrays VP8*ModeOffsets[];
+//         +----+      YUV Samples area. See VP8Scan[] for accessing the blocks.
+//  Y_OFF  |YYYY| <- original samples  (enc->yuv_in_)
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+//  U_OFF  |UUVV| V_OFF  (=U_OFF + 8)
+//         |UUVV|
+//         +----+
+//  Y_OFF  |YYYY| <- compressed/decoded samples  ('yuv_out_')
+//         |YYYY|    There are two buffers like this ('yuv_out_'/'yuv_out2_')
+//         |YYYY|
+//         |YYYY|
+//  U_OFF  |UUVV| V_OFF
+//         |UUVV|
+//          x2 (for yuv_out2_)
+//         +----+     Prediction area ('yuv_p_', size = PRED_SIZE)
+// I16DC16 |YYYY|  Intra16 predictions (16x16 block each)
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+// I16TM16 |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+// I16VE16 |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+// I16HE16 |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         |YYYY|
+//         +----+  Chroma U/V predictions (16x8 block each)
+// C8DC8   |UUVV|
+//         |UUVV|
+// C8TM8   |UUVV|
+//         |UUVV|
+// C8VE8   |UUVV|
+//         |UUVV|
+// C8HE8   |UUVV|
+//         |UUVV|
+//         +----+  Intra 4x4 predictions (4x4 block each)
+//         |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
+//         |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
+//         |YY..| I4HD4 I4HU4 I4TMP
+//         +----+
+#define BPS       16   // this is the common stride
+#define Y_SIZE   (BPS * 16)
+#define UV_SIZE  (BPS * 8)
+#define YUV_SIZE (Y_SIZE + UV_SIZE)
+#define PRED_SIZE (6 * 16 * BPS + 12 * BPS)
+#define Y_OFF    (0)
+#define U_OFF    (Y_SIZE)
+#define V_OFF    (U_OFF + 8)
+#define ALIGN_CST 15
+#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST)
+
+extern const int VP8Scan[16 + 4 + 4];           // in quant.c
+extern const int VP8UVModeOffsets[4];           // in analyze.c
+extern const int VP8I16ModeOffsets[4];
+extern const int VP8I4ModeOffsets[NUM_BMODES];
+
+// Layout of prediction blocks
+// intra 16x16
+#define I16DC16 (0 * 16 * BPS)
+#define I16TM16 (1 * 16 * BPS)
+#define I16VE16 (2 * 16 * BPS)
+#define I16HE16 (3 * 16 * BPS)
+// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
+#define C8DC8 (4 * 16 * BPS)
+#define C8TM8 (4 * 16 * BPS + 8 * BPS)
+#define C8VE8 (5 * 16 * BPS)
+#define C8HE8 (5 * 16 * BPS + 8 * BPS)
+// intra 4x4
+#define I4DC4 (6 * 16 * BPS +  0)
+#define I4TM4 (6 * 16 * BPS +  4)
+#define I4VE4 (6 * 16 * BPS +  8)
+#define I4HE4 (6 * 16 * BPS + 12)
+#define I4RD4 (6 * 16 * BPS + 4 * BPS +  0)
+#define I4VR4 (6 * 16 * BPS + 4 * BPS +  4)
+#define I4LD4 (6 * 16 * BPS + 4 * BPS +  8)
+#define I4VL4 (6 * 16 * BPS + 4 * BPS + 12)
+#define I4HD4 (6 * 16 * BPS + 8 * BPS +  0)
+#define I4HU4 (6 * 16 * BPS + 8 * BPS +  4)
+#define I4TMP (6 * 16 * BPS + 8 * BPS +  8)
+
+typedef int64_t score_t;     // type used for scores, rate, distortion
+#define MAX_COST ((score_t)0x7fffffffffffffLL)
+
+#define QFIX 17
+#define BIAS(b)  ((b) << (QFIX - 8))
+// Fun fact: the shift below is the _only_ place where we're actually being
+// lossy and discarding bits.
+static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
+  return (B + iQ * n) >> QFIX;
+}
+extern const uint8_t VP8Zigzag[16];
+
+//------------------------------------------------------------------------------
+// Headers
+
+typedef uint32_t proba_t;   // 16b + 16b
+typedef uint8_t ProbaArray[NUM_CTX][NUM_PROBAS];
+typedef proba_t StatsArray[NUM_CTX][NUM_PROBAS];
+typedef uint16_t CostArray[NUM_CTX][MAX_VARIABLE_LEVEL + 1];
+typedef double LFStats[NUM_MB_SEGMENTS][MAX_LF_LEVELS];  // filter stats
+
+typedef struct VP8Encoder VP8Encoder;
+
+// segment features
+typedef struct {
+  int num_segments_;      // Actual number of segments. 1 segment only = unused.
+  int update_map_;        // whether to update the segment map or not.
+                          // must be 0 if there's only 1 segment.
+  int size_;              // bit-cost for transmitting the segment map
+} VP8SegmentHeader;
+
+// Struct collecting all frame-persistent probabilities.
+typedef struct {
+  uint8_t segments_[3];     // probabilities for segment tree
+  uint8_t skip_proba_;      // final probability of being skipped.
+  ProbaArray coeffs_[NUM_TYPES][NUM_BANDS];      // 1056 bytes
+  StatsArray stats_[NUM_TYPES][NUM_BANDS];       // 4224 bytes
+  CostArray level_cost_[NUM_TYPES][NUM_BANDS];   // 13056 bytes
+  int dirty_;               // if true, need to call VP8CalculateLevelCosts()
+  int use_skip_proba_;      // Note: we always use skip_proba for now.
+  int nb_skip_;             // number of skipped blocks
+} VP8Proba;
+
+// Filter parameters. Not actually used in the code (we don't perform
+// the in-loop filtering), but filled from user's config
+typedef struct {
+  int simple_;             // filtering type: 0=complex, 1=simple
+  int level_;              // base filter level [0..63]
+  int sharpness_;          // [0..7]
+  int i4x4_lf_delta_;      // delta filter level for i4x4 relative to i16x16
+} VP8FilterHeader;
+
+//------------------------------------------------------------------------------
+// Information about the macroblocks.
+
+typedef struct {
+  // block type
+  unsigned int type_:2;     // 0=i4x4, 1=i16x16
+  unsigned int uv_mode_:2;
+  unsigned int skip_:1;
+  unsigned int segment_:2;
+  uint8_t alpha_;      // quantization-susceptibility
+} VP8MBInfo;
+
+typedef struct VP8Matrix {
+  uint16_t q_[16];        // quantizer steps
+  uint16_t iq_[16];       // reciprocals, fixed point.
+  uint16_t bias_[16];     // rounding bias
+  uint16_t zthresh_[16];  // value under which a coefficient is zeroed
+  uint16_t sharpen_[16];  // frequency boosters for slight sharpening
+} VP8Matrix;
+
+typedef struct {
+  VP8Matrix y1_, y2_, uv_;  // quantization matrices
+  int alpha_;      // quant-susceptibility, range [-127,127]. Zero is neutral.
+                   // Lower values indicate a lower risk of blurriness.
+  int beta_;       // filter-susceptibility, range [0,255].
+  int quant_;      // final segment quantizer.
+  int fstrength_;  // final in-loop filtering strength
+  // reactivities
+  int lambda_i16_, lambda_i4_, lambda_uv_;
+  int lambda_mode_, lambda_trellis_, tlambda_;
+  int lambda_trellis_i16_, lambda_trellis_i4_, lambda_trellis_uv_;
+} VP8SegmentInfo;
+
+// Handy transient struct to accumulate score and info during RD-optimization
+// and mode evaluation.
+typedef struct {
+  score_t D, SD, R, score;    // Distortion, spectral distortion, rate, score.
+  int16_t y_dc_levels[16];    // Quantized levels for luma-DC, luma-AC, chroma.
+  int16_t y_ac_levels[16][16];
+  int16_t uv_levels[4 + 4][16];
+  int mode_i16;               // mode number for intra16 prediction
+  uint8_t modes_i4[16];       // mode numbers for intra4 predictions
+  int mode_uv;                // mode number of chroma prediction
+  uint32_t nz;                // non-zero blocks
+} VP8ModeScore;
+
+// Iterator structure to iterate through macroblocks, pointing to the
+// right neighbouring data (samples, predictions, contexts, ...)
+typedef struct {
+  int x_, y_;                      // current macroblock
+  int y_offset_, uv_offset_;       // offset to the luma / chroma planes
+  int y_stride_, uv_stride_;       // respective strides
+  uint8_t*      yuv_in_;           // borrowed from enc_ (for now)
+  uint8_t*      yuv_out_;          // ''
+  uint8_t*      yuv_out2_;         // ''
+  uint8_t*      yuv_p_;            // ''
+  VP8Encoder*   enc_;              // back-pointer
+  VP8MBInfo*    mb_;               // current macroblock
+  VP8BitWriter* bw_;               // current bit-writer
+  uint8_t*      preds_;            // intra mode predictors (4x4 blocks)
+  uint32_t*     nz_;               // non-zero pattern
+  uint8_t       i4_boundary_[37];  // 32+5 boundary samples needed by intra4x4
+  uint8_t*      i4_top_;           // pointer to the current top boundary sample
+  int           i4_;               // current intra4x4 mode being tested
+  int           top_nz_[9];        // top-non-zero context.
+  int           left_nz_[9];       // left-non-zero. left_nz[8] is independent.
+  uint64_t      bit_count_[4][3];  // bit counters for coded levels.
+  uint64_t      luma_bits_;        // macroblock bit-cost for luma
+  uint64_t      uv_bits_;          // macroblock bit-cost for chroma
+  LFStats*      lf_stats_;         // filter stats (borrowed from enc_)
+  int           do_trellis_;       // if true, perform extra level optimisation
+  int           done_;             // true when scan is finished
+  int           percent0_;         // saved initial progress percent
+} VP8EncIterator;
+
+  // in iterator.c
+// must be called first.
+void VP8IteratorInit(VP8Encoder* const enc, VP8EncIterator* const it);
+// restart a scan.
+void VP8IteratorReset(VP8EncIterator* const it);
+// import samples from source
+void VP8IteratorImport(const VP8EncIterator* const it);
+// export decimated samples
+void VP8IteratorExport(const VP8EncIterator* const it);
+// go to next macroblock. Returns !done_. If block_to_save is non-NULL, will
+// save the boundary values to top_/left_ arrays. block_to_save can be
+// it->yuv_out_ or it->yuv_in_.
+int VP8IteratorNext(VP8EncIterator* const it,
+                    const uint8_t* const block_to_save);
+// Report progression based on macroblock rows. Return 0 for user-abort request.
+int VP8IteratorProgress(const VP8EncIterator* const it,
+                        int final_delta_percent);
+// Intra4x4 iterations
+void VP8IteratorStartI4(VP8EncIterator* const it);
+// returns true if not done.
+int VP8IteratorRotateI4(VP8EncIterator* const it,
+                        const uint8_t* const yuv_out);
+
+// Non-zero context setup/teardown
+void VP8IteratorNzToBytes(VP8EncIterator* const it);
+void VP8IteratorBytesToNz(VP8EncIterator* const it);
+
+// Helper functions to set mode properties
+void VP8SetIntra16Mode(const VP8EncIterator* const it, int mode);
+void VP8SetIntra4Mode(const VP8EncIterator* const it, const uint8_t* modes);
+void VP8SetIntraUVMode(const VP8EncIterator* const it, int mode);
+void VP8SetSkip(const VP8EncIterator* const it, int skip);
+void VP8SetSegment(const VP8EncIterator* const it, int segment);
+
+//------------------------------------------------------------------------------
+// Paginated token buffer
+
+// WIP: #define USE_TOKEN_BUFFER
+
+#ifdef USE_TOKEN_BUFFER
+
+#define MAX_NUM_TOKEN 2048
+
+typedef struct VP8Tokens VP8Tokens;
+struct VP8Tokens {
+  uint16_t tokens_[MAX_NUM_TOKEN];  // bit#15: bit, bits 0..14: slot
+  int left_;
+  VP8Tokens* next_;
+};
+
+typedef struct {
+  VP8Tokens* rows_;
+  uint16_t* tokens_;    // set to (*last_)->tokens_
+  VP8Tokens** last_;
+  int left_;
+  int error_;  // true in case of malloc error
+} VP8TBuffer;
+
+void VP8TBufferInit(VP8TBuffer* const b);    // initialize an empty buffer
+int VP8TBufferNewPage(VP8TBuffer* const b);  // allocate a new page
+void VP8TBufferClear(VP8TBuffer* const b);   // de-allocate memory
+
+int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
+                  const uint8_t* const probas);
+
+static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
+                                   int bit, int proba_idx) {
+  // Token layout: bit #15 holds 'bit', bits 0..14 hold 'proba_idx'.
+  if (b->left_ <= 0 && !VP8TBufferNewPage(b)) return bit;
+  b->left_ -= 1;
+  b->tokens_[b->left_] = (bit << 15) | proba_idx;
+  return bit;
+}
+
+#endif  // USE_TOKEN_BUFFER
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+
+struct VP8Encoder {
+  const WebPConfig* config_;    // user configuration and parameters
+  WebPPicture* pic_;            // input / output picture
+
+  // headers
+  VP8FilterHeader   filter_hdr_;     // filtering information
+  VP8SegmentHeader  segment_hdr_;    // segment information
+
+  int profile_;                      // VP8's profile, deduced from Config.
+
+  // dimension, in macroblock units.
+  int mb_w_, mb_h_;
+  int preds_w_;   // stride of the *preds_ prediction plane (=4*mb_w + 1)
+
+  // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
+  int num_parts_;
+
+  // per-partition bit writers.
+  VP8BitWriter bw_;                         // part0
+  VP8BitWriter parts_[MAX_NUM_PARTITIONS];  // token partitions
+
+  int percent_;                             // for progress
+
+  // transparency blob
+  int has_alpha_;
+  uint8_t* alpha_data_;       // non-NULL if transparency is present
+  uint32_t alpha_data_size_;
+
+  // enhancement layer
+  int use_layer_;
+  VP8BitWriter layer_bw_;
+  uint8_t* layer_data_;
+  size_t layer_data_size_;
+
+  // quantization info (one set of DC/AC dequant factor per segment)
+  VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
+  int base_quant_;                 // nominal quantizer value. Only used
+                                   // for relative coding of segments' quant.
+  int uv_alpha_;                   // U/V quantization susceptibility
+  // global offset of quantizers, shared by all segments
+  int dq_y1_dc_;
+  int dq_y2_dc_, dq_y2_ac_;
+  int dq_uv_dc_, dq_uv_ac_;
+
+  // probabilities and statistics
+  VP8Proba proba_;
+  uint64_t sse_[4];        // sum of Y/U/V/A squared errors for all macroblocks
+  uint64_t sse_count_;     // pixel count for the sse_[] stats
+  int      coded_size_;
+  int      residual_bytes_[3][4];
+  int      block_count_[3];
+
+  // quality/speed settings
+  int method_;              // 0=fastest, 6=best/slowest.
+  int rd_opt_level_;        // Deduced from method_.
+  int max_i4_header_bits_;  // partition #0 safeness factor
+
+  // Memory
+  VP8MBInfo* mb_info_;   // contextual macroblock infos (mb_w_ + 1)
+  uint8_t*   preds_;     // predictions modes: (4*mb_w+1) * (4*mb_h+1)
+  uint32_t*  nz_;        // non-zero bit context: mb_w+1
+  uint8_t*   yuv_in_;    // input samples
+  uint8_t*   yuv_out_;   // output samples
+  uint8_t*   yuv_out2_;  // secondary scratch out-buffer. swapped with yuv_out_.
+  uint8_t*   yuv_p_;     // scratch buffer for prediction
+  uint8_t   *y_top_;     // top luma samples.
+  uint8_t   *uv_top_;    // top u/v samples.
+                         // U and V are packed into 16 pixels (8 U + 8 V)
+  uint8_t   *y_left_;    // left luma samples (addressable from index -1 to 15).
+  uint8_t   *u_left_;    // left u samples (addressable from index -1 to 7)
+  uint8_t   *v_left_;    // left v samples (addressable from index -1 to 7)
+
+  LFStats   *lf_stats_;  // autofilter stats (if NULL, autofilter is off)
+};
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+  // in tree.c
+extern const uint8_t VP8CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+extern const uint8_t
+    VP8CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS];
+// Reset the token probabilities to their initial (default) values
+void VP8DefaultProbas(VP8Encoder* const enc);
+// Write the token probabilities
+void VP8WriteProbas(VP8BitWriter* const bw, const VP8Proba* const probas);
+// Writes the partition #0 modes (that is: all intra modes)
+void VP8CodeIntraModes(VP8Encoder* const enc);
+
+  // in syntax.c
+// Generates the final bitstream by coding the partition0 and headers,
+// and appending an assembly of all the pre-coded token partitions.
+// Return true if everything is ok.
+int VP8EncWrite(VP8Encoder* const enc);
+// Release memory allocated for bit-writing in VP8EncLoop & seq.
+void VP8EncFreeBitWriters(VP8Encoder* const enc);
+
+  // in frame.c
+extern const uint8_t VP8EncBands[16 + 1];
+// Form all the four Intra16x16 predictions in the yuv_p_ cache
+void VP8MakeLuma16Preds(const VP8EncIterator* const it);
+// Form all the four Chroma8x8 predictions in the yuv_p_ cache
+void VP8MakeChroma8Preds(const VP8EncIterator* const it);
+// Form all the ten Intra4x4 predictions in the yuv_p_ cache
+// for the 4x4 block it->i4_
+void VP8MakeIntra4Preds(const VP8EncIterator* const it);
+// Rate calculation
+int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
+int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
+int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
+// Main stat / coding passes
+int VP8EncLoop(VP8Encoder* const enc);
+int VP8StatLoop(VP8Encoder* const enc);
+
+  // in webpenc.c
+// Assign an error code to a picture. Return false for convenience.
+int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error);
+int WebPReportProgress(const WebPPicture* const pic,
+                       int percent, int* const percent_store);
+
+  // in analysis.c
+// Main analysis loop. Decides the segmentations and complexity.
+// Assigns a first guess for Intra16 and uvmode_ prediction modes.
+int VP8EncAnalyze(VP8Encoder* const enc);
+
+  // in quant.c
+// Sets up segment's quantization values, base_quant_ and filter strengths.
+void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
+// Pick best modes and fills the levels. Returns true if skipped.
+int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
+
+  // in alpha.c
+void VP8EncInitAlpha(VP8Encoder* const enc);    // initialize alpha compression
+int VP8EncFinishAlpha(VP8Encoder* const enc);   // finalize compressed data
+void VP8EncDeleteAlpha(VP8Encoder* const enc);  // delete compressed data
+
+  // in layer.c
+void VP8EncInitLayer(VP8Encoder* const enc);     // init everything
+void VP8EncCodeLayerBlock(VP8EncIterator* it);   // code one more macroblock
+int VP8EncFinishLayer(VP8Encoder* const enc);    // finalize coding
+void VP8EncDeleteLayer(VP8Encoder* enc);         // reclaim memory
+
+  // in filter.c
+
+// SSIM utils
+typedef struct {
+  double w, xm, ym, xxm, xym, yym;
+} DistoStats;
+void VP8SSIMAddStats(const DistoStats* const src, DistoStats* const dst);
+void VP8SSIMAccumulatePlane(const uint8_t* src1, int stride1,
+                            const uint8_t* src2, int stride2,
+                            int W, int H, DistoStats* const stats);
+double VP8SSIMGet(const DistoStats* const stats);
+double VP8SSIMGetSquaredError(const DistoStats* const stats);
+
+// autofilter
+void VP8InitFilter(VP8EncIterator* const it);
+void VP8StoreFilterStats(VP8EncIterator* const it);
+void VP8AdjustFilterStrength(VP8EncIterator* const it);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_ENC_VP8ENCI_H_ */
diff --git a/drivers/webpold/enc/vp8l.c b/drivers/webpold/enc/vp8l.c
new file mode 100644
index 0000000000..f4eb6e783f
--- /dev/null
+++ b/drivers/webpold/enc/vp8l.c
@@ -0,0 +1,1150 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// main entry for the lossless encoder.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+//
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "./backward_references.h"
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../dsp/lossless.h"
+#include "../utils/bit_writer.h"
+#include "../utils/huffman_encode.h"
+#include "../utils/utils.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define PALETTE_KEY_RIGHT_SHIFT   22  // Key for 1K buffer.
+#define MAX_HUFF_IMAGE_SIZE       (16 * 1024 * 1024)
+#define MAX_COLORS_FOR_GRAPH      64
+
+// -----------------------------------------------------------------------------
+// Palette
+
+static int CompareColors(const void* p1, const void* p2) {
+  const uint32_t lhs = *(const uint32_t*)p1;
+  const uint32_t rhs = *(const uint32_t*)p2;
+  return (lhs > rhs) - (lhs < rhs);   // qsort() order: -1, 0 or +1
+}
+
+// Collects the distinct colors of 'pic' in a small open-addressing hash table.
+// Returns false as soon as more than MAX_PALETTE_SIZE colors are found.
+// Otherwise stores the colors, sorted by increasing value, in 'palette' and
+// their number in '*palette_size', then returns true.
+static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
+                                   uint32_t palette[MAX_PALETTE_SIZE],
+                                   int* const palette_size) {
+  enum { kTableSize = MAX_PALETTE_SIZE * 4 };   // 1K buffer.
+  static const uint32_t kHashMul = 0x1e35a7bd;
+  uint8_t in_use[kTableSize] = { 0 };
+  uint32_t colors[kTableSize];
+  const uint32_t* row = pic->argb;
+  const int width = pic->width;
+  const int height = pic->height;
+  uint32_t prev = ~row[0];   // so we're sure that prev != row[0]
+  int num_colors = 0;
+  int slot, x, y;
+
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      // Runs of identical pixels need a single lookup.
+      if (row[x] == prev) {
+        continue;
+      }
+      prev = row[x];
+      slot = (kHashMul * prev) >> PALETTE_KEY_RIGHT_SHIFT;
+      // Linear conflict resolution: skip the slots taken by other colors.
+      while (in_use[slot] && colors[slot] != prev) {
+        slot = (slot + 1) & (kTableSize - 1);   // key mask for 1K buffer.
+      }
+      if (in_use[slot]) {
+        continue;   // The color is already there.
+      }
+      colors[slot] = prev;
+      in_use[slot] = 1;
+      ++num_colors;
+      if (num_colors > MAX_PALETTE_SIZE) {
+        return 0;
+      }
+    }
+    row += pic->argb_stride;
+  }
+
+  // Gather the used slots, in table order.
+  // TODO(skal): could we reuse in_use[] to speed up ApplyPalette()?
+  num_colors = 0;
+  for (slot = 0; slot < kTableSize; ++slot) {
+    if (in_use[slot]) {
+      palette[num_colors] = colors[slot];
+      ++num_colors;
+    }
+  }
+
+  // Sort the palette by increasing color value.
+  qsort(palette, num_colors, sizeof(*palette), CompareColors);
+  *palette_size = num_colors;
+  return 1;
+}
+
+// Fills '*nonpredicted_bits' and '*predicted_bits' with the bulk bit estimates
+// of two histograms: one of the pixel values, one of their VP8LSubPixels()
+// difference with the last counted pixel. Returns false on memory error.
+static int AnalyzeEntropy(const uint32_t* argb,
+                          int width, int height, int argb_stride,
+                          double* const nonpredicted_bits,
+                          double* const predicted_bits) {
+  VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
+  VP8LHistogram* predicted;
+  VP8LHistogram* nonpredicted;
+  const uint32_t* prev_row = NULL;
+  uint32_t prev_pix = argb[0];    // so we're sure that pix_diff == 0
+  int x, y;
+
+  if (histos == NULL) return 0;
+  predicted = &histos[0];
+  nonpredicted = &histos[1];
+  VP8LHistogramInit(predicted, 0);
+  VP8LHistogramInit(nonpredicted, 0);
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const uint32_t pix = argb[x];
+      const uint32_t pix_diff = VP8LSubPixels(pix, prev_pix);
+      const int same_as_above = (prev_row != NULL && pix == prev_row[x]);
+      if (pix_diff != 0 && !same_as_above) {   // otherwise: not counted
+        const PixOrCopy pix_token = PixOrCopyCreateLiteral(pix);
+        const PixOrCopy pix_diff_token = PixOrCopyCreateLiteral(pix_diff);
+        prev_pix = pix;
+        VP8LHistogramAddSinglePixOrCopy(nonpredicted, &pix_token);
+        VP8LHistogramAddSinglePixOrCopy(predicted, &pix_diff_token);
+      }
+    }
+    prev_row = argb;
+    argb += argb_stride;
+  }
+  *nonpredicted_bits = VP8LHistogramEstimateBitsBulk(nonpredicted);
+  *predicted_bits = VP8LHistogramEstimateBitsBulk(predicted);
+  free(histos);
+  return 1;
+}
+
+// Decides which transforms to use: palette, or predictor + cross-color.
+// Returns false if the entropy analysis failed.
+static int VP8LEncAnalyze(VP8LEncoder* const enc, WebPImageHint image_hint) {
+  const WebPPicture* const pic = enc->pic_;
+  double non_pred_entropy, pred_entropy;
+  assert(pic != NULL && pic->argb != NULL);
+
+  enc->use_palette_ =
+      AnalyzeAndCreatePalette(pic, enc->palette_, &enc->palette_size_);
+
+  // With the 'graph' hint, a palette with few colors is not used.
+  if (image_hint == WEBP_HINT_GRAPH && enc->use_palette_ &&
+      enc->palette_size_ < MAX_COLORS_FOR_GRAPH) {
+    enc->use_palette_ = 0;
+  }
+
+  if (enc->use_palette_) return 1;
+
+  // Photos always use the predictor and cross-color transforms. Other images
+  // only do when prediction lowers the estimated entropy by more than 5%.
+  if (image_hint != WEBP_HINT_PHOTO) {
+    if (!AnalyzeEntropy(pic->argb, pic->width, pic->height, pic->argb_stride,
+                        &non_pred_entropy, &pred_entropy)) {
+      return 0;
+    }
+    if (!(pred_entropy < 0.95 * non_pred_entropy)) return 1;
+  }
+
+  enc->use_predict_ = 1;
+  // TODO(vikasa): Observed some correlation of cross_color transform with
+  // predict. Need to investigate this further and add separate heuristic
+  // for setting use_cross_color flag.
+  enc->use_cross_color_ = 1;
+  return 1;
+}
+
+// Sets up five Huffman codes per histogram, all sharing one buffer. Returns
+// false on error, without leaving pointers to freed memory in 'huffman_codes'.
+static int GetHuffBitLengthsAndCodes(
+    const VP8LHistogramSet* const histogram_image,
+    HuffmanTreeCode* const huffman_codes) {
+  int i, k;
+  int ok = 1;
+  uint64_t total_length_size = 0;
+  uint16_t* codes;
+  uint8_t* lengths;
+  uint8_t* mem_buf = NULL;
+  const int histogram_image_size = histogram_image->size;
+
+  // Iterate over all histograms and get the aggregate number of codes used.
+  for (i = 0; i < histogram_image_size; ++i) {
+    const VP8LHistogram* const histo = histogram_image->histograms[i];
+    HuffmanTreeCode* const tree = &huffman_codes[5 * i];
+    for (k = 0; k < 5; ++k) {
+      const int num_symbols = (k == 0) ? VP8LHistogramNumCodes(histo)
+                            : (k == 4) ? NUM_DISTANCE_CODES : 256;
+      tree[k].num_symbols = num_symbols;
+      total_length_size += num_symbols;
+    }
+  }
+
+  // Allocate and set Huffman codes: all the codes first, then all the lengths.
+  mem_buf = (uint8_t*)WebPSafeCalloc(total_length_size,
+                                     sizeof(*lengths) + sizeof(*codes));
+  if (mem_buf == NULL) return 0;   // no pointer was stored yet
+  codes = (uint16_t*)mem_buf;
+  lengths = (uint8_t*)&codes[total_length_size];
+  for (i = 0; i < 5 * histogram_image_size; ++i) {
+    huffman_codes[i].codes = codes;
+    huffman_codes[i].code_lengths = lengths;
+    codes += huffman_codes[i].num_symbols;
+    lengths += huffman_codes[i].num_symbols;
+  }
+
+  // Create Huffman trees, stopping at the first failure.
+  for (i = 0; ok && i < histogram_image_size; ++i) {
+    HuffmanTreeCode* const tree = &huffman_codes[5 * i];
+    VP8LHistogram* const histo = histogram_image->histograms[i];
+    ok = VP8LCreateHuffmanTree(histo->literal_, 15, tree + 0) &&
+         VP8LCreateHuffmanTree(histo->red_, 15, tree + 1) &&
+         VP8LCreateHuffmanTree(histo->blue_, 15, tree + 2) &&
+         VP8LCreateHuffmanTree(histo->alpha_, 15, tree + 3) &&
+         VP8LCreateHuffmanTree(histo->distance_, 15, tree + 4);
+  }
+  if (ok) return 1;
+
+  // Error: free the buffer and clear the now-dangling pointers into it.
+  free(mem_buf);
+  for (i = 0; i < 5 * histogram_image_size; ++i) {
+    huffman_codes[i].codes = NULL;
+    huffman_codes[i].code_lengths = NULL;
+  }
+  return 0;
+}
+
+// Writes the bit depths of the code-length code: a 4-bit count (minus 4),
+// then 3 bits per stored depth, following kStorageOrder.
+static void StoreHuffmanTreeOfHuffmanTreeToBitMask(
+    VP8LBitWriter* const bw, const uint8_t* code_length_bitdepth) {
+  // RFC 1951 will calm you down if you are worried about this funny sequence.
+  // This sequence is tuned from that, but more weighted for lower symbol count,
+  // and more spiking histograms.
+  static const uint8_t kStorageOrder[CODE_LENGTH_CODES] = {
+    17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+  };
+  int n, num_stored = CODE_LENGTH_CODES;
+  // Trailing zero depths are dropped, but at least 4 entries are always kept.
+  while (num_stored > 4 &&
+         code_length_bitdepth[kStorageOrder[num_stored - 1]] == 0) {
+    --num_stored;
+  }
+  VP8LWriteBits(bw, 4, num_stored - 4);
+  for (n = 0; n < num_stored; ++n) {
+    VP8LWriteBits(bw, 3, code_length_bitdepth[kStorageOrder[n]]);
+  }
+}
+
+// Zeroes all lengths and codes of a tree that uses at most one symbol; trees
+// with two or more used symbols are left untouched.
+static void ClearHuffmanTreeIfOnlyOneSymbol(
+    HuffmanTreeCode* const huffman_code) {
+  const int num_symbols = huffman_code->num_symbols;
+  int used = 0;
+  int s;
+  for (s = 0; s < num_symbols; ++s) {
+    if (huffman_code->code_lengths[s] != 0 && ++used > 1) return;
+  }
+  for (s = 0; s < num_symbols; ++s) {
+    huffman_code->code_lengths[s] = 0;
+    huffman_code->codes[s] = 0;
+  }
+}
+
+// Writes the first 'num_tokens' tokens of the compressed code lengths, each as
+// its Huffman code from 'huffman_code'. Tokens 16, 17 and 18 are followed by
+// 2, 3 and 7 extra bits respectively.
+static void StoreHuffmanTreeToBitMask(
+    VP8LBitWriter* const bw,
+    const HuffmanTreeToken* const tokens, const int num_tokens,
+    const HuffmanTreeCode* const huffman_code) {
+  int t;
+  for (t = 0; t < num_tokens; ++t) {
+    const int sym = tokens[t].code;
+    const int num_extra_bits = (sym == 16) ? 2
+                             : (sym == 17) ? 3
+                             : (sym == 18) ? 7
+                             : 0;
+    VP8LWriteBits(bw, huffman_code->code_lengths[sym],
+                  huffman_code->codes[sym]);
+    // Only tokens 16, 17 and 18 carry extra bits.
+    if (num_extra_bits > 0) {
+      VP8LWriteBits(bw, num_extra_bits, tokens[t].extra_bits);
+    }
+  }
+}
+
+static int StoreFullHuffmanCode(VP8LBitWriter* const bw,
+                                const HuffmanTreeCode* const tree) {
+  int ok = 0;   // becomes 1 only once the whole code has been written
+  uint8_t code_length_bitdepth[CODE_LENGTH_CODES] = { 0 };
+  uint16_t code_length_bitdepth_symbols[CODE_LENGTH_CODES] = { 0 };
+  const int max_tokens = tree->num_symbols;
+  int num_tokens;
+  HuffmanTreeCode huffman_code;   // code used to write the tokens themselves
+  HuffmanTreeToken* const tokens =
+      (HuffmanTreeToken*)WebPSafeMalloc((uint64_t)max_tokens, sizeof(*tokens));
+  if (tokens == NULL) return 0;
+  // Normal (non-small) form: the code lengths of 'tree', sent as coded tokens.
+  huffman_code.num_symbols = CODE_LENGTH_CODES;
+  huffman_code.code_lengths = code_length_bitdepth;
+  huffman_code.codes = code_length_bitdepth_symbols;
+
+  VP8LWriteBits(bw, 1, 0);   // 0: not the small-tree form of StoreHuffmanCode()
+  num_tokens = VP8LCreateCompressedHuffmanTree(tree, tokens, max_tokens);
+  {
+    int histogram[CODE_LENGTH_CODES] = { 0 };   // usage count of each token
+    int i;
+    for (i = 0; i < num_tokens; ++i) {
+      ++histogram[tokens[i].code];
+    }
+
+    if (!VP8LCreateHuffmanTree(histogram, 7, &huffman_code)) {
+      goto End;
+    }
+  }
+
+  StoreHuffmanTreeOfHuffmanTreeToBitMask(bw, code_length_bitdepth);
+  ClearHuffmanTreeIfOnlyOneSymbol(&huffman_code);
+  {
+    int trailing_zero_bits = 0;   // cost of the trailing zero tokens
+    int trimmed_length = num_tokens;   // token count without those tokens
+    int write_trimmed_length;
+    int length;
+    int i = num_tokens;
+    while (i-- > 0) {
+      const int ix = tokens[i].code;
+      if (ix == 0 || ix == 17 || ix == 18) {
+        --trimmed_length;   // discount trailing zeros
+        trailing_zero_bits += code_length_bitdepth[ix];
+        if (ix == 17) {
+          trailing_zero_bits += 3;
+        } else if (ix == 18) {
+          trailing_zero_bits += 7;
+        }
+      } else {
+        break;
+      }
+    }
+    write_trimmed_length = (trimmed_length > 1 && trailing_zero_bits > 12);
+    length = write_trimmed_length ? trimmed_length : num_tokens;
+    VP8LWriteBits(bw, 1, write_trimmed_length);
+    if (write_trimmed_length) {
+      const int nbits = VP8LBitsLog2Ceiling(trimmed_length - 1);
+      const int nbitpairs = (nbits == 0) ? 1 : (nbits + 1) / 2;
+      VP8LWriteBits(bw, 3, nbitpairs - 1);
+      assert(trimmed_length >= 2);
+      VP8LWriteBits(bw, nbitpairs * 2, trimmed_length - 2);
+    }
+    StoreHuffmanTreeToBitMask(bw, tokens, length, &huffman_code);
+  }
+  ok = 1;
+ End:
+  free(tokens);
+  return ok;
+}
+
+// Writes one Huffman code to 'bw'. Trees using 0, 1 or 2 symbols (all below
+// 256) get a compact form, others go through StoreFullHuffmanCode().
+static int StoreHuffmanCode(VP8LBitWriter* const bw,
+                            const HuffmanTreeCode* const huffman_code) {
+  const int kMaxBits = 8;
+  const int kMaxSymbol = 1 << kMaxBits;
+  int used[2] = { 0, 0 };   // first two symbols with a non-zero length
+  int num_used = 0;         // stops counting at 3, i.e. "more than two"
+  int s;
+
+  for (s = 0; s < huffman_code->num_symbols && num_used < 3; ++s) {
+    if (huffman_code->code_lengths[s] == 0) continue;
+    if (num_used < 2) used[num_used] = s;
+    ++num_used;
+  }
+  if (num_used == 0) {   // emit minimal tree for empty cases
+    // bits: small tree marker: 1, count-1: 0, large 8-bit code: 0, code: 0
+    VP8LWriteBits(bw, 4, 0x01);
+    return 1;
+  }
+  if (num_used > 2 || used[0] >= kMaxSymbol || used[1] >= kMaxSymbol) {
+    return StoreFullHuffmanCode(bw, huffman_code);
+  }
+
+  VP8LWriteBits(bw, 1, 1);  // Small tree marker to encode 1 or 2 symbols.
+  VP8LWriteBits(bw, 1, num_used - 1);
+  if (used[0] <= 1) {
+    VP8LWriteBits(bw, 1, 0);  // Code bit for small (1 bit) symbol value.
+    VP8LWriteBits(bw, 1, used[0]);
+  } else {
+    VP8LWriteBits(bw, 1, 1);
+    VP8LWriteBits(bw, 8, used[0]);
+  }
+  if (num_used == 2) {
+    VP8LWriteBits(bw, 8, used[1]);
+  }
+  return 1;
+}
+
+// Emits the Huffman code of symbol 'index': code->codes[index], written on
+// code->code_lengths[index] bits.
+static void WriteHuffmanCode(VP8LBitWriter* const bw,
+                             const HuffmanTreeCode* const code, int index) {
+  VP8LWriteBits(bw, code->code_lengths[index], code->codes[index]);
+}
+
+static void StoreImageToBitMask(
+    VP8LBitWriter* const bw, int width, int histo_bits,
+    const VP8LBackwardRefs* const refs,
+    const uint16_t* histogram_symbols,
+    const HuffmanTreeCode* const huffman_codes) {
+  // Writes all 'refs' entries; x and y trace the position in the image.
+  int x = 0;
+  int y = 0;
+  const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1;
+  int i;
+  for (i = 0; i < refs->size; ++i) {
+    const PixOrCopy* const v = &refs->refs[i];
+    const int histogram_ix = histogram_symbols[histo_bits ?
+                                               (y >> histo_bits) * histo_xsize +
+                                               (x >> histo_bits) : 0];
+    const HuffmanTreeCode* const codes = huffman_codes + 5 * histogram_ix;
+    if (PixOrCopyIsCacheIdx(v)) {   // coded in tree 0, after the length codes
+      const int code = PixOrCopyCacheIdx(v);
+      const int literal_ix = 256 + NUM_LENGTH_CODES + code;
+      WriteHuffmanCode(bw, codes, literal_ix);
+    } else if (PixOrCopyIsLiteral(v)) {   // one symbol in each of trees 0..3
+      static const int order[] = { 1, 2, 0, 3 };   // component used for tree k
+      int k;
+      for (k = 0; k < 4; ++k) {
+        const int code = PixOrCopyLiteral(v, order[k]);
+        WriteHuffmanCode(bw, codes + k, code);
+      }
+    } else {
+      int bits, n_bits;
+      int code, distance;
+      // Copy length: prefix code in tree 0 (offset by 256), then extra bits.
+      PrefixEncode(v->len, &code, &n_bits, &bits);
+      WriteHuffmanCode(bw, codes, 256 + code);
+      VP8LWriteBits(bw, n_bits, bits);
+      // Copy distance: prefix code in tree 4, then extra bits.
+      distance = PixOrCopyDistance(v);
+      PrefixEncode(distance, &code, &n_bits, &bits);
+      WriteHuffmanCode(bw, codes + 4, code);
+      VP8LWriteBits(bw, n_bits, bits);
+    }
+    x += PixOrCopyLength(v);   // pixels covered by this entry
+    while (x >= width) {   // wrap to the following row(s)
+      x -= width;
+      ++y;
+    }
+  }
+}
+
+// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31
+static int EncodeImageNoHuffman(VP8LBitWriter* const bw,
+                                const uint32_t* const argb,
+                                int width, int height, int quality) {
+  int i;
+  int ok = 0;   // becomes 1 only once the whole image has been written
+  VP8LBackwardRefs refs;   // presumably set by VP8LGetBackwardReferences()
+  HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } };   // all zeroed
+  const uint16_t histogram_symbols[1] = { 0 };    // only one tree, one symbol
+  VP8LHistogramSet* const histogram_image = VP8LAllocateHistogramSet(1, 0);
+  if (histogram_image == NULL) return 0;
+
+  // Calculate backward references from ARGB image.
+  if (!VP8LGetBackwardReferences(width, height, argb, quality, 0, 1, &refs)) {
+    goto Error;
+  }
+  // Build histogram image and symbols from backward references.
+  VP8LHistogramStoreRefs(&refs, histogram_image->histograms[0]);
+
+  // Create Huffman bit lengths and codes for each histogram image.
+  assert(histogram_image->size == 1);
+  if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+    goto Error;
+  }
+
+  // No color cache, no Huffman image.
+  VP8LWriteBits(bw, 1, 0);
+
+  // Store Huffman codes.
+  for (i = 0; i < 5; ++i) {
+    HuffmanTreeCode* const codes = &huffman_codes[i];
+    if (!StoreHuffmanCode(bw, codes)) {
+      goto Error;
+    }
+    ClearHuffmanTreeIfOnlyOneSymbol(codes);   // only after it was stored
+  }
+
+  // Store actual literals.
+  StoreImageToBitMask(bw, width, 0, &refs, histogram_symbols, huffman_codes);
+  ok = 1;
+
+ Error:
+  free(histogram_image);
+  VP8LClearBackwardRefs(&refs);
+  free(huffman_codes[0].codes);   // base of the buffer shared by the 5 codes
+  return ok;
+}
+
+// Encodes 'argb' (width x height): backward references, color cache flag,
+// optional histogram (meta-Huffman) image, Huffman codes, then the pixels.
+// Returns 1 on success and 0 if an allocation or any sub-step failed.
+static int EncodeImageInternal(VP8LBitWriter* const bw,
+                               const uint32_t* const argb,
+                               int width, int height, int quality,
+                               int cache_bits, int histogram_bits) {
+  int ok = 0;
+  const int use_2d_locality = 1;
+  const int use_color_cache = (cache_bits > 0);
+  const uint32_t histogram_image_xysize =
+      VP8LSubSampleSize(width, histogram_bits) *
+      VP8LSubSampleSize(height, histogram_bits);
+  VP8LHistogramSet* histogram_image =
+      VP8LAllocateHistogramSet(histogram_image_xysize, 0);
+  int histogram_image_size = 0;
+  size_t bit_array_size = 0;
+  HuffmanTreeCode* huffman_codes = NULL;
+  VP8LBackwardRefs refs;
+  uint16_t* const histogram_symbols =
+      (uint16_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+                                sizeof(*histogram_symbols));
+  assert(histogram_bits >= MIN_HUFFMAN_BITS);
+  assert(histogram_bits <= MAX_HUFFMAN_BITS);
+  if (histogram_image == NULL || histogram_symbols == NULL) {
+    free(histogram_image);
+    free(histogram_symbols);
+    return 0;   // not via 'Error': 'refs' is still uninitialized here
+  }
+
+  // Calculate backward references from ARGB image.
+  if (!VP8LGetBackwardReferences(width, height, argb, quality, cache_bits,
+                                 use_2d_locality, &refs)) {
+    goto Error;
+  }
+  // Build histogram image and symbols from backward references.
+  if (!VP8LGetHistoImageSymbols(width, height, &refs,
+                                quality, histogram_bits, cache_bits,
+                                histogram_image, histogram_symbols)) {
+    goto Error;
+  }
+  // Create Huffman bit lengths and codes for each histogram image.
+  histogram_image_size = histogram_image->size;
+  bit_array_size = 5 * histogram_image_size;
+  huffman_codes = (HuffmanTreeCode*)WebPSafeCalloc(bit_array_size,
+                                                   sizeof(*huffman_codes));
+  if (huffman_codes == NULL ||
+      !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
+    goto Error;
+  }
+
+  // Color Cache parameters.
+  VP8LWriteBits(bw, 1, use_color_cache);
+  if (use_color_cache) VP8LWriteBits(bw, 4, cache_bits);
+
+  // Huffman image + meta huffman.
+  {
+    const int write_histogram_image = (histogram_image_size > 1);
+    VP8LWriteBits(bw, 1, write_histogram_image);
+    if (write_histogram_image) {
+      uint32_t* const histogram_argb =
+          (uint32_t*)WebPSafeMalloc((uint64_t)histogram_image_xysize,
+                                    sizeof(*histogram_argb));
+      int max_index = 0;
+      int sub_ok;   // kept apart from 'ok', which stays 0 until the very end
+      uint32_t i;
+      if (histogram_argb == NULL) goto Error;
+      for (i = 0; i < histogram_image_xysize; ++i) {
+        const int index = histogram_symbols[i] & 0xffff;
+        histogram_argb[i] = 0xff000000 | (index << 8);
+        if (index >= max_index) max_index = index + 1;
+      }
+      histogram_image_size = max_index;
+      VP8LWriteBits(bw, 3, histogram_bits - 2);
+      sub_ok = EncodeImageNoHuffman(bw, histogram_argb,
+                                    VP8LSubSampleSize(width, histogram_bits),
+                                    VP8LSubSampleSize(height, histogram_bits),
+                                    quality);
+      free(histogram_argb);
+      if (!sub_ok) goto Error;
+    }
+  }
+
+  // Store Huffman codes.
+  {
+    int i;
+    for (i = 0; i < 5 * histogram_image_size; ++i) {
+      HuffmanTreeCode* const codes = &huffman_codes[i];
+      if (!StoreHuffmanCode(bw, codes)) goto Error;
+      ClearHuffmanTreeIfOnlyOneSymbol(codes);
+    }
+  }
+  // Free combined histograms.
+  free(histogram_image);
+  histogram_image = NULL;
+  // Store actual literals.
+  StoreImageToBitMask(bw, width, histogram_bits, &refs,
+                      histogram_symbols, huffman_codes);
+  ok = 1;
+
+ Error:
+  free(histogram_image);   // NULL when already released above
+  VP8LClearBackwardRefs(&refs);
+  if (huffman_codes != NULL) {
+    free(huffman_codes->codes);
+    free(huffman_codes);
+  }
+  free(histogram_symbols);
+  return ok;
+}
+
+// -----------------------------------------------------------------------------
+// Transforms
+
+// Estimates whether subtracting green from red and blue lowers the entropy of
+// those two channels (the only ones affected); if so, signals the transform in
+// 'bw' and applies it to enc->argb_. No-op with a palette. Returns 0 on OOM.
+static int EvalAndApplySubtractGreen(VP8LEncoder* const enc,
+                                     int width, int height,
+                                     VP8LBitWriter* const bw) {
+  const uint32_t* const argb = enc->argb_;
+  double cost_plain, cost_subtracted;
+  VP8LHistogram* histo;
+  int n;
+  if (enc->use_palette_) return 1;
+
+  histo = (VP8LHistogram*)malloc(sizeof(*histo));
+  if (histo == NULL) return 0;
+  VP8LHistogramInit(histo, 1);
+  for (n = 0; n < width * height; ++n) {
+    const uint32_t c = argb[n];
+    ++histo->red_[(c >> 16) & 0xff];
+    ++histo->blue_[(c >> 0) & 0xff];
+  }
+  cost_plain = VP8LHistogramEstimateBits(histo);
+
+  VP8LHistogramInit(histo, 1);
+  for (n = 0; n < width * height; ++n) {
+    const uint32_t c = argb[n];
+    const int green = (c >> 8) & 0xff;
+    ++histo->red_[((c >> 16) - green) & 0xff];
+    ++histo->blue_[((c >> 0) - green) & 0xff];
+  }
+  cost_subtracted = VP8LHistogramEstimateBits(histo);
+  free(histo);
+
+  // Check if subtracting green yields low entropy.
+  enc->use_subtract_green_ = (cost_subtracted < cost_plain);
+  if (!enc->use_subtract_green_) return 1;
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, SUBTRACT_GREEN);
+  VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height);
+  return 1;
+}
+
+// Runs VP8LResidualImage() on enc->argb_ and writes the predictor transform to
+// 'bw': transform type, tile bits (minus 2) and the sub-sampled transform
+// image. Returns 0 if encoding the transform image fails.
+static int ApplyPredictFilter(const VP8LEncoder* const enc,
+                              int width, int height, int quality,
+                              VP8LBitWriter* const bw) {
+  const int pred_bits = enc->transform_bits_;
+  const int tiles_wide = VP8LSubSampleSize(width, pred_bits);
+  const int tiles_high = VP8LSubSampleSize(height, pred_bits);
+
+  VP8LResidualImage(width, height, pred_bits, enc->argb_, enc->argb_scratch_,
+                    enc->transform_data_);
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, PREDICTOR_TRANSFORM);
+  assert(pred_bits >= 2);
+  VP8LWriteBits(bw, 3, pred_bits - 2);
+  return EncodeImageNoHuffman(bw, enc->transform_data_,
+                              tiles_wide, tiles_high, quality) != 0;
+}
+
+// Runs VP8LColorSpaceTransform() on enc->argb_ and writes the cross-color
+// transform to 'bw': transform type, tile bits (minus 2) and the sub-sampled
+// transform image. Returns 0 if encoding the transform image fails.
+static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
+                                 int width, int height, int quality,
+                                 VP8LBitWriter* const bw) {
+  const int ccolor_transform_bits = enc->transform_bits_;
+  const int tiles_wide = VP8LSubSampleSize(width, ccolor_transform_bits);
+  const int tiles_high = VP8LSubSampleSize(height, ccolor_transform_bits);
+  const int step = (quality == 0) ? 32 : 8;   // larger step at quality 0
+
+  VP8LColorSpaceTransform(width, height, ccolor_transform_bits, step,
+                          enc->argb_, enc->transform_data_);
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, CROSS_COLOR_TRANSFORM);
+  assert(ccolor_transform_bits >= 2);
+  VP8LWriteBits(bw, 3, ccolor_transform_bits - 2);
+  return EncodeImageNoHuffman(bw, enc->transform_data_,
+                              tiles_wide, tiles_high, quality) != 0;
+}
+
+// -----------------------------------------------------------------------------
+
+// Stores the 32-bit value 'val' into data[0..3] in little-endian order (least
+// significant byte first).
+static void PutLE32(uint8_t* const data, uint32_t val) {
+  int i;
+  for (i = 0; i < 4; ++i) data[i] = (val >> (8 * i)) & 0xff;
+}
+
+// Sends the RIFF/WEBP container header and the 'VP8L' chunk header (plus the
+// VP8L magic byte) to pic->writer, with both size fields filled in.
+static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
+                                         size_t riff_size, size_t vp8l_size) {
+  uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
+    'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P',
+    'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE,
+  };
+  PutLE32(riff + TAG_SIZE, (uint32_t)riff_size);
+  PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size);
+  return pic->writer(riff, sizeof(riff), pic) ? VP8_ENC_OK
+                                              : VP8_ENC_ERROR_BAD_WRITE;
+}
+
+// Writes width - 1 and height - 1 on VP8L_IMAGE_SIZE_BITS bits each.
+static int WriteImageSize(const WebPPicture* const pic,
+                          VP8LBitWriter* const bw) {
+  const int width = pic->width - 1;     // both are stored minus one
+  const int height = pic->height - 1;
+  assert(width < WEBP_MAX_DIMENSION && height < WEBP_MAX_DIMENSION);
+  VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, width);
+  VP8LWriteBits(bw, VP8L_IMAGE_SIZE_BITS, height);
+  return bw->error_ ? 0 : 1;   // 0 if the bit writer is in error state
+}
+
+static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) {
+  VP8LWriteBits(bw, 1, has_alpha);   // 1-bit alpha flag
+  VP8LWriteBits(bw, VP8L_VERSION_BITS, VP8L_VERSION);   // format version
+  return !bw->error_;   // 0 if the bit writer reported an error
+}
+
+// Finishes the bit writer and sends the file through pic->writer: the RIFF
+// and VP8L headers, the coded stream, then one zero byte if the VP8L chunk
+// size is odd. On success, sets *coded_size to CHUNK_HEADER_SIZE + riff_size.
+// Returns VP8_ENC_ERROR_BAD_WRITE as soon as a write is refused.
+static WebPEncodingError WriteImage(const WebPPicture* const pic,
+                                    VP8LBitWriter* const bw,
+                                    size_t* const coded_size) {
+  const uint8_t* const webpll_data = VP8LBitWriterFinish(bw);
+  const size_t webpll_size = VP8LBitWriterNumBytes(bw);
+  const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size;
+  const size_t pad = vp8l_size & 1;
+  const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad;
+  const WebPEncodingError err = WriteRiffHeader(pic, riff_size, vp8l_size);
+
+  if (err != VP8_ENC_OK) return err;
+
+  // Coded stream, right after the headers.
+  if (!pic->writer(webpll_data, webpll_size, pic)) {
+    return VP8_ENC_ERROR_BAD_WRITE;
+  }
+
+  if (pad) {   // odd chunk size: one zero byte follows the data
+    const uint8_t pad_byte[1] = { 0 };
+    if (!pic->writer(pad_byte, 1, pic)) {
+      return VP8_ENC_ERROR_BAD_WRITE;
+    }
+  }
+
+  *coded_size = CHUNK_HEADER_SIZE + riff_size;
+  return VP8_ENC_OK;
+}
+
+// -----------------------------------------------------------------------------
+
+// Allocates one block holding the argb (W x H) buffer, the scratch rows used
+// for prediction and the transform data, and points enc->argb_,
+// enc->argb_scratch_ and enc->transform_data_ into it. Also records 'width'
+// in enc->current_width_. Returns VP8_ENC_ERROR_OUT_OF_MEMORY on failure.
+static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
+                                                 int width, int height) {
+  const int tile_size = 1 << enc->transform_bits_;
+  const uint64_t image_size = width * height;
+  const uint64_t argb_scratch_size = tile_size * width + width;
+  const uint64_t transform_data_size =
+      (uint64_t)VP8LSubSampleSize(width, enc->transform_bits_) *
+      (uint64_t)VP8LSubSampleSize(height, enc->transform_bits_);
+  const uint64_t total_size =
+      image_size + argb_scratch_size + transform_data_size;
+  uint32_t* const mem = (uint32_t*)WebPSafeMalloc(total_size, sizeof(*mem));
+
+  if (mem == NULL) {
+    return VP8_ENC_ERROR_OUT_OF_MEMORY;
+  }
+
+  // Layout: [ argb | scratch rows | transform data ].
+  enc->argb_ = mem;
+  enc->argb_scratch_ = mem + image_size;
+  enc->transform_data_ = mem + image_size + argb_scratch_size;
+  enc->current_width_ = width;
+
+  return VP8_ENC_OK;
+}
+
+// Bundles multiple (2, 4 or 8) pixels into a single pixel: the 0xff00 (green)
+// bits of 1 << xbits consecutive pixels of each row of pic->argb are merged
+// into one entry of 'bundled_argb', whose rows are 'xs' entries apart.
+static void BundleColorMap(const WebPPicture* const pic,
+                           int xbits, uint32_t* bundled_argb, int xs) {
+  const int width = pic->width;
+  const int height = pic->height;
+  const int bit_depth = 1 << (3 - xbits);   // shift step between two pixels
+  const int mask = (1 << xbits) - 1;
+  const uint32_t* src = pic->argb;
+  uint32_t* dst = bundled_argb;
+  uint32_t code = 0;
+  int x, y;
+
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const int xsub = x & mask;
+      if (xsub == 0) code = 0;   // first pixel of a new bundle
+      // TODO(vikasa): simplify the bundling logic.
+      code |= (src[x] & 0xff00) << (bit_depth * xsub);
+      dst[x >> xbits] = 0xff000000 | code;
+    }
+    src += pic->argb_stride;
+    dst += xs;
+  }
+}
+
+// Replaces every pixel of enc->pic_ found in the palette by its index (stored
+// in bits 8..15), writes the color-indexing transform and the palette to 'bw'
+// and, for palettes of at most 16 colors, packs the indices into enc->argb_.
+// Note: expects "enc->palette_" to be set properly.
+// Also, "enc->palette_" will be modified after this call and should not be used
+// later.
+static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
+                                      VP8LEncoder* const enc, int quality) {
+  const WebPPicture* const pic = enc->pic_;
+  uint32_t* const palette = enc->palette_;
+  const int palette_size = enc->palette_size_;
+  const int width = pic->width;
+  const int height = pic->height;
+  uint32_t* row = pic->argb;
+  WebPEncodingError err;
+  int xbits;
+  int i, x, y;
+
+  // Replace each input pixel by corresponding palette index.
+  for (y = 0; y < height; ++y) {
+    for (x = 0; x < width; ++x) {
+      const uint32_t pix = row[x];
+      for (i = 0; i < palette_size && pix != palette[i]; ++i) {}
+      if (i < palette_size) row[x] = 0xff000000u | (i << 8);
+    }
+    row += pic->argb_stride;
+  }
+
+  // Save palette to bitstream.
+  VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
+  VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM);
+  assert(palette_size >= 1);
+  VP8LWriteBits(bw, 8, palette_size - 1);
+  for (i = palette_size - 1; i >= 1; --i) {
+    palette[i] = VP8LSubPixels(palette[i], palette[i - 1]);
+  }
+  if (!EncodeImageNoHuffman(bw, palette, palette_size, 1, quality)) {
+    return VP8_ENC_ERROR_INVALID_CONFIGURATION;
+  }
+
+  if (palette_size > 16) {
+    return VP8_ENC_OK;   // too many colors: the image is not packed
+  }
+  // Image can be packed (multiple pixels per uint32_t).
+  if (palette_size <= 2) {
+    xbits = 3;
+  } else if (palette_size <= 4) {
+    xbits = 2;
+  } else {
+    xbits = 1;
+  }
+
+  err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
+  if (err != VP8_ENC_OK) return err;
+  BundleColorMap(pic, xbits, enc->argb_, enc->current_width_);
+  return VP8_ENC_OK;
+}
+
+// -----------------------------------------------------------------------------
+
+// Picks the histogram tile size (in bits) from the encoding method, enlarging
+// it until the estimated histogram storage fits in MAX_HUFF_IMAGE_SIZE. The
+// result is clamped to [MIN_HUFFMAN_BITS, MAX_HUFFMAN_BITS].
+static int GetHistoBits(const WebPConfig* const config,
+                        const WebPPicture* const pic) {
+  // Make tile size a function of encoding method (Range: 0 to 6).
+  int histo_bits = 7 - config->method;
+  for (;; ++histo_bits) {
+    const size_t huff_image_size = VP8LSubSampleSize(pic->width, histo_bits) *
+                                   VP8LSubSampleSize(pic->height, histo_bits) *
+                                   sizeof(VP8LHistogram);
+    if (huff_image_size <= MAX_HUFF_IMAGE_SIZE) break;
+  }
+  if (histo_bits < MIN_HUFFMAN_BITS) return MIN_HUFFMAN_BITS;
+  if (histo_bits > MAX_HUFFMAN_BITS) return MAX_HUFFMAN_BITS;
+  return histo_bits;
+}
+
+// Derives transform_bits_, histo_bits_ and cache_bits_ from the configuration.
+static void InitEncParams(VP8LEncoder* const enc) {
+  const WebPConfig* const config = enc->config_;
+  const float quality = config->quality;
+  const int method = config->method;
+  enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4;
+  enc->histo_bits_ = GetHistoBits(config, enc->pic_);
+  enc->cache_bits_ = (quality <= 25.f) ? 0 : 7;   // no color cache if <= 25
+}
+
+// -----------------------------------------------------------------------------
+// VP8LEncoder
+
+static VP8LEncoder* VP8LEncoderNew(const WebPConfig* const config,
+                                   const WebPPicture* const picture) {
+  VP8LEncoder* const enc = (VP8LEncoder*)calloc(1, sizeof(*enc));  // zeroed
+  if (enc != NULL) {
+    enc->config_ = config;
+    enc->pic_ = picture;
+  } else {   // out of memory: report it through the picture
+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+  }
+  return enc;   // NULL on failure
+}
+
+static void VP8LEncoderDelete(VP8LEncoder* enc) {   // 'enc' may be NULL
+  if (enc != NULL) free(enc->argb_);   // base of the transform buffers
+  free(enc);
+}
+
+// -----------------------------------------------------------------------------
+// Main call
+
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw) {
+  WebPEncodingError err = VP8_ENC_OK;
+  const int quality = (int)config->quality;
+  const int width = picture->width;
+  const int height = picture->height;
+  VP8LEncoder* const enc = VP8LEncoderNew(config, picture);
+  const size_t byte_position = VP8LBitWriterNumBytes(bw);   // for the stats
+  // Encodes 'picture' into 'bw': analysis, transforms, then the image data.
+  if (enc == NULL) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;   // NOTE(review): VP8LEncoderDelete() then receives NULL
+  }
+
+  InitEncParams(enc);
+
+  // ---------------------------------------------------------------------------
+  // Analyze image (entropy, num_palettes etc)
+
+  if (!VP8LEncAnalyze(enc, config->image_hint)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (enc->use_palette_) {
+    err = ApplyPalette(bw, enc, quality);
+    if (err != VP8_ENC_OK) goto Error;
+    // Color cache is disabled for palette.
+    enc->cache_bits_ = 0;
+  }
+
+  // In case image is not packed.
+  if (enc->argb_ == NULL) {
+    int y;
+    err = AllocateTransformBuffer(enc, width, height);
+    if (err != VP8_ENC_OK) goto Error;
+    for (y = 0; y < height; ++y) {
+      memcpy(enc->argb_ + y * width,
+             picture->argb + y * picture->argb_stride,
+             width * sizeof(*enc->argb_));   // destination rows are contiguous
+    }
+    enc->current_width_ = width;
+  }
+
+  // ---------------------------------------------------------------------------
+  // Apply transforms and write transform data.
+
+  if (!EvalAndApplySubtractGreen(enc, enc->current_width_, height, bw)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (enc->use_predict_) {
+    if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, bw)) {
+      err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+      goto Error;
+    }
+  }
+
+  if (enc->use_cross_color_) {
+    if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, bw)) {
+      err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+      goto Error;
+    }
+  }
+
+  VP8LWriteBits(bw, 1, !TRANSFORM_PRESENT);  // No more transforms.
+
+  // ---------------------------------------------------------------------------
+  // Estimate the color cache size.
+
+  if (enc->cache_bits_ > 0) {
+    if (!VP8LCalculateEstimateForCacheSize(enc->argb_, enc->current_width_,
+                                           height, &enc->cache_bits_)) {
+      err = VP8_ENC_ERROR_INVALID_CONFIGURATION;
+      goto Error;
+    }
+  }
+
+  // ---------------------------------------------------------------------------
+  // Encode and write the transformed image.
+
+  if (!EncodeImageInternal(bw, enc->argb_, enc->current_width_, height,
+                           quality, enc->cache_bits_, enc->histo_bits_)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (picture->stats != NULL) {
+    WebPAuxStats* const stats = picture->stats;
+    stats->lossless_features = 0;   // bit mask of the coding tools in use
+    if (enc->use_predict_) stats->lossless_features |= 1;
+    if (enc->use_cross_color_) stats->lossless_features |= 2;
+    if (enc->use_subtract_green_) stats->lossless_features |= 4;
+    if (enc->use_palette_) stats->lossless_features |= 8;
+    stats->histogram_bits = enc->histo_bits_;
+    stats->transform_bits = enc->transform_bits_;
+    stats->cache_bits = enc->cache_bits_;
+    stats->palette_size = enc->palette_size_;
+    stats->lossless_size = (int)(VP8LBitWriterNumBytes(bw) - byte_position);
+  }
+
+ Error:
+  VP8LEncoderDelete(enc);
+  return err;
+}
+
+int VP8LEncodeImage(const WebPConfig* const config,
+                    const WebPPicture* const picture) {
+  int width, height;
+  int has_alpha;
+  size_t coded_size;
+  int percent = 0;
+  WebPEncodingError err = VP8_ENC_OK;
+  VP8LBitWriter bw;
+  // Writes a complete lossless file for 'picture' through picture->writer.
+  if (picture == NULL) return 0;   // nowhere to record the error
+
+  if (config == NULL || picture->argb == NULL) {
+    err = VP8_ENC_ERROR_NULL_PARAMETER;
+    WebPEncodingSetError(picture, err);
+    return 0;
+  }
+
+  width = picture->width;
+  height = picture->height;
+  if (!VP8LBitWriterInit(&bw, (width * height) >> 1)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (!WebPReportProgress(picture, 1, &percent)) {
+ UserAbort:   // also the target of the later WebPReportProgress() checks
+    err = VP8_ENC_ERROR_USER_ABORT;
+    goto Error;
+  }
+  // Reset stats (for pure lossless coding)
+  if (picture->stats != NULL) {
+    WebPAuxStats* const stats = picture->stats;
+    memset(stats, 0, sizeof(*stats));
+    stats->PSNR[0] = 99.f;
+    stats->PSNR[1] = 99.f;
+    stats->PSNR[2] = 99.f;
+    stats->PSNR[3] = 99.f;
+    stats->PSNR[4] = 99.f;
+  }
+
+  // Write image size.
+  if (!WriteImageSize(picture, &bw)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  has_alpha = WebPPictureHasTransparency(picture);
+  // Write the non-trivial Alpha flag and lossless version.
+  if (!WriteRealAlphaAndVersion(&bw, has_alpha)) {
+    err = VP8_ENC_ERROR_OUT_OF_MEMORY;
+    goto Error;
+  }
+
+  if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort;
+
+  // Encode main image stream.
+  err = VP8LEncodeStream(config, picture, &bw);
+  if (err != VP8_ENC_OK) goto Error;
+
+  // TODO(skal): have a fine-grained progress report in VP8LEncodeStream().
+  if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort;
+
+  // Finish the RIFF chunk.
+  err = WriteImage(picture, &bw, &coded_size);
+  if (err != VP8_ENC_OK) goto Error;
+
+  if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort;
+
+  // Save size.
+  if (picture->stats != NULL) {
+    picture->stats->coded_size += (int)coded_size;
+    picture->stats->lossless_size = (int)coded_size;   // replaces stream size
+  }
+
+  if (picture->extra_info != NULL) {   // cleared: one entry per 16x16 block
+    const int mb_w = (width + 15) >> 4;
+    const int mb_h = (height + 15) >> 4;
+    memset(picture->extra_info, 0, mb_w * mb_h * sizeof(*picture->extra_info));
+  }
+
+ Error:
+  if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY;   // overrides 'err'
+  VP8LBitWriterDestroy(&bw);
+  if (err != VP8_ENC_OK) {
+    WebPEncodingSetError(picture, err);
+    return 0;
+  }
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/enc/vp8li.h b/drivers/webpold/enc/vp8li.h
new file mode 100644
index 0000000000..bb111aec33
--- /dev/null
+++ b/drivers/webpold/enc/vp8li.h
@@ -0,0 +1,68 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Lossless encoder: internal header.
+//
+// Author: Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_ENC_VP8LI_H_
+#define WEBP_ENC_VP8LI_H_
+
+#include "./histogram.h"
+#include "../utils/bit_writer.h"
+#include "../encode.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+typedef struct {
+  const WebPConfig* config_;    // user configuration and parameters
+  const WebPPicture* pic_;      // input picture.
+
+  uint32_t* argb_;              // Transformed argb image data.
+  uint32_t* argb_scratch_;      // Scratch memory for argb rows
+                                // (used for prediction).
+  uint32_t* transform_data_;    // Scratch memory for transform data.
+  int       current_width_;     // Corresponds to packed image width.
+
+  // Encoding parameters derived from quality parameter.
+  int histo_bits_;        // presumably histogram precision bits - TODO confirm
+  int transform_bits_;    // presumably transform precision bits - TODO confirm
+  int cache_bits_;        // If equal to 0, don't use color cache.
+
+  // Encoding parameters derived from image characteristics.
+  int use_cross_color_;     // flag; presumably the cross-color transform
+  int use_subtract_green_;  // flag; presumably the subtract-green transform
+  int use_predict_;         // flag; presumably the predictor transform
+  int use_palette_;         // flag; presumably color indexing via palette_[]
+  int palette_size_;        // presumably number of used palette_[] entries
+  uint32_t palette_[MAX_PALETTE_SIZE];  // capacity fixed at MAX_PALETTE_SIZE
+} VP8LEncoder;
+
+//------------------------------------------------------------------------------
+// internal functions. Not public.
+
+// Encodes the picture.
+// Returns 0 if config or picture is NULL or picture doesn't have valid argb
+// input.
+int VP8LEncodeImage(const WebPConfig* const config,
+                    const WebPPicture* const picture);
+
+// Encodes the main image stream using the supplied bit writer.
+WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
+                                   const WebPPicture* const picture,
+                                   VP8LBitWriter* const bw);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_ENC_VP8LI_H_ */
diff --git a/drivers/webpold/enc/webpenc.c b/drivers/webpold/enc/webpenc.c
new file mode 100644
index 0000000000..3c275589fc
--- /dev/null
+++ b/drivers/webpold/enc/webpenc.c
@@ -0,0 +1,389 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// WebP encoder: main entry point
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "./vp8enci.h"
+#include "./vp8li.h"
+#include "../utils/utils.h"
+
+// #define PRINT_MEMORY_INFO
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef PRINT_MEMORY_INFO
+#include <stdio.h>
+#endif
+
+//------------------------------------------------------------------------------
+
+int WebPGetEncoderVersion(void) {  // packs major/minor/revision, 8 bits each
+  return (ENC_MAJ_VERSION << 16) | (ENC_MIN_VERSION << 8) | ENC_REV_VERSION;
+}
+
+//------------------------------------------------------------------------------
+// WebPPicture
+//------------------------------------------------------------------------------
+
+static int DummyWriter(const uint8_t* data, size_t data_size,
+                       const WebPPicture* const picture) {
+  // No-op writer: the casts below only silence 'unused parameter' warnings.
+  (void)data;
+  (void)data_size;
+  (void)picture;
+  return 1;  // always reports a successful write; the bytes are discarded
+}
+
+int WebPPictureInitInternal(WebPPicture* picture, int version) {
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_ENCODER_ABI_VERSION)) {
+    return 0;   // caller/system version mismatch!
+  }
+  if (picture != NULL) {  // with a NULL picture only the ABI check is done
+    memset(picture, 0, sizeof(*picture));  // every field starts at 0 / NULL
+    picture->writer = DummyWriter;  // default writer that discards the output
+    WebPEncodingSetError(picture, VP8_ENC_OK);
+  }
+  return 1;  // ABI versions are compatible
+}
+
+//------------------------------------------------------------------------------
+// VP8Encoder
+//------------------------------------------------------------------------------
+
+static void ResetSegmentHeader(VP8Encoder* const enc) {
+  VP8SegmentHeader* const hdr = &enc->segment_hdr_;
+  hdr->num_segments_ = enc->config_->segments;  // taken from the user config
+  hdr->update_map_  = (hdr->num_segments_ > 1);  // set only for >1 segment
+  hdr->size_ = 0;
+}
+
+static void ResetFilterHeader(VP8Encoder* const enc) {  // fixed start values
+  VP8FilterHeader* const hdr = &enc->filter_hdr_;
+  hdr->simple_ = 1;
+  hdr->level_ = 0;
+  hdr->sharpness_ = 0;
+  hdr->i4x4_lf_delta_ = 0;
+}
+
+static void ResetBoundaryPredictions(VP8Encoder* const enc) {
+  // init boundary values once for all
+  // Note: actually, initializing the preds_[] is only needed for intra4.
+  int i;
+  uint8_t* const top = enc->preds_ - enc->preds_w_;  // row just above preds_
+  uint8_t* const left = enc->preds_ - 1;             // column left of preds_
+  for (i = -1; i < 4 * enc->mb_w_; ++i) {  // i = -1 also sets the corner entry
+    top[i] = B_DC_PRED;
+  }
+  for (i = 0; i < 4 * enc->mb_h_; ++i) {   // one entry per row, stride preds_w_
+    left[i * enc->preds_w_] = B_DC_PRED;
+  }
+  enc->nz_[-1] = 0;   // constant
+}
+
+// Map configured quality level to coding tools used.
+//-------------+---+---+---+---+---+---+
+//   Quality   | 0 | 1 | 2 | 3 | 4 | 5 +
+//-------------+---+---+---+---+---+---+
+// dynamic prob| ~ | x | x | x | x | x |
+//-------------+---+---+---+---+---+---+
+// rd-opt modes|   |   | x | x | x | x |
+//-------------+---+---+---+---+---+---+
+// fast i4/i16 | x | x |   |   |   |   |
+//-------------+---+---+---+---+---+---+
+// rd-opt i4/16|   |   | x | x | x | x |
+//-------------+---+---+---+---+---+---+
+// Trellis     |   | x |   |   | x | x |
+//-------------+---+---+---+---+---+---+
+// full-SNS    |   |   |   |   |   | x |
+//-------------+---+---+---+---+---+---+
+
+static void MapConfigToTools(VP8Encoder* const enc) {
+  const int method = enc->config_->method;
+  const int limit = 100 - enc->config_->partition_limit;  // 100 if limit is 0
+  enc->method_ = method;
+  enc->rd_opt_level_ = (method >= 6) ? 3
+                     : (method >= 5) ? 2
+                     : (method >= 3) ? 1
+                     : 0;                // methods 0..2 map to level 0
+  enc->max_i4_header_bits_ =
+      256 * 16 * 16 *                 // upper bound: up to 16bit per 4x4 block
+      (limit * limit) / (100 * 100);  // ... modulated with a quadratic curve.
+}
+
+// Memory scaling with dimensions:
+//  memory (bytes) ~= 2.25 * w + 0.0625 * w * h
+//
+// Typical memory footprint (768x510 picture)
+// Memory used:
+//              encoder: 33919
+//          block cache: 2880
+//                 info: 3072
+//                preds: 24897
+//          top samples: 1623
+//             non-zero: 196
+//             lf-stats: 2048
+//                total: 68635
+// Transient object sizes:
+//       VP8EncIterator: 352
+//         VP8ModeScore: 912
+//       VP8SegmentInfo: 532
+//             VP8Proba: 31032
+//              LFStats: 2048
+// Picture size (yuv): 589824
+
+static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
+                                  WebPPicture* const picture) {
+  const int use_filter =
+      (config->filter_strength > 0) || (config->autofilter > 0);
+  const int mb_w = (picture->width + 15) >> 4;   // width in 16-pixel units
+  const int mb_h = (picture->height + 15) >> 4;  // height in 16-pixel units
+  const int preds_w = 4 * mb_w + 1;  // +1: extra boundary column
+  const int preds_h = 4 * mb_h + 1;  // +1: extra boundary row
+  const size_t preds_size = preds_w * preds_h * sizeof(uint8_t);
+  const int top_stride = mb_w * 16;
+  const size_t nz_size = (mb_w + 1) * sizeof(uint32_t);  // +1 for nz_[-1]
+  const size_t cache_size = (3 * YUV_SIZE + PRED_SIZE) * sizeof(uint8_t);
+  const size_t info_size = mb_w * mb_h * sizeof(VP8MBInfo);
+  const size_t samples_size = (2 * top_stride +         // top-luma/u/v
+                               16 + 16 + 16 + 8 + 1 +   // left y/u/v
+                               2 * ALIGN_CST)           // align all
+                               * sizeof(uint8_t);
+  const size_t lf_stats_size =
+      config->autofilter ? sizeof(LFStats) + ALIGN_CST : 0;
+  VP8Encoder* enc;
+  uint8_t* mem;
+  const uint64_t size = (uint64_t)sizeof(VP8Encoder)   // main struct
+                      + ALIGN_CST                      // cache alignment
+                      + cache_size                     // working caches
+                      + info_size                      // modes info
+                      + preds_size                     // prediction modes
+                      + samples_size                   // top/left samples
+                      + nz_size                        // coeff context bits
+                      + lf_stats_size;                 // autofilter stats
+
+#ifdef PRINT_MEMORY_INFO  // NOTE(review): %ld is fed size_t/uint64_t values
+  printf("===================================\n");
+  printf("Memory used:\n"
+         "             encoder: %ld\n"
+         "         block cache: %ld\n"
+         "                info: %ld\n"
+         "               preds: %ld\n"
+         "         top samples: %ld\n"
+         "            non-zero: %ld\n"
+         "            lf-stats: %ld\n"
+         "               total: %ld\n",
+         sizeof(VP8Encoder) + ALIGN_CST, cache_size, info_size,
+         preds_size, samples_size, nz_size, lf_stats_size, size);
+  printf("Transcient object sizes:\n"
+         "      VP8EncIterator: %ld\n"
+         "        VP8ModeScore: %ld\n"
+         "      VP8SegmentInfo: %ld\n"
+         "            VP8Proba: %ld\n"
+         "             LFStats: %ld\n",
+         sizeof(VP8EncIterator), sizeof(VP8ModeScore),
+         sizeof(VP8SegmentInfo), sizeof(VP8Proba),
+         sizeof(LFStats));
+  printf("Picture size (yuv): %ld\n",
+         mb_w * mb_h * 384 * sizeof(uint8_t));
+  printf("===================================\n");
+#endif
+  mem = (uint8_t*)WebPSafeMalloc(size, sizeof(*mem));  // one block for all
+  if (mem == NULL) {
+    WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY);
+    return NULL;
+  }
+  enc = (VP8Encoder*)mem;  // the struct sits at the start of the block
+  mem = (uint8_t*)DO_ALIGN(mem + sizeof(*enc));
+  memset(enc, 0, sizeof(*enc));
+  enc->num_parts_ = 1 << config->partitions;  // partitions is a log2 value
+  enc->mb_w_ = mb_w;
+  enc->mb_h_ = mb_h;
+  enc->preds_w_ = preds_w;
+  enc->yuv_in_ = (uint8_t*)mem;    // the buffers below are carved out of the
+  mem += YUV_SIZE;                 // block in the order the sizes were summed
+  enc->yuv_out_ = (uint8_t*)mem;
+  mem += YUV_SIZE;
+  enc->yuv_out2_ = (uint8_t*)mem;
+  mem += YUV_SIZE;
+  enc->yuv_p_ = (uint8_t*)mem;
+  mem += PRED_SIZE;
+  enc->mb_info_ = (VP8MBInfo*)mem;
+  mem += info_size;
+  enc->preds_ = ((uint8_t*)mem) + 1 + enc->preds_w_;  // skip boundary row+col
+  mem += preds_w * preds_h * sizeof(uint8_t);
+  enc->nz_ = 1 + (uint32_t*)mem;  // keeps nz_[-1] addressable
+  mem += nz_size;
+  enc->lf_stats_ = lf_stats_size ? (LFStats*)DO_ALIGN(mem) : NULL;
+  mem += lf_stats_size;
+
+  // top samples (all 16-aligned)
+  mem = (uint8_t*)DO_ALIGN(mem);
+  enc->y_top_ = (uint8_t*)mem;
+  enc->uv_top_ = enc->y_top_ + top_stride;
+  mem += 2 * top_stride;
+  mem = (uint8_t*)DO_ALIGN(mem + 1);
+  enc->y_left_ = (uint8_t*)mem;
+  mem += 16 + 16;
+  enc->u_left_ = (uint8_t*)mem;
+  mem += 16;
+  enc->v_left_ = (uint8_t*)mem;
+  mem += 8;
+
+  enc->config_ = config;
+  enc->profile_ = use_filter ? ((config->filter_type == 1) ? 0 : 1) : 2;
+  enc->pic_ = picture;
+  enc->percent_ = 0;
+
+  MapConfigToTools(enc);
+  VP8EncDspInit();
+  VP8DefaultProbas(enc);
+  ResetSegmentHeader(enc);
+  ResetFilterHeader(enc);
+  ResetBoundaryPredictions(enc);
+
+  VP8EncInitAlpha(enc);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+  VP8EncInitLayer(enc);
+#endif
+
+  return enc;  // to be released with DeleteVP8Encoder()
+}
+
+static void DeleteVP8Encoder(VP8Encoder* enc) {
+  if (enc != NULL) {  // NULL is accepted and ignored
+    VP8EncDeleteAlpha(enc);
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+    VP8EncDeleteLayer(enc);
+#endif
+    free(enc);  // enc is the start of the single block from InitVP8Encoder()
+  }
+}
+
+//------------------------------------------------------------------------------
+
+static double GetPSNR(uint64_t err, uint64_t size) {  // 255 = peak sample
+  return err ? 10. * log10(255. * 255. * size / err) : 99.;  // 99 if err == 0
+}
+
+static void FinalizePSNR(const VP8Encoder* const enc) {
+  WebPAuxStats* stats = enc->pic_->stats;  // not NULL-checked: caller's job
+  const uint64_t size = enc->sse_count_;
+  const uint64_t* const sse = enc->sse_;
+  stats->PSNR[0] = (float)GetPSNR(sse[0], size);      // Y
+  stats->PSNR[1] = (float)GetPSNR(sse[1], size / 4);  // U (presumably 4:2:0)
+  stats->PSNR[2] = (float)GetPSNR(sse[2], size / 4);  // V (presumably 4:2:0)
+  stats->PSNR[3] = (float)GetPSNR(sse[0] + sse[1] + sse[2], size * 3 / 2);
+  stats->PSNR[4] = (float)GetPSNR(sse[3], size);      // Alpha
+}
+
+static void StoreStats(VP8Encoder* const enc) {
+  WebPAuxStats* const stats = enc->pic_->stats;
+  if (stats != NULL) {  // statistics are optional
+    int i, s;
+    for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
+      stats->segment_level[i] = enc->dqm_[i].fstrength_;
+      stats->segment_quant[i] = enc->dqm_[i].quant_;
+      for (s = 0; s <= 2; ++s) {  // three residual categories per segment
+        stats->residual_bytes[s][i] = enc->residual_bytes_[s][i];
+      }
+    }
+    FinalizePSNR(enc);
+    stats->coded_size = enc->coded_size_;
+    for (i = 0; i < 3; ++i) {
+      stats->block_count[i] = enc->block_count_[i];
+    }
+  }
+  WebPReportProgress(enc->pic_, 100, &enc->percent_);  // done!
+}
+
+int WebPEncodingSetError(const WebPPicture* const pic,
+                         WebPEncodingError error) {
+  assert((int)error < VP8_ENC_ERROR_LAST);
+  assert((int)error >= VP8_ENC_OK);
+  ((WebPPicture*)pic)->error_code = error;  // casts away const to store it
+  return 0;  // always 0, so callers can 'return WebPEncodingSetError(...)'
+}
+
+int WebPReportProgress(const WebPPicture* const pic,
+                       int percent, int* const percent_store) {
+  if (percent_store != NULL && percent != *percent_store) {  // only on change
+    *percent_store = percent;  // remember the last reported value
+    if (pic->progress_hook && !pic->progress_hook(percent, pic)) {
+      // user abort requested
+      WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT);
+      return 0;
+    }
+  }
+  return 1;  // ok
+}
+//------------------------------------------------------------------------------
+
+int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
+  int ok;
+
+  if (pic == NULL)  // no picture: there is nowhere to store an error code
+    return 0;
+  WebPEncodingSetError(pic, VP8_ENC_OK);  // all ok so far
+  if (config == NULL)  // bad params
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+  if (!WebPValidateConfig(config))
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION);
+  if (pic->width <= 0 || pic->height <= 0)
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
+  if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION)
+    return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION);
+
+  if (pic->stats != NULL) memset(pic->stats, 0, sizeof(*pic->stats));
+
+  if (!config->lossless) {  // lossy path: needs the y/u/v planes
+    VP8Encoder* enc = NULL;
+    if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
+      if (pic->argb != NULL) {  // only ARGB samples given: convert them
+        if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
+      } else {
+        return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+      }
+    }
+
+    enc = InitVP8Encoder(config, pic);
+    if (enc == NULL) return 0;  // pic->error is already set.
+    // Note: each of the tasks below account for 20% in the progress report.
+    ok = VP8EncAnalyze(enc)
+      && VP8StatLoop(enc)
+      && VP8EncLoop(enc)
+      && VP8EncFinishAlpha(enc)
+#ifdef WEBP_EXPERIMENTAL_FEATURES
+      && VP8EncFinishLayer(enc)
+#endif
+      && VP8EncWrite(enc);
+    StoreStats(enc);  // NOTE(review): runs (and reports 100%) even when !ok
+    if (!ok) {
+      VP8EncFreeBitWriters(enc);
+    }
+    DeleteVP8Encoder(enc);
+  } else {  // lossless path: needs the argb plane
+    if (pic->argb == NULL)
+      return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
+
+    ok = VP8LEncodeImage(config, pic);  // Sets pic->error in case of problem.
+  }
+
+  return ok;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/encode.h b/drivers/webpold/encode.h
new file mode 100644
index 0000000000..2e37cfabe7
--- /dev/null
+++ b/drivers/webpold/encode.h
@@ -0,0 +1,463 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//   WebP encoder: main interface
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_ENCODE_H_
+#define WEBP_WEBP_ENCODE_H_
+
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define WEBP_ENCODER_ABI_VERSION 0x0200    // MAJOR(8b) + MINOR(8b)
+
+// Return the encoder's version number, packed in hexadecimal using 8bits for
+// each of major/minor/revision. E.g: v2.5.7 is 0x020507.
+WEBP_EXTERN(int) WebPGetEncoderVersion(void);
+
+//------------------------------------------------------------------------------
+// One-stop-shop call! No questions asked:
+
+// Returns the size of the compressed data (pointed to by *output), or 0 if
+// an error occurred. The compressed data must be released by the caller
+// using the call 'free(*output)'.
+// These functions compress using the lossy format, and the quality_factor
+// can go from 0 (smaller output, lower quality) to 100 (best quality,
+// larger output).
+WEBP_EXTERN(size_t) WebPEncodeRGB(const uint8_t* rgb,
+                                  int width, int height, int stride,
+                                  float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeBGR(const uint8_t* bgr,
+                                  int width, int height, int stride,
+                                  float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeRGBA(const uint8_t* rgba,
+                                   int width, int height, int stride,
+                                   float quality_factor, uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeBGRA(const uint8_t* bgra,
+                                   int width, int height, int stride,
+                                   float quality_factor, uint8_t** output);
+
+// These functions are the equivalent of the above, but compressing in a
+// lossless manner. Files are usually larger than lossy format, but will
+// not suffer any compression loss.
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGB(const uint8_t* rgb,
+                                          int width, int height, int stride,
+                                          uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGR(const uint8_t* bgr,
+                                          int width, int height, int stride,
+                                          uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessRGBA(const uint8_t* rgba,
+                                           int width, int height, int stride,
+                                           uint8_t** output);
+WEBP_EXTERN(size_t) WebPEncodeLosslessBGRA(const uint8_t* bgra,
+                                           int width, int height, int stride,
+                                           uint8_t** output);
+
+//------------------------------------------------------------------------------
+// Coding parameters
+
+// Image characteristics hint for the underlying encoder.
+typedef enum {
+  WEBP_HINT_DEFAULT = 0,  // default preset.
+  WEBP_HINT_PICTURE,      // digital picture, like portrait, inner shot
+  WEBP_HINT_PHOTO,        // outdoor photograph, with natural lighting
+  WEBP_HINT_GRAPH,        // Discrete tone image (graph, map-tile etc).
+  WEBP_HINT_LAST
+} WebPImageHint;
+
+typedef struct {
+  int lossless;           // Lossless encoding (0=lossy(default), 1=lossless).
+  float quality;          // between 0 (smallest file) and 100 (biggest)
+  int method;             // quality/speed trade-off (0=fast, 6=slower-better)
+
+  WebPImageHint image_hint;  // Hint for image type (lossless only for now).
+
+  // Parameters related to lossy compression only:
+  int target_size;        // if non-zero, set the desired target size in bytes.
+                          // Takes precedence over the 'compression' parameter.
+  float target_PSNR;      // if non-zero, specifies the minimal distortion to
+                          // try to achieve. Takes precedence over target_size.
+  int segments;           // maximum number of segments to use, in [1..4]
+  int sns_strength;       // Spatial Noise Shaping. 0=off, 100=maximum.
+  int filter_strength;    // range: [0 = off .. 100 = strongest]
+  int filter_sharpness;   // range: [0 = off .. 7 = least sharp]
+  int filter_type;        // filtering type: 0 = simple, 1 = strong (only used
+                          // if filter_strength > 0 or autofilter > 0)
+  int autofilter;         // Auto adjust filter's strength [0 = off, 1 = on]
+  int alpha_compression;  // Algorithm for encoding the alpha plane (0 = none,
+                          // 1 = compressed with WebP lossless). Default is 1.
+  int alpha_filtering;    // Predictive filtering method for alpha plane.
+                          //  0: none, 1: fast, 2: best. Default is 1.
+  int alpha_quality;      // Between 0 (smallest size) and 100 (lossless).
+                          // Default is 100.
+  int pass;               // number of entropy-analysis passes (in [1..10]).
+
+  int show_compressed;    // if true, export the compressed picture back.
+                          // In-loop filtering is not applied.
+  int preprocessing;      // preprocessing filter (0=none, 1=segment-smooth)
+  int partitions;         // log2(number of token partitions) in [0..3]. Default
+                          // is set to 0 for easier progressive decoding.
+  int partition_limit;    // quality degradation allowed to fit the 512k limit
+                          // on prediction modes coding (0: no degradation,
+                          // 100: maximum possible degradation).
+
+  uint32_t pad[8];        // padding for later use
+} WebPConfig;
+
+// Enumerate some predefined settings for WebPConfig, depending on the type
+// of source picture. These presets are used when calling WebPConfigPreset().
+typedef enum {
+  WEBP_PRESET_DEFAULT = 0,  // default preset.
+  WEBP_PRESET_PICTURE,      // digital picture, like portrait, inner shot
+  WEBP_PRESET_PHOTO,        // outdoor photograph, with natural lighting
+  WEBP_PRESET_DRAWING,      // hand or line drawing, with high-contrast details
+  WEBP_PRESET_ICON,         // small-sized colorful images
+  WEBP_PRESET_TEXT          // text-like
+} WebPPreset;
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPConfigInitInternal(WebPConfig*, WebPPreset, float, int);
+
+// Should always be called, to initialize a fresh WebPConfig structure before
+// modification. Returns false in case of version mismatch. WebPConfigInit()
+// must have succeeded before using the 'config' object.
+// Note that the default values are lossless=0 and quality=75.
+static WEBP_INLINE int WebPConfigInit(WebPConfig* config) {
+  return WebPConfigInitInternal(config, WEBP_PRESET_DEFAULT, 75.f,
+                                WEBP_ENCODER_ABI_VERSION);
+}
+
+// This function will initialize the configuration according to a predefined
+// set of parameters (referred to by 'preset') and a given quality factor.
+// This function can be called as a replacement to WebPConfigInit(). Will
+// return false in case of error.
+static WEBP_INLINE int WebPConfigPreset(WebPConfig* config,
+                                        WebPPreset preset, float quality) {
+  return WebPConfigInitInternal(config, preset, quality,
+                                WEBP_ENCODER_ABI_VERSION);
+}
+
+// Returns true if 'config' is non-NULL and all configuration parameters are
+// within their valid ranges.
+WEBP_EXTERN(int) WebPValidateConfig(const WebPConfig* config);
+
+//------------------------------------------------------------------------------
+// Input / Output
+
+typedef struct WebPPicture WebPPicture;   // main structure for I/O
+
+// Structure for storing auxiliary statistics (mostly for lossy encoding).
+typedef struct {
+  int coded_size;         // final size
+
+  float PSNR[5];          // peak-signal-to-noise ratio for Y/U/V/All/Alpha
+  int block_count[3];     // number of intra4/intra16/skipped macroblocks
+  int header_bytes[2];    // approximate number of bytes spent for header
+                          // and mode-partition #0
+  int residual_bytes[3][4];  // approximate number of bytes spent for
+                             // DC/AC/uv coefficients for each segment (0..3).
+  int segment_size[4];    // number of macroblocks in each segment
+  int segment_quant[4];   // quantizer value for each segment
+  int segment_level[4];   // filtering strength for each segment [0..63]
+
+  int alpha_data_size;    // size of the transparency data
+  int layer_data_size;    // size of the enhancement layer data
+
+  // lossless encoder statistics
+  uint32_t lossless_features;  // bit0:predictor bit1:cross-color transform
+                               // bit2:subtract-green bit3:color indexing
+  int histogram_bits;          // number of precision bits of histogram
+  int transform_bits;          // precision bits for transform
+  int cache_bits;              // number of bits for color cache lookup
+  int palette_size;            // number of color in palette, if used
+  int lossless_size;           // final lossless size
+
+  uint32_t pad[4];        // padding for later use
+} WebPAuxStats;
+
+// Signature for output function. Should return true if writing was successful.
+// data/data_size is the segment of data to write, and 'picture' is for
+// reference (and so one can make use of picture->custom_ptr).
+typedef int (*WebPWriterFunction)(const uint8_t* data, size_t data_size,
+                                  const WebPPicture* picture);
+
+// WebPMemoryWrite: a special WebPWriterFunction that writes to memory using
+// the following WebPMemoryWriter object (to be set as a custom_ptr).
+typedef struct {
+  uint8_t* mem;       // final buffer (of size 'max_size', larger than 'size').
+  size_t   size;      // final size
+  size_t   max_size;  // total capacity
+  uint32_t pad[1];    // padding for later use
+} WebPMemoryWriter;
+
+// The following must be called first before any use.
+WEBP_EXTERN(void) WebPMemoryWriterInit(WebPMemoryWriter* writer);
+
+// The custom writer to be used with WebPMemoryWriter as custom_ptr. Upon
+// completion, writer.mem and writer.size will hold the coded data.
+WEBP_EXTERN(int) WebPMemoryWrite(const uint8_t* data, size_t data_size,
+                                 const WebPPicture* picture);
+
+// Progress hook, called from time to time to report progress. It can return
+// false to request an abort of the encoding process, or true otherwise if
+// everything is OK.
+typedef int (*WebPProgressHook)(int percent, const WebPPicture* picture);
+
+typedef enum {
+  // chroma sampling
+  WEBP_YUV420 = 0,   // 4:2:0
+  WEBP_YUV422 = 1,   // 4:2:2
+  WEBP_YUV444 = 2,   // 4:4:4
+  WEBP_YUV400 = 3,   // grayscale
+  WEBP_CSP_UV_MASK = 3,   // bit-mask to get the UV sampling factors
+  // alpha channel variants
+  WEBP_YUV420A = 4,
+  WEBP_YUV422A = 5,
+  WEBP_YUV444A = 6,
+  WEBP_YUV400A = 7,   // grayscale + alpha
+  WEBP_CSP_ALPHA_BIT = 4   // bit that is set if alpha is present
+} WebPEncCSP;
+
+// Encoding error conditions.
+typedef enum {
+  VP8_ENC_OK = 0,
+  VP8_ENC_ERROR_OUT_OF_MEMORY,            // memory error allocating objects
+  VP8_ENC_ERROR_BITSTREAM_OUT_OF_MEMORY,  // memory error while flushing bits
+  VP8_ENC_ERROR_NULL_PARAMETER,           // a pointer parameter is NULL
+  VP8_ENC_ERROR_INVALID_CONFIGURATION,    // configuration is invalid
+  VP8_ENC_ERROR_BAD_DIMENSION,            // picture has invalid width/height
+  VP8_ENC_ERROR_PARTITION0_OVERFLOW,      // partition is bigger than 512k
+  VP8_ENC_ERROR_PARTITION_OVERFLOW,       // partition is bigger than 16M
+  VP8_ENC_ERROR_BAD_WRITE,                // error while flushing bytes
+  VP8_ENC_ERROR_FILE_TOO_BIG,             // file is bigger than 4G
+  VP8_ENC_ERROR_USER_ABORT,               // abort request by user
+  VP8_ENC_ERROR_LAST                      // list terminator. always last.
+} WebPEncodingError;
+
+// maximum width/height allowed (inclusive), in pixels
+#define WEBP_MAX_DIMENSION 16383
+
+// Main exchange structure (input samples, output bytes, statistics)
+struct WebPPicture {
+
+  //   INPUT
+  //////////////
+  // Main flag for encoder selecting between ARGB or YUV input.
+  // It is recommended to use ARGB input (*argb, argb_stride) for lossless
+  // compression, and YUV input (*y, *u, *v, etc.) for lossy compression
+  // since these are the respective native colorspace for these formats.
+  int use_argb;
+
+  // YUV input (mostly used for input to lossy compression)
+  WebPEncCSP colorspace;     // colorspace: should be YUV420 for now (=Y'CbCr).
+  int width, height;         // dimensions (less or equal to WEBP_MAX_DIMENSION)
+  uint8_t *y, *u, *v;        // pointers to luma/chroma planes.
+  int y_stride, uv_stride;   // luma/chroma strides.
+  uint8_t* a;                // pointer to the alpha plane
+  int a_stride;              // stride of the alpha plane
+  uint32_t pad1[2];          // padding for later use
+
+  // ARGB input (mostly used for input to lossless compression)
+  uint32_t* argb;            // Pointer to argb (32 bit) plane.
+  int argb_stride;           // This is stride in pixels units, not bytes.
+  uint32_t pad2[3];          // padding for later use
+
+  //   OUTPUT
+  ///////////////
+  // Byte-emission hook, to store compressed bytes as they are ready.
+  WebPWriterFunction writer;  // can be NULL
+  void* custom_ptr;           // can be used by the writer.
+
+  // map for extra information (only for lossy compression mode)
+  int extra_info_type;    // 1: intra type, 2: segment, 3: quant
+                          // 4: intra-16 prediction mode,
+                          // 5: chroma prediction mode,
+                          // 6: bit cost, 7: distortion
+  uint8_t* extra_info;    // if not NULL, points to an array of size
+                          // ((width + 15) / 16) * ((height + 15) / 16) that
+                          // will be filled with a macroblock map, depending
+                          // on extra_info_type.
+
+  //   STATS AND REPORTS
+  ///////////////////////////
+  // Pointer to side statistics (updated only if not NULL)
+  WebPAuxStats* stats;
+
+  // Error code for the latest error encountered during encoding
+  WebPEncodingError error_code;
+
+  // If not NULL, report progress during encoding.
+  WebPProgressHook progress_hook;
+
+  void* user_data;        // this field is free to be set to any value and
+                          // used during callbacks (like progress-report e.g.).
+
+  uint32_t pad3[3];       // padding for later use
+
+  // Unused for now: original samples (for non-YUV420 modes)
+  uint8_t *u0, *v0;
+  int uv0_stride;
+
+  uint32_t pad4[7];       // padding for later use
+
+  // PRIVATE FIELDS
+  ////////////////////
+  void* memory_;          // row chunk of memory for yuva planes
+  void* memory_argb_;     // and for argb too.
+  void* pad5[2];          // padding for later use
+};
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(int) WebPPictureInitInternal(WebPPicture*, int);
+
+// Should always be called, to initialize the structure. Returns false in case
+// of version mismatch. WebPPictureInit() must have succeeded before using the
+// 'picture' object.
+// Note that, by default, use_argb is false and colorspace is WEBP_YUV420.
+static WEBP_INLINE int WebPPictureInit(WebPPicture* picture) {
+  return WebPPictureInitInternal(picture, WEBP_ENCODER_ABI_VERSION);
+}
+
+//------------------------------------------------------------------------------
+// WebPPicture utils
+
+// Convenience allocation / deallocation based on picture->width/height:
+// Allocate y/u/v buffers as per colorspace/width/height specification.
+// Note! This function will free the previous buffer if needed.
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureAlloc(WebPPicture* picture);
+
+// Release the memory allocated by WebPPictureAlloc() or WebPPictureImport*().
+// Note that this function does _not_ free the memory used by the 'picture'
+// object itself.
+// Besides memory (which is reclaimed) all other fields of 'picture' are
+// preserved.
+WEBP_EXTERN(void) WebPPictureFree(WebPPicture* picture);
+
+// Copy the pixels of *src into *dst, using WebPPictureAlloc. Upon return,
+// *dst will fully own the copied pixels (this is not a view).
+// Returns false in case of memory allocation error.
+WEBP_EXTERN(int) WebPPictureCopy(const WebPPicture* src, WebPPicture* dst);
+
+// Compute PSNR or SSIM distortion between two pictures.
+// Result is in dB, stored in result[] in the Y/U/V/Alpha/All order.
+// Returns false in case of error (pic1 and pic2 don't have same dimension, ...)
+// Warning: this function is rather CPU-intensive.
+WEBP_EXTERN(int) WebPPictureDistortion(
+    const WebPPicture* pic1, const WebPPicture* pic2,
+    int metric_type,           // 0 = PSNR, 1 = SSIM
+    float result[5]);
+
+// self-crops a picture to the rectangle defined by top/left/width/height.
+// Returns false in case of memory allocation error, or if the rectangle is
+// outside of the source picture.
+// The rectangle for the view is defined by the top-left corner pixel
+// coordinates (left, top) as well as its width and height. This rectangle
+// must be fully comprised inside the 'src' source picture. If the source
+// picture uses the YUV420 colorspace, the top and left coordinates will be
+// snapped to even values.
+WEBP_EXTERN(int) WebPPictureCrop(WebPPicture* picture,
+                                 int left, int top, int width, int height);
+
+// Extracts a view from 'src' picture into 'dst'. The rectangle for the view
+// is defined by the top-left corner pixel coordinates (left, top) as well
+// as its width and height. This rectangle must be fully comprised inside
+// the 'src' source picture. If the source picture uses the YUV420 colorspace,
+// the top and left coordinates will be snapped to even values.
+// Picture 'src' must out-live 'dst' picture. Self-extraction of view is allowed
+// ('src' equal to 'dst') as a means of fast-cropping (but note that doing so,
+// the original dimension will be lost).
+// Returns false in case of memory allocation error or invalid parameters.
+WEBP_EXTERN(int) WebPPictureView(const WebPPicture* src,
+                                 int left, int top, int width, int height,
+                                 WebPPicture* dst);
+
+// Returns true if the 'picture' is actually a view and therefore does
+// not own the memory for pixels.
+WEBP_EXTERN(int) WebPPictureIsView(const WebPPicture* picture);
+
+// Rescale a picture to new dimension width x height.
+// No gamma correction is applied.
+// Returns false in case of error (invalid parameter or insufficient memory).
+WEBP_EXTERN(int) WebPPictureRescale(WebPPicture* pic, int width, int height);
+
+// Colorspace conversion function to import RGB samples.
+// Previous buffer will be free'd, if any.
+// *rgb buffer should have a size of at least height * rgb_stride.
+// Returns false in case of memory error.
+WEBP_EXTERN(int) WebPPictureImportRGB(
+    WebPPicture* picture, const uint8_t* rgb, int rgb_stride);
+// Same, but for RGBA buffer.
+WEBP_EXTERN(int) WebPPictureImportRGBA(
+    WebPPicture* picture, const uint8_t* rgba, int rgba_stride);
+// Same, but for RGBA buffer. Imports the RGB direct from the 32-bit format
+// input buffer ignoring the alpha channel. Avoids needing to copy the data
+// to a temporary 24-bit RGB buffer to import the RGB only.
+WEBP_EXTERN(int) WebPPictureImportRGBX(
+    WebPPicture* picture, const uint8_t* rgbx, int rgbx_stride);
+
+// Variants of the above, but taking BGR(A|X) input.
+WEBP_EXTERN(int) WebPPictureImportBGR(
+    WebPPicture* picture, const uint8_t* bgr, int bgr_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRA(
+    WebPPicture* picture, const uint8_t* bgra, int bgra_stride);
+WEBP_EXTERN(int) WebPPictureImportBGRX(
+    WebPPicture* picture, const uint8_t* bgrx, int bgrx_stride);
+
+// Converts picture->argb data to the YUVA format specified by 'colorspace'.
+// Upon return, picture->use_argb is set to false. The presence of real
+// non-opaque transparent values is detected, and 'colorspace' will be
+// adjusted accordingly. Note that this method is lossy.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureARGBToYUVA(WebPPicture* picture,
+                                       WebPEncCSP colorspace);
+
+// Converts picture->yuv to picture->argb and sets picture->use_argb to true.
+// The input format must be YUV_420 or YUV_420A.
+// Note that the use of this method is discouraged if one has access to the
+// raw ARGB samples, since using YUV420 is comparatively lossy. Also, the
+// conversion from YUV420 to ARGB incurs a small loss too.
+// Returns false in case of error.
+WEBP_EXTERN(int) WebPPictureYUVAToARGB(WebPPicture* picture);
+
+// Helper function: given a width x height plane of YUV(A) samples
+// (with stride 'stride'), clean-up the YUV samples under fully transparent
+// area, to help compressibility (no guarantee, though).
+WEBP_EXTERN(void) WebPCleanupTransparentArea(WebPPicture* picture);
+
+// Scan the picture 'picture' for the presence of non fully opaque alpha values.
+// Returns true in such case. Otherwise returns false (indicating that the
+// alpha plane can be ignored altogether e.g.).
+WEBP_EXTERN(int) WebPPictureHasTransparency(const WebPPicture* picture);
+
+//------------------------------------------------------------------------------
+// Main call
+
+// Main encoding call, after config and picture have been initialized.
+// 'picture' must be less than 16384x16384 in dimension (cf WEBP_MAX_DIMENSION),
+// and the 'config' object must be a valid one.
+// Returns false in case of error, true otherwise.
+// In case of error, picture->error_code is updated accordingly.
+// 'picture' can hold the source samples in both YUV(A) or ARGB input, depending
+// on the value of 'picture->use_argb'. It is highly recommended to use
+// the former for lossy encoding, and the latter for lossless encoding
+// (when config.lossless is true). Automatic conversion from one format to
+// another is provided but they both incur some loss.
+WEBP_EXTERN(int) WebPEncode(const WebPConfig* config, WebPPicture* picture);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_ENCODE_H_ */
diff --git a/drivers/webpold/format_constants.h b/drivers/webpold/format_constants.h
new file mode 100644
index 0000000000..7ce498f672
--- /dev/null
+++ b/drivers/webpold/format_constants.h
@@ -0,0 +1,90 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//  Internal header for constants related to WebP file format.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_WEBP_FORMAT_CONSTANTS_H_
+#define WEBP_WEBP_FORMAT_CONSTANTS_H_
+
+// VP8 related constants.
+#define VP8_SIGNATURE 0x9d012a              // Signature in VP8 data.
+#define VP8_MAX_PARTITION0_SIZE (1 << 19)   // max size of mode partition
+#define VP8_MAX_PARTITION_SIZE  (1 << 24)   // max size for token partition
+#define VP8_FRAME_HEADER_SIZE 10  // Size of the frame header within VP8 data.
+
+// VP8L related constants.
+#define VP8L_SIGNATURE_SIZE          1      // VP8L signature size.
+#define VP8L_MAGIC_BYTE              0x2f   // VP8L signature byte.
+#define VP8L_IMAGE_SIZE_BITS         14     // Number of bits used to store
+                                            // width and height.
+#define VP8L_VERSION_BITS            3      // 3 bits reserved for version.
+#define VP8L_VERSION                 0      // version 0
+#define VP8L_FRAME_HEADER_SIZE       5      // Size of the VP8L frame header.
+
+#define MAX_PALETTE_SIZE             256
+#define MAX_CACHE_BITS               11
+#define HUFFMAN_CODES_PER_META_CODE  5
+#define ARGB_BLACK                   0xff000000
+
+#define DEFAULT_CODE_LENGTH          8
+#define MAX_ALLOWED_CODE_LENGTH      15
+
+#define NUM_LITERAL_CODES            256
+#define NUM_LENGTH_CODES             24
+#define NUM_DISTANCE_CODES           40
+#define CODE_LENGTH_CODES            19
+
+#define MIN_HUFFMAN_BITS             2  // min number of Huffman bits
+#define MAX_HUFFMAN_BITS             9  // max number of Huffman bits
+
+#define TRANSFORM_PRESENT            1  // The bit to be written when next data
+                                        // to be read is a transform.
+#define NUM_TRANSFORMS               4  // Maximum number of allowed transform
+                                        // in a bitstream.
+typedef enum {  // the NUM_TRANSFORMS (4) transform types, numbered 0..3
+  PREDICTOR_TRANSFORM      = 0,
+  CROSS_COLOR_TRANSFORM    = 1,
+  SUBTRACT_GREEN           = 2,
+  COLOR_INDEXING_TRANSFORM = 3
+} VP8LImageTransformType;
+
+// Alpha related constants.
+#define ALPHA_HEADER_LEN            1
+#define ALPHA_NO_COMPRESSION        0
+#define ALPHA_LOSSLESS_COMPRESSION  1
+#define ALPHA_PREPROCESSED_LEVELS   1
+
+// Mux related constants.
+#define TAG_SIZE           4     // Size of a chunk tag (e.g. "VP8L").
+#define CHUNK_SIZE_BYTES   4     // Size needed to store chunk's size.
+#define CHUNK_HEADER_SIZE  8     // Size of a chunk header.
+#define RIFF_HEADER_SIZE   12    // Size of the RIFF header ("RIFFnnnnWEBP").
+#define FRAME_CHUNK_SIZE   15    // Size of a FRM chunk.
+#define LOOP_CHUNK_SIZE    2     // Size of a LOOP chunk.
+#define TILE_CHUNK_SIZE    6     // Size of a TILE chunk.
+#define VP8X_CHUNK_SIZE    10    // Size of a VP8X chunk.
+
+#define TILING_FLAG_BIT    0x01  // Set if tiles are possibly used.
+#define ANIMATION_FLAG_BIT 0x02  // Set if some animation is expected
+#define ICC_FLAG_BIT       0x04  // Whether ICC is present or not.
+#define METADATA_FLAG_BIT  0x08  // Set if some META chunk is possibly present.
+#define ALPHA_FLAG_BIT     0x10  // Should be same as the ALPHA_FLAG in mux.h
+#define ROTATION_FLAG_BITS 0xe0  // all 3 bits for rotation + symmetry
+
+#define MAX_CANVAS_SIZE     (1 << 24)    // 24-bit max for VP8X width/height.
+#define MAX_IMAGE_AREA      (1ULL << 32) // 32-bit max for width x height.
+#define MAX_LOOP_COUNT      (1 << 16)    // maximum value for loop-count
+#define MAX_DURATION        (1 << 24)    // maximum duration
+#define MAX_POSITION_OFFSET (1 << 24)    // maximum frame/tile x/y offset
+
+// Maximum chunk payload is such that adding the header and padding won't
+// overflow a uint32_t.
+#define MAX_CHUNK_PAYLOAD (~0U - CHUNK_HEADER_SIZE - 1)
+
+#endif  /* WEBP_WEBP_FORMAT_CONSTANTS_H_ */
diff --git a/drivers/webpold/image_loader_webp.cpp b/drivers/webpold/image_loader_webp.cpp
new file mode 100644
index 0000000000..9d8a616556
--- /dev/null
+++ b/drivers/webpold/image_loader_webp.cpp
@@ -0,0 +1,165 @@
+/*************************************************/
+/*  image_loader_webp.cpp                        */
+/*************************************************/
+/*            This file is part of:              */
+/*                GODOT ENGINE                   */
+/*************************************************/
+/*       Source code within this file is:        */
+/*  (c) 2007-2010 Juan Linietsky, Ariel Manzur   */
+/*             All Rights Reserved.              */
+/*************************************************/
+
+#include "image_loader_webp.h"
+
+#include "print_string.h"
+#include "os/os.h"
+#include "drivers/webp/decode.h"
+#include "drivers/webp/encode.h"
+#include "io/marshalls.h"
+#include <stdlib.h>
+
+// Lossy packer hook: encodes 'p_image' as WebP and returns the 4-byte tag
+// "WEBP" followed by the encoded stream, or an empty vector on failure.
+// 'p_quality' is multiplied by 100 and clamped to 0..100 for the encoder.
+static DVector<uint8_t> _webp_lossy_pack(const Image& p_image,float p_quality) {
+
+	ERR_FAIL_COND_V(p_image.empty(),DVector<uint8_t>());
+
+	// p_image is const: convert a copy, to RGBA only when alpha is detected.
+	Image img=p_image;
+	img.convert(img.detect_alpha()?Image::FORMAT_RGBA:Image::FORMAT_RGB);
+
+	Size2 dims(img.get_width(),img.get_height());
+	DVector<uint8_t> pixels = img.get_data();
+	DVector<uint8_t>::Read pixels_r = pixels.read();
+	const float quality = CLAMP(p_quality*100.0,0,100.0);
+
+	// The encoder hands its output back in 'encoded'; it is free()d below.
+	uint8_t *encoded=NULL;
+	size_t dst_size=0;
+	if (img.get_format()==Image::FORMAT_RGB)
+		dst_size = WebPEncodeRGB(pixels_r.ptr(),dims.width,dims.height,3*dims.width,quality,&encoded);
+	else
+		dst_size = WebPEncodeRGBA(pixels_r.ptr(),dims.width,dims.height,4*dims.width,quality,&encoded);
+
+	ERR_FAIL_COND_V(dst_size==0,DVector<uint8_t>());
+
+	DVector<uint8_t> packed;
+	packed.resize(4+dst_size);
+	DVector<uint8_t>::Write packed_w = packed.write();
+	copymem(&packed_w[0],"WEBP",4);
+	copymem(&packed_w[4],encoded,dst_size);
+	free(encoded);
+	packed_w=DVector<uint8_t>::Write();	// give up the write accessor before returning
+
+	return packed;
+}
+
+// Lossy unpacker hook: expects the 4-byte tag "WEBP" followed by a WebP
+// stream and returns it decoded as an RGB or RGBA Image. An empty Image is
+// returned when the tag is missing or the stream fails to parse or decode.
+static Image _webp_lossy_unpack(const DVector<uint8_t>& p_buffer) {
+
+	// Payload length once the tag is skipped; there must be at least 1 byte.
+	int size = p_buffer.size()-4;
+	ERR_FAIL_COND_V(size<=0,Image());
+	DVector<uint8_t>::Read r = p_buffer.read();
+	const uint8_t *payload = &r[4];
+
+	ERR_FAIL_COND_V(r[0]!='W' || r[1]!='E' || r[2]!='B' || r[3]!='P',Image());
+
+	WebPBitstreamFeatures features;
+	if (WebPGetFeatures(payload,size,&features)!=VP8_STATUS_OK) {
+		ERR_EXPLAIN("Error unpacking WEBP image:");
+		ERR_FAIL_V(Image());
+	}
+
+	// 4 bytes per pixel when the stream carries alpha, 3 otherwise.
+	const bool with_alpha = features.has_alpha;
+	const int channels = with_alpha?4:3;
+	const int datasize = features.width*features.height*channels;
+
+	DVector<uint8_t> dst_image;
+	dst_image.resize(datasize);
+	DVector<uint8_t>::Write dst_w = dst_image.write();
+
+	// Decode straight into the destination buffer; NULL signals failure.
+	const uint8_t *decoded = with_alpha
+		? WebPDecodeRGBAInto(payload,size,dst_w.ptr(),datasize,channels*features.width)
+		: WebPDecodeRGBInto(payload,size,dst_w.ptr(),datasize,channels*features.width);
+	bool errdec = decoded==NULL;
+	ERR_FAIL_COND_V(errdec,Image());
+
+	dst_w = DVector<uint8_t>::Write();
+
+	return Image(features.width,features.height,0,with_alpha?Image::FORMAT_RGBA:Image::FORMAT_RGB,dst_image);
+}
+
+
+// Decodes an entire WebP file read from 'f' into '*p_image'.
+// The stream is loaded in full, probed with WebPGetFeatures() for its size
+// and alpha flag, then decoded as RGBA (alpha present) or RGB (no alpha).
+// Returns OK on success, ERR_FILE_EOF when end-of-file was hit while reading,
+// and ERR_FILE_CORRUPT when the data cannot be parsed or decoded.
+// '*p_image' is only assigned once decoding has succeeded.
+Error ImageLoaderWEBP::load_image(Image *p_image,FileAccess *f) {
+
+	// Load the complete file into src_image.
+	uint32_t size = f->get_len();
+	DVector<uint8_t> src_image;
+	src_image.resize(size);
+
+	WebPBitstreamFeatures features;
+
+	DVector<uint8_t>::Write src_w = src_image.write();
+	f->get_buffer(src_w.ptr(),size);
+	ERR_FAIL_COND_V(f->eof_reached(), ERR_FILE_EOF);
+
+	if (WebPGetFeatures(src_w.ptr(),size,&features)!=VP8_STATUS_OK) {
+		// Not a stream the WebP decoder recognizes.
+		f->close();
+		ERR_FAIL_V(ERR_FILE_CORRUPT);
+	}
+
+	// The source is only read from here on: give up the write access.
+	src_w = DVector<uint8_t>::Write();
+
+	DVector<uint8_t> dst_image;
+	int datasize = features.width*features.height*(features.has_alpha?4:3);
+	dst_image.resize(datasize);
+
+	DVector<uint8_t>::Read src_r = src_image.read();
+	DVector<uint8_t>::Write dst_w = dst_image.write();
+
+	// The *Into() decoders fill dst_w and return NULL on failure.
+	bool errdec=false;
+	if (features.has_alpha) {
+		errdec = WebPDecodeRGBAInto(src_r.ptr(),size,dst_w.ptr(),datasize,4*features.width)==NULL;
+	} else {
+		errdec = WebPDecodeRGBInto(src_r.ptr(),size,dst_w.ptr(),datasize,3*features.width)==NULL;
+	}
+
+	ERR_FAIL_COND_V(errdec,ERR_FILE_CORRUPT);
+
+	src_r = DVector<uint8_t>::Read();
+	dst_w = DVector<uint8_t>::Write();
+
+	*p_image = Image(features.width,features.height,0,features.has_alpha?Image::FORMAT_RGBA:Image::FORMAT_RGB,dst_image);
+
+	return OK;
+
+}
+
+void ImageLoaderWEBP::get_recognized_extensions(List<String> *p_extensions) const {
+	// Appends "webp", the only extension this loader reports, to the list.
+	p_extensions->push_back("webp");
+}
+
+
+ImageLoaderWEBP::ImageLoaderWEBP() {
+	// Assign this file's WebP routines to Image::lossy_packer / lossy_unpacker.
+	Image::lossy_packer=_webp_lossy_pack;
+	Image::lossy_unpacker=_webp_lossy_unpack;
+}
+
+
diff --git a/drivers/webpold/image_loader_webp.h b/drivers/webpold/image_loader_webp.h
new file mode 100644
index 0000000000..8fc188cc9c
--- /dev/null
+++ b/drivers/webpold/image_loader_webp.h
@@ -0,0 +1,32 @@
+/*************************************************/
+/*  image_loader_webp.h                          */
+/*************************************************/
+/*            This file is part of:              */
+/*                GODOT ENGINE                   */
+/*************************************************/
+/*       Source code within this file is:        */
+/*  (c) 2007-2010 Juan Linietsky, Ariel Manzur   */
+/*             All Rights Reserved.              */
+/*************************************************/
+
+#ifndef IMAGE_LOADER_WEBP_H
+#define IMAGE_LOADER_WEBP_H
+
+#include "io/image_loader.h"
+
+/**
+	@author Juan Linietsky <reduzio@gmail.com>
+*/
+class ImageLoaderWEBP : public ImageFormatLoader {
+	// ImageFormatLoader implementation for WebP image files.
+
+public:
+	// Decodes the WebP data read from 'f' into '*p_image'.
+	virtual Error load_image(Image *p_image,FileAccess *f);
+	virtual void get_recognized_extensions(List<String> *p_extensions) const;	
+	ImageLoaderWEBP();
+};
+
+
+
+#endif
diff --git a/drivers/webpold/mux.h b/drivers/webpold/mux.h
new file mode 100644
index 0000000000..5139af80fa
--- /dev/null
+++ b/drivers/webpold/mux.h
@@ -0,0 +1,604 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//  RIFF container manipulation for WEBP images.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+// This API allows manipulation of WebP container images containing features
+// like Color profile, XMP metadata, Animation and Tiling.
+//
+// Code Example#1: Creating a MUX with image data, color profile and XMP
+// metadata.
+//
+//   int copy_data = 0;
+//   WebPMux* mux = WebPMuxNew();
+//   // ... (Prepare image data).
+//   WebPMuxSetImage(mux, &image, copy_data);
+//   // ... (Prepare ICCP color profile data).
+//   WebPMuxSetColorProfile(mux, &icc_profile, copy_data);
+//   // ... (Prepare XMP metadata).
+//   WebPMuxSetMetadata(mux, &xmp, copy_data);
+//   // Get data from mux in WebP RIFF format.
+//   WebPMuxAssemble(mux, &output_data);
+//   WebPMuxDelete(mux);
+//   // ... (Consume output_data; e.g. write output_data.bytes_ to file).
+//   WebPDataClear(&output_data);
+//
+// Code Example#2: Get image and color profile data from a WebP file.
+//
+//   int copy_data = 0;
+//   // ... (Read data from file).
+//   WebPMux* mux = WebPMuxCreate(&data, copy_data);
+//   WebPMuxGetImage(mux, &image);
+//   // ... (Consume image; e.g. call WebPDecode() to decode the data).
+//   WebPMuxGetColorProfile(mux, &icc_profile);
+//   // ... (Consume icc_data).
+//   WebPMuxDelete(mux);
+//   free(data);
+
+#ifndef WEBP_WEBP_MUX_H_
+#define WEBP_WEBP_MUX_H_
+
+#include "./types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define WEBP_MUX_ABI_VERSION 0x0100        // MAJOR(8b) + MINOR(8b)
+
+// Error codes. Success is 1 and "not found" is 0; the other failures are < 0.
+typedef enum {
+  WEBP_MUX_OK                 =  1,
+  WEBP_MUX_NOT_FOUND          =  0,
+  WEBP_MUX_INVALID_ARGUMENT   = -1,
+  WEBP_MUX_BAD_DATA           = -2,
+  WEBP_MUX_MEMORY_ERROR       = -3,
+  WEBP_MUX_NOT_ENOUGH_DATA    = -4
+} WebPMuxError;
+
+// Flag values for different features used in VP8X chunk.
+typedef enum {  // each value is a distinct single bit (0x01 .. 0x10)
+  TILE_FLAG       = 0x00000001,
+  ANIMATION_FLAG  = 0x00000002,
+  ICCP_FLAG       = 0x00000004,
+  META_FLAG       = 0x00000008,
+  ALPHA_FLAG      = 0x00000010
+} WebPFeatureFlags;
+
+// IDs for different types of chunks.
+typedef enum {         // implicitly numbered: 0 (VP8X) through 9 (NIL)
+  WEBP_CHUNK_VP8X,     // VP8X
+  WEBP_CHUNK_ICCP,     // ICCP
+  WEBP_CHUNK_LOOP,     // LOOP
+  WEBP_CHUNK_FRAME,    // FRM
+  WEBP_CHUNK_TILE,     // TILE
+  WEBP_CHUNK_ALPHA,    // ALPH
+  WEBP_CHUNK_IMAGE,    // VP8/VP8L
+  WEBP_CHUNK_META,     // META
+  WEBP_CHUNK_UNKNOWN,  // Other chunks.
+  WEBP_CHUNK_NIL
+} WebPChunkId;
+
+typedef struct WebPMux WebPMux;   // main opaque object.
+
+// Data type used to describe 'raw' data, e.g., chunk data
+// (ICC profile, metadata) and WebP compressed image data.
+typedef struct {
+  const uint8_t* bytes_;  // the data itself; read-only through this pointer
+  size_t size_;           // NOTE(review): presumably the byte length of bytes_
+} WebPData;
+
+//------------------------------------------------------------------------------
+// Manipulation of a WebPData object.
+
+// Initializes the contents of the 'webp_data' object with default values.
+WEBP_EXTERN(void) WebPDataInit(WebPData* webp_data);
+
+// Clears the contents of the 'webp_data' object by calling free(). Does not
+// deallocate the object itself.
+WEBP_EXTERN(void) WebPDataClear(WebPData* webp_data);
+
+// Allocates necessary storage for 'dst' and copies the contents of 'src'.
+// Returns true on success.
+WEBP_EXTERN(int) WebPDataCopy(const WebPData* src, WebPData* dst);
+
+//------------------------------------------------------------------------------
+// Life of a Mux object
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPMux*) WebPNewInternal(int);
+
+// Creates an empty mux object; WEBP_MUX_ABI_VERSION is forwarded to the
+// internal, version-checked entry point.
+// Returns a pointer to the newly created empty mux object.
+static WEBP_INLINE WebPMux* WebPMuxNew(void) {
+  return WebPNewInternal(WEBP_MUX_ABI_VERSION);
+}
+
+// Deletes the mux object.
+// Parameters:
+//   mux - (in/out) object to be deleted
+WEBP_EXTERN(void) WebPMuxDelete(WebPMux* mux);
+
+//------------------------------------------------------------------------------
+// Mux creation.
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPMux*) WebPMuxCreateInternal(const WebPData*, int, int);
+
+// Creates a mux object from raw data given in WebP RIFF format.
+// Parameters:
+//   bitstream - (in) the bitstream data in WebP RIFF format
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux,
+//               and value 0 indicates data will NOT be copied.
+// Returns:
+//   A pointer to the mux object created from given data - on success.
+//   NULL - In case of invalid data or memory error.
+static WEBP_INLINE WebPMux* WebPMuxCreate(const WebPData* bitstream,
+                                          int copy_data) {
+  return WebPMuxCreateInternal(bitstream, copy_data, WEBP_MUX_ABI_VERSION);
+}
+
+//------------------------------------------------------------------------------
+// Single Image.
+
+// Sets the image in the mux object. Any existing images (including frame/tile)
+// will be removed.
+// Parameters:
+//   mux - (in/out) object in which the image is to be set
+//   bitstream - (in) can either be a raw VP8/VP8L bitstream or a single-image
+//               WebP file (non-animated and non-tiled)
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux,
+//               and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetImage(WebPMux* mux,
+                                          const WebPData* bitstream,
+                                          int copy_data);
+
+// Gets image data from the mux object.
+// The content of 'bitstream' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear().
+// Parameters:
+//   mux - (in) object from which the image is to be fetched
+//   bitstream - (out) the image data
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux or bitstream is NULL
+//                               OR mux contains animation/tiling.
+//   WEBP_MUX_NOT_FOUND - if image is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetImage(const WebPMux* mux,
+                                          WebPData* bitstream);
+
+// Deletes the image in the mux object.
+// Parameters:
+//   mux - (in/out) object from which the image is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//                               OR if mux contains animation/tiling.
+//   WEBP_MUX_NOT_FOUND - if image is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteImage(WebPMux* mux);
+
+//------------------------------------------------------------------------------
+// XMP Metadata.
+
+// Sets the XMP metadata in the mux object. Any existing metadata chunk(s) will
+// be removed.
+// Parameters:
+//   mux - (in/out) object to which the XMP metadata is to be added
+//   metadata - (in) the XMP metadata data to be added
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux,
+//               and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or metadata is NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetMetadata(WebPMux* mux,
+                                             const WebPData* metadata,
+                                             int copy_data);
+
+// Gets a reference to the XMP metadata in the mux object.
+// The caller should NOT free the returned data.
+// Parameters:
+//   mux - (in) object from which the XMP metadata is to be fetched
+//   metadata - (out) XMP metadata
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux or metadata is NULL.
+//   WEBP_MUX_NOT_FOUND - if metadata is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetMetadata(const WebPMux* mux,
+                                             WebPData* metadata);
+
+// Deletes the XMP metadata in the mux object.
+// Parameters:
+//   mux - (in/out) object from which XMP metadata is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_NOT_FOUND - If mux does not contain metadata.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteMetadata(WebPMux* mux);
+
+//------------------------------------------------------------------------------
+// ICC Color Profile.
+
+// Sets the color profile in the mux object. Any existing color profile chunk(s)
+// will be removed.
+// Parameters:
+//   mux - (in/out) object to which the color profile is to be added
+//   color_profile - (in) the color profile data to be added
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux,
+//               and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or color_profile is NULL
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error
+//   WEBP_MUX_OK - on success
+WEBP_EXTERN(WebPMuxError) WebPMuxSetColorProfile(WebPMux* mux,
+                                                 const WebPData* color_profile,
+                                                 int copy_data);
+
+// Gets a reference to the color profile in the mux object.
+// The caller should NOT free the returned data.
+// Parameters:
+//   mux - (in) object from which the color profile data is to be fetched
+//   color_profile - (out) color profile data
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux or color_profile is NULL.
+//   WEBP_MUX_NOT_FOUND - if color profile is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetColorProfile(const WebPMux* mux,
+                                                 WebPData* color_profile);
+
+// Deletes the color profile in the mux object.
+// Parameters:
+//   mux - (in/out) object from which color profile is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_NOT_FOUND - If mux does not contain color profile.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteColorProfile(WebPMux* mux);
+
+//------------------------------------------------------------------------------
+// Animation.
+
+// Adds an animation frame at the end of the mux object.
+// Note: as WebP only supports even offsets, any odd offset will be snapped to
+// an even location using: offset &= ~1
+// Parameters:
+//   mux - (in/out) object to which an animation frame is to be added
+//   bitstream - (in) the image data corresponding to the frame. It can either
+//               be a raw VP8/VP8L bitstream or a single-image WebP file
+//               (non-animated and non-tiled)
+//   x_offset - (in) x-offset of the frame to be added
+//   y_offset - (in) y-offset of the frame to be added
+//   duration - (in) duration of the frame to be added (in milliseconds)
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux,
+//               and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxPushFrame(
+    WebPMux* mux, const WebPData* bitstream,
+    int x_offset, int y_offset, int duration, int copy_data);
+
+// TODO(urvang): Create a struct as follows to reduce argument list size:
+// typedef struct {
+//  WebPData bitstream;
+//  int x_offset, y_offset;
+//  int duration;
+// } FrameInfo;
+
+// Gets the nth animation frame from the mux object.
+// The content of 'bitstream' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear().
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   nth - (in) index of the frame in the mux object
+//   bitstream - (out) the image data
+//   x_offset - (out) x-offset of the returned frame
+//   y_offset - (out) y-offset of the returned frame
+//   duration - (out) duration of the returned frame (in milliseconds)
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux, bitstream, x_offset,
+//                               y_offset, or duration is NULL
+//   WEBP_MUX_NOT_FOUND - if there are less than nth frames in the mux object.
+//   WEBP_MUX_BAD_DATA - if nth frame chunk in mux is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetFrame(
+    const WebPMux* mux, uint32_t nth, WebPData* bitstream,
+    int* x_offset, int* y_offset, int* duration);
+
+// Deletes an animation frame from the mux object.
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in/out) object from which a frame is to be deleted
+//   nth - (in) The position from which the frame is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_NOT_FOUND - If there are less than nth frames in the mux object
+//                        before deletion.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth);
+
+// Sets the animation loop count in the mux object. Any existing loop count
+// value(s) will be removed.
+// Parameters:
+//   mux - (in/out) object in which loop chunk is to be set/added
+//   loop_count - (in) animation loop count value.
+//                Note that loop_count of zero denotes infinite loop.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxSetLoopCount(WebPMux* mux, int loop_count);
+
+// Gets the animation loop count from the mux object.
+// Parameters:
+//   mux - (in) object from which the loop count is to be fetched
+//   loop_count - (out) the loop_count value present in the LOOP chunk
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either of mux or loop_count is NULL
+//   WEBP_MUX_NOT_FOUND - if loop chunk is not present in mux object.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetLoopCount(const WebPMux* mux,
+                                              int* loop_count);
+
+//------------------------------------------------------------------------------
+// Tiling.
+
+// Adds a tile at the end of the mux object.
+// Note: as WebP only supports even offsets, any odd offset will be snapped to
+// an even location using: offset &= ~1
+// Parameters:
+//   mux - (in/out) object to which a tile is to be added.
+//   bitstream - (in) the image data corresponding to the frame. It can either
+//               be a raw VP8/VP8L bitstream or a single-image WebP file
+//               (non-animated and non-tiled)
+//   x_offset - (in) x-offset of the tile to be added
+//   y_offset - (in) y-offset of the tile to be added
+//   copy_data - (in) value 1 indicates given data WILL be copied to the mux,
+//               and value 0 indicates data will NOT be copied.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL or bitstream is NULL
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxPushTile(
+    WebPMux* mux, const WebPData* bitstream,
+    int x_offset, int y_offset, int copy_data);
+
+// Gets the nth tile from the mux object.
+// The content of 'bitstream' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear().
+// nth=0 has a special meaning - last position.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   nth - (in) index of the tile in the mux object
+//   bitstream - (out) the image data
+//   x_offset - (out) x-offset of the returned tile
+//   y_offset - (out) y-offset of the returned tile
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux, bitstream, x_offset or
+//                               y_offset is NULL
+//   WEBP_MUX_NOT_FOUND - if there are less than nth tiles in the mux object.
+//   WEBP_MUX_BAD_DATA - if nth tile chunk in mux is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetTile(
+    const WebPMux* mux, uint32_t nth, WebPData* bitstream,
+    int* x_offset, int* y_offset);
+
+// Deletes a tile from the mux object.
+// nth=0 has a special meaning - last position
+// Parameters:
+//   mux - (in/out) object from which a tile is to be deleted
+//   nth - (in) The position from which the tile is to be deleted
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux is NULL
+//   WEBP_MUX_NOT_FOUND - If there are less than nth tiles in the mux object
+//                        before deletion.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxDeleteTile(WebPMux* mux, uint32_t nth);
+
+//------------------------------------------------------------------------------
+// Misc Utilities.
+
+// Gets the feature flags from the mux object.
+// Parameters:
+//   mux - (in) object from which the features are to be fetched
+//   flags - (out) the flags specifying which features are present in the
+//           mux object. This will be an OR of various flag values.
+//           Enum 'WebPFeatureFlags' can be used to test individual flag values.
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if mux or flags is NULL
+//   WEBP_MUX_NOT_FOUND - if VP8X chunk is not present in mux object.
+//   WEBP_MUX_BAD_DATA - if VP8X chunk in mux is invalid.
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxGetFeatures(const WebPMux* mux,
+                                             uint32_t* flags);
+
+// Gets the number of chunks with the given chunk id in the mux object.
+// Parameters:
+//   mux - (in) object from which the info is to be fetched
+//   id - (in) chunk id specifying the type of chunk
+//   num_elements - (out) number of chunks with the given chunk id
+// Returns:
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux, or num_elements is NULL
+//   WEBP_MUX_OK - on success.
+WEBP_EXTERN(WebPMuxError) WebPMuxNumChunks(const WebPMux* mux,
+                                           WebPChunkId id, int* num_elements);
+
+// Assembles all chunks in WebP RIFF format and returns in 'assembled_data'.
+// This function also validates the mux object.
+// Note: The content of 'assembled_data' will be ignored and overwritten.
+// Also, the content of 'assembled_data' is allocated using malloc(), and NOT
+// owned by the 'mux' object. It MUST be deallocated by the caller by calling
+// WebPDataClear().
+// Parameters:
+//   mux - (in/out) object whose chunks are to be assembled
+//   assembled_data - (out) assembled WebP data
+// Returns:
+//   WEBP_MUX_BAD_DATA - if mux object is invalid.
+//   WEBP_MUX_INVALID_ARGUMENT - if either mux or assembled_data is
+//                               NULL.
+//   WEBP_MUX_MEMORY_ERROR - on memory allocation error.
+//   WEBP_MUX_OK - on success
+WEBP_EXTERN(WebPMuxError) WebPMuxAssemble(WebPMux* mux,
+                                          WebPData* assembled_data);
+
+//------------------------------------------------------------------------------
+// Demux API.
+// Enables extraction of image and extended format data from WebP files.
+
+#define WEBP_DEMUX_ABI_VERSION 0x0100    // MAJOR(8b) + MINOR(8b)
+
+typedef struct WebPDemuxer WebPDemuxer;
+
+typedef enum {
+  WEBP_DEMUX_PARSING_HEADER,  // Not enough data to parse full header.
+  WEBP_DEMUX_PARSED_HEADER,   // Header parsing complete, data may be available.
+  WEBP_DEMUX_DONE             // Entire file has been parsed.
+} WebPDemuxState;
+
+//------------------------------------------------------------------------------
+// Life of a Demux object
+
+// Internal, version-checked, entry point
+WEBP_EXTERN(WebPDemuxer*) WebPDemuxInternal(
+    const WebPData*, int, WebPDemuxState*, int);
+
+// Parses the WebP file given by 'data', which must hold the complete file
+// for the function to succeed ('state' is not reported by this variant).
+// Returns a WebPDemuxer object on successful parse, NULL otherwise.
+static WEBP_INLINE WebPDemuxer* WebPDemux(const WebPData* data) {
+  return WebPDemuxInternal(data, 0, NULL, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Parses the WebP file given by 'data' (presumably allowed to be incomplete).
+// If 'state' is non-NULL it will be set to indicate the status of the demuxer.
+// Returns a WebPDemuxer object on successful parse, NULL otherwise.
+static WEBP_INLINE WebPDemuxer* WebPDemuxPartial(
+    const WebPData* data, WebPDemuxState* state) {
+  return WebPDemuxInternal(data, 1, state, WEBP_DEMUX_ABI_VERSION);
+}
+
+// Frees memory associated with 'dmux'.
+WEBP_EXTERN(void) WebPDemuxDelete(WebPDemuxer* dmux);
+
+//------------------------------------------------------------------------------
+// Data/information extraction.
+
+typedef enum {
+  WEBP_FF_FORMAT_FLAGS,  // Extended format flags present in the 'VP8X' chunk.
+  WEBP_FF_CANVAS_WIDTH,
+  WEBP_FF_CANVAS_HEIGHT,
+  WEBP_FF_LOOP_COUNT
+} WebPFormatFeature;
+
+// Get the 'feature' value from the 'dmux'.
+// NOTE: values are only valid if WebPDemux() was used or WebPDemuxPartial()
+// returned a state > WEBP_DEMUX_PARSING_HEADER.
+WEBP_EXTERN(uint32_t) WebPDemuxGetI(
+    const WebPDemuxer* dmux, WebPFormatFeature feature);
+
+//------------------------------------------------------------------------------
+// Frame iteration.
+
+typedef struct {
+  int frame_num_;            // current frame (see WebPDemuxNextFrame()).
+  int num_frames_;           // presumably the total frame count -- confirm.
+  int tile_num_;             // current tile (see WebPDemuxSelectTile()).
+  int num_tiles_;            // presumably the frame's tile count -- confirm.
+  int x_offset_, y_offset_;  // offset relative to the canvas.
+  int width_, height_;       // dimensions of this frame or tile.
+  int duration_;   // display duration in milliseconds.
+  int complete_;   // true if 'tile_' contains a full frame. partial images may
+                   // still be decoded with the WebP incremental decoder.
+  WebPData tile_;  // The frame or tile given by 'frame_num_' and 'tile_num_'.
+
+  uint32_t pad[4];           // padding for later use
+  void* private_;            // NOTE(review): looks library-internal -- confirm.
+} WebPIterator;
+
+// Retrieves frame 'frame_number' from 'dmux'.
+// 'iter->tile_' points to the first tile on return from this function.
+// Individual tiles may be extracted using WebPDemuxSelectTile().
+// Setting 'frame_number' equal to 0 will return the last frame of the image.
+// Returns false if 'dmux' is NULL or frame 'frame_number' is not present.
+// Call WebPDemuxReleaseIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of 'iter'.
+WEBP_EXTERN(int) WebPDemuxGetFrame(
+    const WebPDemuxer* dmux, int frame_number, WebPIterator* iter);
+
+// Sets 'iter->tile_' to point to the next ('iter->frame_num_' + 1) or previous
+// ('iter->frame_num_' - 1) frame. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextFrame(WebPIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevFrame(WebPIterator* iter);
+
+// Sets 'iter->tile_' to reflect tile number 'tile_number'.
+// Returns true if tile 'tile_number' is present, false otherwise.
+WEBP_EXTERN(int) WebPDemuxSelectTile(WebPIterator* iter, int tile_number);
+
+// Releases any memory associated with 'iter'.
+// Must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseIterator(WebPIterator* iter);
+
+//------------------------------------------------------------------------------
+// Chunk iteration.
+
+typedef struct {
+  // The current and total number of chunks with the fourcc given to
+  // WebPDemuxGetChunk().
+  int chunk_num_;
+  int num_chunks_;
+  WebPData chunk_;    // The payload of the chunk.
+
+  uint32_t pad[6];    // padding for later use
+  void* private_;
+} WebPChunkIterator;
+
+// Retrieves the 'chunk_number' instance of the chunk with id 'fourcc' from
+// 'dmux'.
+// 'fourcc' is a character array containing the fourcc of the chunk to return,
+// e.g., "ICCP", "META", "EXIF", etc.
+// Setting 'chunk_number' equal to 0 will return the last chunk in a set.
+// Returns true if the chunk is found, false otherwise. Image related chunk
+// payloads are accessed through WebPDemuxGetFrame() and related functions.
+// Call WebPDemuxReleaseChunkIterator() when use of the iterator is complete.
+// NOTE: 'dmux' must persist for the lifetime of the iterator.
+WEBP_EXTERN(int) WebPDemuxGetChunk(const WebPDemuxer* dmux,
+                                   const char fourcc[4], int chunk_number,
+                                   WebPChunkIterator* iter);
+
+// Sets 'iter->chunk_' to point to the next ('iter->chunk_num_' + 1) or previous
+// ('iter->chunk_num_' - 1) chunk. These functions do not loop.
+// Returns true on success, false otherwise.
+WEBP_EXTERN(int) WebPDemuxNextChunk(WebPChunkIterator* iter);
+WEBP_EXTERN(int) WebPDemuxPrevChunk(WebPChunkIterator* iter);
+
+// Releases any memory associated with 'iter'.
+// Must be called before destroying the associated WebPDemuxer with
+// WebPDemuxDelete().
+WEBP_EXTERN(void) WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_WEBP_MUX_H_ */
diff --git a/drivers/webp/mux/demux.c b/drivers/webpold/mux/demux.c
index 501d08f41d..501d08f41d 100644
--- a/drivers/webp/mux/demux.c
+++ b/drivers/webpold/mux/demux.c
diff --git a/drivers/webpold/mux/muxedit.c b/drivers/webpold/mux/muxedit.c
new file mode 100644
index 0000000000..08629d4ae2
--- /dev/null
+++ b/drivers/webpold/mux/muxedit.c
@@ -0,0 +1,712 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Set and delete APIs for mux.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#include <assert.h>
+#include "./muxi.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Life of a mux object.
+
+// Resets every byte of '*mux' to zero. A NULL 'mux' is silently ignored.
+static void MuxInit(WebPMux* const mux) {
+  if (mux != NULL) memset(mux, 0, sizeof(*mux));
+}
+
+// Allocates a zero-initialized mux object. Returns NULL when the caller's ABI
+// 'version' is incompatible with WEBP_MUX_ABI_VERSION or when malloc() fails.
+WebPMux* WebPNewInternal(int version) {
+  WebPMux* new_mux;
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) return NULL;
+
+  new_mux = (WebPMux*)malloc(sizeof(*new_mux));
+  MuxInit(new_mux);  // Harmless no-op when the allocation failed.
+  return new_mux;
+}
+
+// Empties '*chunk_list' by passing each head to ChunkDelete() in turn.
+static void DeleteAllChunks(WebPChunk** const chunk_list) {
+  WebPChunk* head = *chunk_list;
+  while (head != NULL) head = ChunkDelete(head);
+  *chunk_list = NULL;
+}
+
+static void MuxRelease(WebPMux* const mux) {
+  if (mux == NULL) return;           // Nothing to release.
+  MuxImageDeleteAll(&mux->images_);  // All images in the 'images_' list.
+  DeleteAllChunks(&mux->vp8x_);      // Then each of the plain chunk lists.
+  DeleteAllChunks(&mux->iccp_);
+  DeleteAllChunks(&mux->loop_);
+  DeleteAllChunks(&mux->meta_);
+  DeleteAllChunks(&mux->unknown_);
+}  // The WebPMux struct itself is not freed here.
+
+void WebPMuxDelete(WebPMux* mux) {
+  // NULL is fine here: MuxRelease() returns early and free(NULL) is a noop.
+  MuxRelease(mux);
+  free(mux);  // Releases the WebPMux struct itself.
+}
+
+//------------------------------------------------------------------------------
+// Helper method(s).
+
+// Handy MACRO, makes MuxSet() very symmetric to MuxGet(). On a failed insert
+// the local chunk is released, so a copied payload is not leaked.
+#define SWITCH_ID_LIST(INDEX, LIST)                                            \
+  if (idx == (INDEX)) {                                                        \
+    err = ChunkAssignData(&chunk, data, copy_data, kChunks[(INDEX)].tag);      \
+    if (err == WEBP_MUX_OK) err = ChunkSetNth(&chunk, (LIST), nth);            \
+    if (err != WEBP_MUX_OK) ChunkRelease(&chunk);                              \
+    return err;                                                                \
+  }
+
+// Stores 'data' at position 'nth' of the list chosen by 'idx'; an IDX_UNKNOWN
+// payload starts with its own tag. WEBP_MUX_NOT_FOUND if no branch applies.
+static WebPMuxError MuxSet(WebPMux* const mux, CHUNK_INDEX idx, uint32_t nth,
+                           const WebPData* const data, int copy_data) {
+  WebPChunk chunk;
+  WebPMuxError err = WEBP_MUX_NOT_FOUND;
+  assert(mux != NULL);
+  assert(!IsWPI(kChunks[idx].id));
+  ChunkInit(&chunk);
+  SWITCH_ID_LIST(IDX_VP8X, &mux->vp8x_);
+  SWITCH_ID_LIST(IDX_ICCP, &mux->iccp_);
+  SWITCH_ID_LIST(IDX_LOOP, &mux->loop_);
+  SWITCH_ID_LIST(IDX_META, &mux->meta_);
+  if (idx == IDX_UNKNOWN && data->size_ > TAG_SIZE) {
+    // The first four bytes are the tag; the payload proper follows them.
+    const WebPData tmp = { data->bytes_ + TAG_SIZE, data->size_ - TAG_SIZE };
+    err = ChunkAssignData(&chunk, &tmp, copy_data, GetLE32(data->bytes_ + 0));
+    if (err == WEBP_MUX_OK) err = ChunkSetNth(&chunk, &mux->unknown_, nth);
+    if (err != WEBP_MUX_OK) ChunkRelease(&chunk);  // Do not leak the copy.
+  }
+  return err;
+}
+#undef SWITCH_ID_LIST
+
+// Wraps the raw 'data'/'size' buffer in a WebPData and forwards it to MuxSet()
+// under the chunk index that ChunkGetIndexFromTag() reports for 'tag'.
+static WebPMuxError MuxAddChunk(WebPMux* const mux, uint32_t nth, uint32_t tag,
+                                const uint8_t* data, size_t size,
+                                int copy_data) {
+  const WebPData payload = { data, size };
+  const CHUNK_INDEX chunk_idx = ChunkGetIndexFromTag(tag);
+  assert(mux != NULL && size <= MAX_CHUNK_PAYLOAD && chunk_idx != IDX_NIL);
+  return MuxSet(mux, chunk_idx, nth, &payload, copy_data);
+}
+
+// Builds the payload of a FRAME or TILE chunk for 'image' and returns it in
+// 'frame_tile' (malloc'ed here). Offsets are stored halved; width, height and
+// duration are stored minus one, for frames only.
+static WebPMuxError CreateFrameTileData(const WebPData* const image,
+                                        int x_offset, int y_offset,
+                                        int duration, int is_lossless,
+                                        int is_frame,
+                                        WebPData* const frame_tile) {
+  const size_t payload_size = kChunks[is_frame ? IDX_FRAME : IDX_TILE].size;
+  uint8_t* payload;
+  int w, h;
+  int parsed;
+
+  if (is_lossless) {
+    parsed = VP8LGetInfo(image->bytes_, image->size_, &w, &h, NULL);
+  } else {
+    parsed = VP8GetInfo(image->bytes_, image->size_, image->size_, &w, &h);
+  }
+  if (!parsed) return WEBP_MUX_INVALID_ARGUMENT;
+  assert(w > 0 && h > 0 && duration > 0);  // Upper bounds: see PutLE24().
+
+  payload = (uint8_t*)malloc(payload_size);
+  if (payload == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+  PutLE24(payload + 0, x_offset / 2);
+  PutLE24(payload + 3, y_offset / 2);
+  if (is_frame) {
+    PutLE24(payload + 6, w - 1);
+    PutLE24(payload + 9, h - 1);
+    PutLE24(payload + 12, duration - 1);
+  }
+  frame_tile->bytes_ = payload;
+  frame_tile->size_ = payload_size;
+  return WEBP_MUX_OK;
+}
+
+// Extracts the VP8/VP8L payload ('image') and, if present, the alpha payload
+// ('alpha') from 'bitstream': a single-image WebP file or raw VP8/VP8L data.
+// Also outputs 'is_lossless' to be true if the given bitstream is lossless.
+static WebPMuxError GetImageData(const WebPData* const bitstream,
+                                 WebPData* const image, WebPData* const alpha,
+                                 int* const is_lossless) {
+  WebPDataInit(alpha);  // Default: no alpha.
+  if (bitstream->size_ < TAG_SIZE ||
+      memcmp(bitstream->bytes_, "RIFF", TAG_SIZE)) {
+    // No leading "RIFF" tag: not a WebP file, so return the input data as is.
+    *image = *bitstream;
+  } else {
+    // WebP file: parse it into a temporary mux and take its first image.
+    const WebPMuxImage* wpi;
+    WebPMux* const mux = WebPMuxCreate(bitstream, 0);
+    if (mux == NULL) return WEBP_MUX_BAD_DATA;
+    wpi = mux->images_;
+    assert(wpi != NULL && wpi->img_ != NULL);
+    *image = wpi->img_->data_;
+    if (wpi->alpha_ != NULL) {
+      *alpha = wpi->alpha_->data_;
+    }
+    WebPMuxDelete(mux);  // NOTE(review): confirm '*image' outlives 'mux'.
+  }
+  *is_lossless = VP8LCheckSignature(image->bytes_, image->size_);
+  return WEBP_MUX_OK;
+}
+
+// Unlinks every chunk tagged 'tag'; WEBP_MUX_NOT_FOUND if none was present.
+static WebPMuxError DeleteChunks(WebPChunk** chunk_list, uint32_t tag) {
+  int num_deleted = 0;
+  assert(chunk_list);
+  while (*chunk_list != NULL) {
+    if ((*chunk_list)->tag_ != tag) {
+      chunk_list = &(*chunk_list)->next_;  // Keep it; advance to next link.
+    } else {
+      *chunk_list = ChunkDelete(*chunk_list);
+      ++num_deleted;
+    }
+  }
+  return (num_deleted > 0) ? WEBP_MUX_OK : WEBP_MUX_NOT_FOUND;
+}
+
+// Deletes all chunks of kind 'idx' from 'mux'. Image-related kinds (IsWPI) and
+// kinds without a chunk list in 'mux' yield WEBP_MUX_INVALID_ARGUMENT.
+static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, CHUNK_INDEX idx) {
+  const WebPChunkId id = kChunks[idx].id;
+  WebPChunk** list;
+
+  if (mux == NULL || IsWPI(id)) return WEBP_MUX_INVALID_ARGUMENT;
+
+  list = MuxGetChunkListFromId(mux, id);
+  return (list == NULL) ? WEBP_MUX_INVALID_ARGUMENT
+                        : DeleteChunks(list, kChunks[idx].tag);
+}
+
+static WebPMuxError DeleteLoopCount(WebPMux* const mux) {
+  return MuxDeleteAllNamedData(mux, IDX_LOOP);  // Every LOOP chunk in 'mux'.
+}
+
+//------------------------------------------------------------------------------
+// Set API(s).
+
+// Replaces every image in 'mux' with the single image found in 'bitstream'.
+WebPMuxError WebPMuxSetImage(WebPMux* mux,
+                             const WebPData* bitstream, int copy_data) {
+  WebPMuxError err;
+  WebPChunk chunk;
+  WebPMuxImage wpi;
+  WebPData image;
+  WebPData alpha;
+  int is_lossless;
+  int image_tag;
+
+  if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
+      bitstream->size_ > MAX_CHUNK_PAYLOAD) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  // If given data is for a whole webp file,
+  // extract only the VP8/VP8L data from it.
+  err = GetImageData(bitstream, &image, &alpha, &is_lossless);
+  if (err != WEBP_MUX_OK) return err;
+  image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
+
+  // Delete the existing images.
+  MuxImageDeleteAll(&mux->images_);
+
+  ChunkInit(&chunk);
+  MuxImageInit(&wpi);
+
+  if (alpha.bytes_ != NULL) {  // Add alpha chunk.
+    err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
+    if (err != WEBP_MUX_OK) goto Err;
+    err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
+    if (err != WEBP_MUX_OK) goto Err;
+    ChunkInit(&chunk);  // chunk owned by wpi.alpha_ now.
+  }
+
+  // Add image chunk.
+  err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
+  if (err != WEBP_MUX_OK) goto Err;
+  err = ChunkSetNth(&chunk, &wpi.img_, 1);
+  if (err != WEBP_MUX_OK) goto Err;
+  ChunkInit(&chunk);  // chunk owned by wpi.img_ now.
+
+  // Add this image to mux.
+  err = MuxImagePush(&wpi, &mux->images_);
+  if (err != WEBP_MUX_OK) goto Err;
+  return WEBP_MUX_OK;
+
+ Err:
+  // 'chunk' was reset after each hand-over, so nothing is released twice.
+  ChunkRelease(&chunk);
+  MuxImageRelease(&wpi);
+  return err;
+}
+
+// Replaces any META chunk(s) in 'mux' with one chunk holding 'metadata'.
+WebPMuxError WebPMuxSetMetadata(WebPMux* mux, const WebPData* metadata,
+                                int copy_data) {
+  WebPMuxError status;
+
+  if (mux == NULL || metadata == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  if (metadata->bytes_ == NULL || metadata->size_ > MAX_CHUNK_PAYLOAD) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  // Drop the old chunk(s); having had none (NOT_FOUND) is not an error.
+  status = WebPMuxDeleteMetadata(mux);
+  if (status != WEBP_MUX_OK && status != WEBP_MUX_NOT_FOUND) return status;
+
+  return MuxSet(mux, IDX_META, 1, metadata, copy_data);
+}
+
+// Replaces any ICCP chunk(s) in 'mux' with one chunk holding 'color_profile'.
+WebPMuxError WebPMuxSetColorProfile(WebPMux* mux, const WebPData* color_profile,
+                                    int copy_data) {
+  WebPMuxError status;
+
+  if (mux == NULL || color_profile == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  if (color_profile->bytes_ == NULL ||
+      color_profile->size_ > MAX_CHUNK_PAYLOAD) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  // Drop the old chunk(s); having had none (NOT_FOUND) is not an error.
+  status = WebPMuxDeleteColorProfile(mux);
+  if (status != WEBP_MUX_OK && status != WEBP_MUX_NOT_FOUND) return status;
+
+  return MuxSet(mux, IDX_ICCP, 1, color_profile, copy_data);
+}
+
+// Replaces any LOOP chunk(s) in 'mux' with one written by PutLE16(loop_count).
+WebPMuxError WebPMuxSetLoopCount(WebPMux* mux, int loop_count) {
+  WebPMuxError err;
+  uint8_t* data = NULL;
+
+  if (mux == NULL || loop_count < 0 || loop_count >= MAX_LOOP_COUNT) {
+    return WEBP_MUX_INVALID_ARGUMENT;  // Negative counts are rejected too.
+  }
+
+  // Delete the existing LOOP chunk(s).
+  err = DeleteLoopCount(mux);
+  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
+  // Add the given loop count; the chunk keeps its own copy (copy_data = 1).
+  data = (uint8_t*)malloc(kChunks[IDX_LOOP].size);
+  if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
+  PutLE16(data, loop_count);
+  err = MuxAddChunk(mux, 1, kChunks[IDX_LOOP].tag, data,
+                    kChunks[IDX_LOOP].size, 1);
+  free(data);
+  return err;
+}
+
+static WebPMuxError MuxPushFrameTileInternal(
+    WebPMux* const mux, const WebPData* const bitstream, int x_offset,
+    int y_offset, int duration, int copy_data, uint32_t tag) {
+  WebPChunk chunk;
+  WebPData image;
+  WebPData alpha;
+  WebPMuxImage wpi;
+  WebPMuxError err;
+  WebPData frame_tile;
+  const int is_frame = (tag == kChunks[IDX_FRAME].tag) ? 1 : 0;
+  int is_lossless;
+  int image_tag;
+  // Common implementation of WebPMuxPushFrame() and WebPMuxPushTile().
+  // Sanity checks: non-NULL arguments and a payload within the chunk limit.
+  if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
+      bitstream->size_ > MAX_CHUNK_PAYLOAD) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  if (x_offset < 0 || x_offset >= MAX_POSITION_OFFSET ||
+      y_offset < 0 || y_offset >= MAX_POSITION_OFFSET ||
+      duration <= 0 || duration > MAX_DURATION) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  // Snap offsets to even positions (clears the lowest bit).
+  x_offset &= ~1;
+  y_offset &= ~1;
+
+  // If given data is for a whole webp file,
+  // extract only the VP8/VP8L data from it.
+  err = GetImageData(bitstream, &image, &alpha, &is_lossless);
+  if (err != WEBP_MUX_OK) return err;
+  image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
+
+  WebPDataInit(&frame_tile);
+  ChunkInit(&chunk);
+  MuxImageInit(&wpi);
+
+  if (alpha.bytes_ != NULL) {
+    // Add alpha chunk.
+    err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
+    if (err != WEBP_MUX_OK) goto Err;
+    err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
+    if (err != WEBP_MUX_OK) goto Err;
+    ChunkInit(&chunk);  // chunk owned by wpi.alpha_ now.
+  }
+
+  // Add image chunk.
+  err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
+  if (err != WEBP_MUX_OK) goto Err;
+  err = ChunkSetNth(&chunk, &wpi.img_, 1);
+  if (err != WEBP_MUX_OK) goto Err;
+  ChunkInit(&chunk);  // chunk owned by wpi.img_ now.
+
+  // Create frame/tile data (offsets, plus size and duration for frames).
+  err = CreateFrameTileData(&image, x_offset, y_offset, duration, is_lossless,
+                            is_frame, &frame_tile);
+  if (err != WEBP_MUX_OK) goto Err;
+
+  // Add frame/tile chunk (with copy_data = 1), then drop the temporary buffer.
+  err = ChunkAssignData(&chunk, &frame_tile, 1, tag);
+  if (err != WEBP_MUX_OK) goto Err;
+  WebPDataClear(&frame_tile);
+  err = ChunkSetNth(&chunk, &wpi.header_, 1);
+  if (err != WEBP_MUX_OK) goto Err;
+  ChunkInit(&chunk);  // chunk owned by wpi.header_ now.
+
+  // Add this WebPMuxImage to mux.
+  err = MuxImagePush(&wpi, &mux->images_);
+  if (err != WEBP_MUX_OK) goto Err;
+
+  // All is well.
+  return WEBP_MUX_OK;
+
+ Err:  // Something bad happened: release whatever is still held locally.
+  WebPDataClear(&frame_tile);
+  ChunkRelease(&chunk);
+  MuxImageRelease(&wpi);
+  return err;
+}
+
+WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPData* bitstream,
+                              int x_offset, int y_offset,
+                              int duration, int copy_data) {  // FRAME variant.
+  return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
+                                  duration, copy_data, kChunks[IDX_FRAME].tag);
+}
+
+WebPMuxError WebPMuxPushTile(WebPMux* mux, const WebPData* bitstream,
+                             int x_offset, int y_offset,
+                             int copy_data) {  // TILE variant.
+  return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
+                                  1 /* unused, passes checks */, copy_data,
+                                  kChunks[IDX_TILE].tag);
+}
+
+//------------------------------------------------------------------------------
+// Delete API(s).
+
+// Deletes all images from 'mux', provided MuxValidateForImage() accepts it.
+WebPMuxError WebPMuxDeleteImage(WebPMux* mux) {
+  WebPMuxError status;
+
+  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+  status = MuxValidateForImage(mux);
+  if (status == WEBP_MUX_OK) {
+    MuxImageDeleteAll(&mux->images_);
+  }
+  return status;
+}
+
+WebPMuxError WebPMuxDeleteMetadata(WebPMux* mux) {
+  return MuxDeleteAllNamedData(mux, IDX_META);  // Every META chunk in 'mux'.
+}
+
+WebPMuxError WebPMuxDeleteColorProfile(WebPMux* mux) {
+  return MuxDeleteAllNamedData(mux, IDX_ICCP);  // Every ICCP chunk in 'mux'.
+}
+
+// Deletes the image at position 'nth' among those of kind 'idx' (FRAME or
+// TILE) from 'mux'.
+static WebPMuxError DeleteFrameTileInternal(WebPMux* const mux, uint32_t nth,
+                                            CHUNK_INDEX idx) {
+  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  assert(idx == IDX_FRAME || idx == IDX_TILE);
+  return MuxImageDeleteNth(&mux->images_, nth, kChunks[idx].id);
+}
+
+WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth) {
+  return DeleteFrameTileInternal(mux, nth, IDX_FRAME);  // FRAME images only.
+}
+
+WebPMuxError WebPMuxDeleteTile(WebPMux* mux, uint32_t nth) {
+  return DeleteFrameTileInternal(mux, nth, IDX_TILE);  // TILE images only.
+}
+
+//------------------------------------------------------------------------------
+// Assembly of the WebP RIFF file.
+
+// Decodes offsets (and, for frames, duration) from a FRAME/TILE chunk payload.
+static WebPMuxError GetFrameTileInfo(const WebPChunk* const frame_tile_chunk,
+                                     int* const x_offset, int* const y_offset,
+                                     int* const duration) {
+  const WebPData* data;
+  int is_frame;
+  assert(frame_tile_chunk != NULL);  // Checked before the first dereference.
+  data = &frame_tile_chunk->data_;
+  is_frame = (frame_tile_chunk->tag_ == kChunks[IDX_FRAME].tag);
+  assert(is_frame || frame_tile_chunk->tag_ == kChunks[IDX_TILE].tag);
+  if (data->size_ != (size_t)(is_frame ? FRAME_CHUNK_SIZE : TILE_CHUNK_SIZE))
+    return WEBP_MUX_INVALID_ARGUMENT;
+  *x_offset = 2 * GetLE24(data->bytes_ + 0);  // Offsets are stored halved.
+  *y_offset = 2 * GetLE24(data->bytes_ + 3);
+  if (is_frame) *duration = 1 + GetLE24(data->bytes_ + 12);  // Stored as d-1.
+  return WEBP_MUX_OK;
+}
+
+// Parses the VP8/VP8L header in 'image_chunk' and outputs its dimensions.
+// Returns WEBP_MUX_BAD_DATA (outputs untouched) if the header is not valid.
+WebPMuxError MuxGetImageWidthHeight(const WebPChunk* const image_chunk,
+                                    int* const width, int* const height) {
+  const WebPData* data;
+  int w, h;
+  int ok;
+  assert(image_chunk != NULL);  // Checked before the first dereference.
+  assert(image_chunk->tag_ == kChunks[IDX_VP8].tag ||
+         image_chunk->tag_ == kChunks[IDX_VP8L].tag);
+  data = &image_chunk->data_;
+  ok = (image_chunk->tag_ == kChunks[IDX_VP8].tag) ?
+      VP8GetInfo(data->bytes_, data->size_, data->size_, &w, &h) :
+      VP8LGetInfo(data->bytes_, data->size_, &w, &h, NULL);
+  if (!ok) return WEBP_MUX_BAD_DATA;
+  *width = w;
+  *height = h;
+  return WEBP_MUX_OK;
+}
+
+// Collects placement and size of one image: offsets/duration come from its
+// FRAME/TILE header chunk, width/height from its VP8/VP8L chunk.
+// Returns the first error reported by either helper.
+static WebPMuxError GetImageInfo(const WebPMuxImage* const wpi,
+                                 int* const x_offset, int* const y_offset,
+                                 int* const duration,
+                                 int* const width, int* const height) {
+  WebPMuxError status =
+      GetFrameTileInfo(wpi->header_, x_offset, y_offset, duration);
+
+  if (status == WEBP_MUX_OK) {
+    status = MuxGetImageWidthHeight(wpi->img_, width, height);
+  }
+  return status;
+}
+
+// Computes the canvas size: the bounding box of all frames/tiles, or the
+// image's own dimensions when 'mux' holds a single image.
+static WebPMuxError GetImageCanvasWidthHeight(
+    const WebPMux* const mux, uint32_t flags,
+    int* const width, int* const height) {
+  WebPMuxImage* wpi = NULL;
+  assert(mux != NULL);
+  assert(width != NULL && height != NULL);
+
+  wpi = mux->images_;
+  assert(wpi != NULL);
+  assert(wpi->img_ != NULL);
+
+  if (wpi->next_) {
+    int max_x = 0;
+    int max_y = 0;
+    int64_t image_area = 0;
+    // Aggregate the bounding box for animation frames & tiled images.
+    for (; wpi != NULL; wpi = wpi->next_) {
+      int x_offset, y_offset, duration, w, h;
+      const WebPMuxError err = GetImageInfo(wpi, &x_offset, &y_offset,
+                                            &duration, &w, &h);
+      // Bail out before reading the outputs: they may be unset on failure.
+      if (err != WEBP_MUX_OK) return err;
+      assert(x_offset < MAX_POSITION_OFFSET);
+      assert(y_offset < MAX_POSITION_OFFSET);
+      if (x_offset + w > max_x) max_x = x_offset + w;
+      if (y_offset + h > max_y) max_y = y_offset + h;
+      image_area += (int64_t)w * h;  // 64-bit product: no int overflow.
+    }
+    *width = max_x;
+    *height = max_y;
+    // Crude check to validate that there are no image overlaps/holes for tile
+    // images. Check that the aggregated image area for individual tiles exactly
+    // matches the image area of the constructed canvas. However, the area-match
+    // is a necessary but not sufficient condition.
+    if ((flags & TILE_FLAG) && (image_area != (int64_t)max_x * max_y)) {
+      *width = 0;
+      *height = 0;
+      return WEBP_MUX_INVALID_ARGUMENT;
+    }
+  } else {
+    // For a single image, extract the width & height from VP8/VP8L image-data.
+    int w, h;
+    const WebPChunk* const image_chunk = wpi->img_;
+    const WebPMuxError err = MuxGetImageWidthHeight(image_chunk, &w, &h);
+    if (err != WEBP_MUX_OK) return err;
+    *width = w;
+    *height = h;
+  }
+  return WEBP_MUX_OK;
+}
+
+// Rebuilds the VP8X chunk of 'mux' from its current contents.
+// Payload layout (total size: 10 bytes):
+//   Flags  : 4 bytes,
+//   Width  : 3 bytes (stored as canvas width - 1),
+//   Height : 3 bytes (stored as canvas height - 1).
+static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
+  WebPMuxError err = WEBP_MUX_OK;
+  uint32_t flags = 0;
+  int width = 0;
+  int height = 0;
+  uint8_t data[VP8X_CHUNK_SIZE];
+  const size_t data_size = VP8X_CHUNK_SIZE;
+  const WebPMuxImage* images = NULL;
+
+  assert(mux != NULL);
+  images = mux->images_;  // First image.
+  if (images == NULL || images->img_ == NULL ||
+      images->img_->data_.bytes_ == NULL) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  // If VP8X chunk(s) is(are) already present, remove them (and later add new
+  // VP8X chunk with updated flags).
+  err = MuxDeleteAllNamedData(mux, IDX_VP8X);
+  if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
+
+  // Set flags according to which chunks are currently present in 'mux'.
+  if (mux->iccp_ != NULL && mux->iccp_->data_.bytes_ != NULL) {
+    flags |= ICCP_FLAG;
+  }
+
+  if (mux->meta_ != NULL && mux->meta_->data_.bytes_ != NULL) {
+    flags |= META_FLAG;
+  }
+
+  if (images->header_ != NULL) {  // Only the first image's header is examined.
+    if (images->header_->tag_ == kChunks[IDX_TILE].tag) {
+      // This is a tiled image.
+      flags |= TILE_FLAG;
+    } else if (images->header_->tag_ == kChunks[IDX_FRAME].tag) {
+      // This is an image with animation.
+      flags |= ANIMATION_FLAG;
+    }
+  }
+
+  if (MuxImageCount(images, WEBP_CHUNK_ALPHA) > 0) {
+    flags |= ALPHA_FLAG;  // Some images have an alpha channel.
+  }
+
+  if (flags == 0) {
+    // For Simple Image, VP8X chunk should not be added.
+    return WEBP_MUX_OK;
+  }
+
+  err = GetImageCanvasWidthHeight(mux, flags, &width, &height);
+  if (err != WEBP_MUX_OK) return err;
+
+  if (width <= 0 || height <= 0) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  if (width > MAX_CANVAS_SIZE || height > MAX_CANVAS_SIZE) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  if (MuxHasLosslessImages(images)) {
+    // We have a file with a VP8X chunk having some lossless images.
+    // As lossless images implicitly contain alpha, force ALPHA_FLAG to be true.
+    // Note: This 'flags' update must NOT be done for a lossless image
+    // without a VP8X chunk!
+    flags |= ALPHA_FLAG;
+  }
+
+  PutLE32(data + 0, flags);   // VP8X chunk flags.
+  PutLE24(data + 4, width - 1);   // canvas width.
+  PutLE24(data + 7, height - 1);  // canvas height.
+
+  err = MuxAddChunk(mux, 1, kChunks[IDX_VP8X].tag, data, data_size, 1);
+  return err;
+}
+
+// Serializes 'mux' into a freshly malloc'ed RIFF buffer ('assembled_data').
+WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
+  size_t size = 0;
+  uint8_t* data = NULL;
+  uint8_t* dst = NULL;
+  int num_frames;
+  int num_loop_chunks;
+  WebPMuxError err;
+
+  // Reset the output first so it is empty on every failure path below.
+  if (assembled_data == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  WebPDataInit(assembled_data);
+  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+  // Remove LOOP chunk if unnecessary.
+  err = WebPMuxNumChunks(mux, kChunks[IDX_LOOP].id, &num_loop_chunks);
+  if (err != WEBP_MUX_OK) return err;
+  if (num_loop_chunks >= 1) {
+    err = WebPMuxNumChunks(mux, kChunks[IDX_FRAME].id, &num_frames);
+    if (err != WEBP_MUX_OK) return err;
+    if (num_frames == 0) {
+      err = DeleteLoopCount(mux);
+      if (err != WEBP_MUX_OK) return err;
+    }
+  }
+
+  // Create VP8X chunk.
+  err = CreateVP8XChunk(mux);
+  if (err != WEBP_MUX_OK) return err;
+
+  // Allocate data.
+  size = ChunksListDiskSize(mux->vp8x_) + ChunksListDiskSize(mux->iccp_)
+       + ChunksListDiskSize(mux->loop_) + MuxImageListDiskSize(mux->images_)
+       + ChunksListDiskSize(mux->meta_) + ChunksListDiskSize(mux->unknown_)
+       + RIFF_HEADER_SIZE;
+
+  data = (uint8_t*)malloc(size);
+  if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+  // Emit header & chunks.
+  dst = MuxEmitRiffHeader(data, size);
+  dst = ChunkListEmit(mux->vp8x_, dst);
+  dst = ChunkListEmit(mux->iccp_, dst);
+  dst = ChunkListEmit(mux->loop_, dst);
+  dst = MuxImageListEmit(mux->images_, dst);
+  dst = ChunkListEmit(mux->meta_, dst);
+  dst = ChunkListEmit(mux->unknown_, dst);
+  assert(dst == data + size);
+
+  // Validate mux.
+  err = MuxValidate(mux);
+  if (err != WEBP_MUX_OK) {
+    free(data);
+    return err;  // 'assembled_data' stays empty.
+  }
+
+  // Finalize.
+  assembled_data->bytes_ = data;
+  assembled_data->size_ = size;
+  return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/mux/muxi.h b/drivers/webpold/mux/muxi.h
new file mode 100644
index 0000000000..2f06f3ed03
--- /dev/null
+++ b/drivers/webpold/mux/muxi.h
@@ -0,0 +1,271 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Internal header for mux library.
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_MUX_MUXI_H_
+#define WEBP_MUX_MUXI_H_
+
+#include <stdlib.h>
+#include "../dec/vp8i.h"
+#include "../dec/vp8li.h"
+#include "../format_constants.h"
+#include "../mux.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Defines and constants.
+
+// Chunk object.
+typedef struct WebPChunk WebPChunk;
+struct WebPChunk {
+  uint32_t        tag_;    // Chunk tag (a FourCC); NIL_TAG marks a void chunk.
+  int             owner_;  // True if *data_ memory is owned internally.
+                           // VP8X, Loop, and other internally created chunks
+                           // like frame/tile are always owned.
+  WebPData        data_;   // Chunk payload (bytes_ / size_).
+  WebPChunk*      next_;   // Next chunk in the singly linked list, or NULL.
+};
+
+// MuxImage object. Stores a full WebP image (including frame/tile chunk, alpha
+// chunk and VP8/VP8L chunk).
+typedef struct WebPMuxImage WebPMuxImage;
+struct WebPMuxImage {
+  WebPChunk*  header_;      // Corresponds to WEBP_CHUNK_FRAME/WEBP_CHUNK_TILE.
+  WebPChunk*  alpha_;       // Corresponds to WEBP_CHUNK_ALPHA.
+  WebPChunk*  img_;         // Corresponds to WEBP_CHUNK_IMAGE.
+  int         is_partial_;  // True if only some of the chunks are filled.
+  WebPMuxImage* next_;      // Next image in the list, or NULL for the last one.
+};
+
+// Main mux object. Stores data chunks.
+struct WebPMux {
+  WebPMuxImage*   images_;  // List of images (frame/tile, alpha, VP8/VP8L).
+  WebPChunk*      iccp_;    // Chunk list for WEBP_CHUNK_ICCP.
+  WebPChunk*      meta_;    // Chunk list for WEBP_CHUNK_META.
+  WebPChunk*      loop_;    // Chunk list for WEBP_CHUNK_LOOP.
+  WebPChunk*      vp8x_;    // Chunk list for WEBP_CHUNK_VP8X.
+
+  WebPChunk*  unknown_;     // Chunk list for WEBP_CHUNK_UNKNOWN.
+};
+
+// CHUNK_INDEX enum: used for indexing within 'kChunks' (defined below) only.
+// Note: the reason for having two enums ('WebPChunkId' and 'CHUNK_INDEX') is to
+// allow two different chunks to have the same id (e.g. WebPChunkId
+// 'WEBP_CHUNK_IMAGE' can correspond to CHUNK_INDEX 'IDX_VP8' or 'IDX_VP8L').
+typedef enum {
+  IDX_VP8X = 0,
+  IDX_ICCP,
+  IDX_LOOP,
+  IDX_FRAME,
+  IDX_TILE,
+  IDX_ALPHA,
+  IDX_VP8,
+  IDX_VP8L,
+  IDX_META,
+  IDX_UNKNOWN,
+
+  IDX_NIL,        // Index of the NIL_TAG entry; also means "tag not found".
+  IDX_LAST_CHUNK  // Number of entries in 'kChunks'.
+} CHUNK_INDEX;
+
+#define NIL_TAG 0x00000000u  // To signal void chunk.
+#define MKFOURCC(a, b, c, d) /* Packs four chars, 'a' in the low byte. */ \
+  ((uint32_t)(a) | (b) << 8 | (c) << 16 | (uint32_t)(d) << 24)  // No int UB.
+
+typedef struct {
+  uint32_t      tag;   // FourCC tag of the chunk (see MKFOURCC).
+  WebPChunkId   id;    // Chunk id; several tags may share one id.
+  uint32_t      size;  // A *_CHUNK_SIZE constant or UNDEFINED_CHUNK_SIZE.
+} ChunkInfo;
+
+extern const ChunkInfo kChunks[IDX_LAST_CHUNK];  // Indexed by CHUNK_INDEX.
+
+//------------------------------------------------------------------------------
+// Helper functions.
+
+// Read 16, 24 or 32 bits stored in little-endian order.
+static WEBP_INLINE int GetLE16(const uint8_t* const data) {
+  return (int)data[0] | ((int)data[1] << 8);  // data[0] is the low byte.
+}
+
+static WEBP_INLINE int GetLE24(const uint8_t* const data) {
+  return ((int)data[2] << 16) | GetLE16(data);  // data[2] is the top byte.
+}
+// The high half is widened first: shifting an int into bit 31 is undefined.
+static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
+  return (uint32_t)GetLE16(data) | ((uint32_t)GetLE16(data + 2) << 16);
+}
+
+// Store 16, 24 or 32 bits in little-endian order.
+static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
+  assert(val < (1 << 16));
+  data[1] = (val >> 8);  // High byte is stored second ...
+  data[0] = val;         // ... after the low byte (little-endian order).
+}
+
+static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
+  assert(val < (1 << 24));
+  data[2] = (val >> 16);        // Top byte.
+  PutLE16(data, val & 0xffff);  // Low two bytes.
+}
+
+static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
+  PutLE16(data + 2, (int)(val >> 16));  // Upper 16 bits go to bytes 2..3.
+  PutLE16(data, (int)(val & 0xffff));   // Lower 16 bits go to bytes 0..1.
+}
+// Header plus payload rounded up to even; the mask is size_t wide on purpose.
+static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
+  return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~(size_t)1);
+}
+
+//------------------------------------------------------------------------------
+// Chunk object management.
+
+// Initialize.
+void ChunkInit(WebPChunk* const chunk);
+
+// Get chunk index from chunk tag. Returns IDX_NIL if not found.
+CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag);
+
+// Get chunk id from chunk tag. Returns WEBP_CHUNK_NIL if not found.
+WebPChunkId ChunkGetIdFromTag(uint32_t tag);
+
+// Search for nth chunk with given 'tag' in the chunk list.
+// nth = 0 means "last of the list".
+WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag);
+
+// Fill the chunk with the given data.
+WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
+                             int copy_data, uint32_t tag);
+
+// Sets 'chunk' at nth position in the 'chunk_list'.
+// nth = 0 has the special meaning "last of the list".
+WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
+                         uint32_t nth);
+
+// Releases chunk and returns chunk->next_.
+WebPChunk* ChunkRelease(WebPChunk* const chunk);
+
+// Deletes given chunk & returns chunk->next_.
+WebPChunk* ChunkDelete(WebPChunk* const chunk);
+
+// On-disk size of 'chunk': its header plus its payload including padding.
+static WEBP_INLINE size_t ChunkDiskSize(const WebPChunk* chunk) {
+  const size_t payload_size = chunk->data_.size_;
+  assert(payload_size < MAX_CHUNK_PAYLOAD);
+  return SizeWithPadding(payload_size);
+}
+
+// Total size of a list of chunks.
+size_t ChunksListDiskSize(const WebPChunk* chunk_list);
+
+// Write out the given list of chunks into 'dst'.
+uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst);
+
+// Get the width & height of image stored in 'image_chunk'.
+WebPMuxError MuxGetImageWidthHeight(const WebPChunk* const image_chunk,
+                                    int* const width, int* const height);
+
+//------------------------------------------------------------------------------
+// MuxImage object management.
+
+// Initialize.
+void MuxImageInit(WebPMuxImage* const wpi);
+
+// Releases image 'wpi' and returns wpi->next.
+WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi);
+
+// Delete image 'wpi' and return the next image in the list or NULL.
+// 'wpi' can be NULL.
+WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi);
+
+// Delete all images in 'wpi_list'.
+void MuxImageDeleteAll(WebPMuxImage** const wpi_list);
+
+// Count number of images matching the given tag id in the 'wpi_list'.
+int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id);
+
+// Check if given ID corresponds to an image related chunk, i.e. one of the
+// chunk kinds held by a WebPMuxImage (frame, tile, alpha or image).
+static WEBP_INLINE int IsWPI(WebPChunkId id) {
+  // Frame and tile chunks are stored as the header of an image.
+  if (id == WEBP_CHUNK_FRAME || id == WEBP_CHUNK_TILE) return 1;
+  // Alpha and VP8/VP8L chunks are the other two image components.
+  if (id == WEBP_CHUNK_ALPHA || id == WEBP_CHUNK_IMAGE) return 1;
+  // Not an image related chunk.
+  return 0;
+}
+
+// Returns the address of the chunk pointer inside 'wpi' that holds chunks of
+// the given 'id', or NULL when 'id' is not an image related chunk id.
+static WEBP_INLINE WebPChunk** MuxImageGetListFromId(
+    const WebPMuxImage* const wpi, WebPChunkId id) {
+  assert(wpi != NULL);
+  if (id == WEBP_CHUNK_FRAME || id == WEBP_CHUNK_TILE) {
+    return (WebPChunk**)&wpi->header_;  // Frame and tile share the header.
+  }
+  if (id == WEBP_CHUNK_ALPHA) return (WebPChunk**)&wpi->alpha_;
+  if (id == WEBP_CHUNK_IMAGE) return (WebPChunk**)&wpi->img_;
+  return NULL;
+}
+
+// Pushes 'wpi' at the end of 'wpi_list'.
+WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list);
+
+// Delete nth image in the image list with given tag id.
+WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
+                               WebPChunkId id);
+
+// Get nth image in the image list with given tag id.
+WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
+                            WebPChunkId id, WebPMuxImage** wpi);
+
+// Total size of the given image.
+size_t MuxImageDiskSize(const WebPMuxImage* const wpi);
+
+// Total size of a list of images.
+size_t MuxImageListDiskSize(const WebPMuxImage* wpi_list);
+
+// Write out the given image into 'dst'.
+uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst);
+
+// Write out the given list of images into 'dst'.
+uint8_t* MuxImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst);
+
+//------------------------------------------------------------------------------
+// Helper methods for mux.
+
+// Checks if the given image list contains at least one lossless image.
+int MuxHasLosslessImages(const WebPMuxImage* images);
+
+// Write out RIFF header into 'data', given total data size 'size'.
+uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size);
+
+// Returns the list where chunk with given ID is to be inserted in mux.
+// Return value is NULL if this chunk should be inserted in mux->images_ list
+// or if 'id' is not known.
+WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id);
+
+// Validates that the given mux has a single image.
+WebPMuxError MuxValidateForImage(const WebPMux* const mux);
+
+// Validates the given mux object.
+WebPMuxError MuxValidate(const WebPMux* const mux);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_MUX_MUXI_H_ */
diff --git a/drivers/webpold/mux/muxinternal.c b/drivers/webpold/mux/muxinternal.c
new file mode 100644
index 0000000000..6c3c4fe60a
--- /dev/null
+++ b/drivers/webpold/mux/muxinternal.c
@@ -0,0 +1,576 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Internal objects and utils for mux.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#include <assert.h>
+#include "./muxi.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define UNDEFINED_CHUNK_SIZE (-1)  // For entries without a size constant.
+
+const ChunkInfo kChunks[] = {  // Entries follow the CHUNK_INDEX order.
+  { MKFOURCC('V', 'P', '8', 'X'),  WEBP_CHUNK_VP8X,    VP8X_CHUNK_SIZE },
+  { MKFOURCC('I', 'C', 'C', 'P'),  WEBP_CHUNK_ICCP,    UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('L', 'O', 'O', 'P'),  WEBP_CHUNK_LOOP,    LOOP_CHUNK_SIZE },
+  { MKFOURCC('F', 'R', 'M', ' '),  WEBP_CHUNK_FRAME,   FRAME_CHUNK_SIZE },
+  { MKFOURCC('T', 'I', 'L', 'E'),  WEBP_CHUNK_TILE,    TILE_CHUNK_SIZE },
+  { MKFOURCC('A', 'L', 'P', 'H'),  WEBP_CHUNK_ALPHA,   UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('V', 'P', '8', ' '),  WEBP_CHUNK_IMAGE,   UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('V', 'P', '8', 'L'),  WEBP_CHUNK_IMAGE,   UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('M', 'E', 'T', 'A'),  WEBP_CHUNK_META,    UNDEFINED_CHUNK_SIZE },
+  { MKFOURCC('U', 'N', 'K', 'N'),  WEBP_CHUNK_UNKNOWN, UNDEFINED_CHUNK_SIZE },
+
+  { NIL_TAG,                    WEBP_CHUNK_NIL,     UNDEFINED_CHUNK_SIZE }
+};  // The NIL_TAG entry terminates the table for the tag lookup loops.
+
+//------------------------------------------------------------------------------
+// Life of a chunk object.
+
+void ChunkInit(WebPChunk* const chunk) {
+  assert(chunk);
+  memset(chunk, 0, sizeof(WebPChunk));  // Zero every field of the chunk.
+  chunk->tag_ = NIL_TAG;                // Mark the chunk as void.
+}
+
+WebPChunk* ChunkRelease(WebPChunk* const chunk) {
+  WebPChunk* successor = NULL;
+  if (chunk != NULL) {
+    // Only free the payload when this chunk owns it.
+    if (chunk->owner_) WebPDataClear(&chunk->data_);
+    successor = chunk->next_;  // Saved before ChunkInit() wipes the link.
+    ChunkInit(chunk);
+  }
+  return successor;
+}
+
+//------------------------------------------------------------------------------
+// Chunk misc methods.
+
+CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag) {
+  int idx = 0;
+  // Advance until 'tag' is matched or the NIL_TAG terminator is reached.
+  while (kChunks[idx].tag != NIL_TAG && kChunks[idx].tag != tag) ++idx;
+  if (kChunks[idx].tag != NIL_TAG) return idx;
+  return IDX_NIL;
+}
+
+WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
+  const ChunkInfo* info = kChunks;
+  for (; info->tag != NIL_TAG; ++info) {  // Stop at the table terminator.
+    if (info->tag == tag) return info->id;
+  }
+  return WEBP_CHUNK_NIL;  // No table entry carries 'tag'.
+}
+
+//------------------------------------------------------------------------------
+// Chunk search methods.
+
+// Returns the first chunk carrying 'tag' at or after 'chunk', NULL if none.
+static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
+  for (; chunk != NULL; chunk = chunk->next_) {
+    if (chunk->tag_ == tag) break;
+  }
+  return chunk;
+}
+
+WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
+  uint32_t iter = nth;
+  first = ChunkSearchNextInList(first, tag);
+  if (!first) return NULL;  // No chunk with 'tag' at all.
+
+  while (--iter != 0) {  // For nth == 0, 'iter' wraps: scan to the last match.
+    WebPChunk* next_chunk = ChunkSearchNextInList(first->next_, tag);
+    if (next_chunk == NULL) break;
+    first = next_chunk;
+  }
+  return ((nth > 0) && (iter > 0)) ? NULL : first;  // NULL: too few matches.
+}
+
+// Outputs in '*location' the link ('chunk_list' itself or a 'prev->next_')
+// through which the chunk at 1-based position nth is (or would be) reached.
+// Returns 1 if that position exists or is the append position, 0 otherwise.
+static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
+                                WebPChunk*** const location) {
+  uint32_t count = 0;
+  assert(chunk_list);
+  *location = chunk_list;
+
+  while (*chunk_list) {
+    WebPChunk* const cur_chunk = *chunk_list;
+    ++count;
+    if (count == nth) return 1;  // Found.
+    chunk_list = &cur_chunk->next_;
+    *location = chunk_list;
+  }
+
+  // *chunk_list is ok to be NULL if adding at last location (nth == 0 too).
+  return (nth == 0 || (count == nth - 1)) ? 1 : 0;
+}
+
+//------------------------------------------------------------------------------
+// Chunk writer methods.
+
+WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
+                             int copy_data, uint32_t tag) {
+  // Replaces the payload of 'chunk' with 'data' (copied if 'copy_data' is set,
+  // borrowed otherwise) and tags it with 'tag'; a NULL 'data' leaves the chunk
+  // empty. Returns WEBP_MUX_OK, or WEBP_MUX_MEMORY_ERROR if the copy fails.
+
+  // For internally allocated chunks, always copy data & make it owner of data.
+  if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_LOOP].tag) {
+    copy_data = 1;
+  }
+
+  // Drop the previous payload (freed only if the chunk owned it).
+  ChunkRelease(chunk);
+
+  if (data != NULL) {
+    if (copy_data) {
+      // Copy data. WebPDataCopy() maps an empty payload to NULL/0 rather than
+      // calling malloc(0), which may legally return NULL and used to be
+      // misreported as WEBP_MUX_MEMORY_ERROR.
+      if (!WebPDataCopy(data, &chunk->data_)) return WEBP_MUX_MEMORY_ERROR;
+      chunk->owner_ = 1;  // Chunk is owner of data.
+    } else {
+      chunk->data_ = *data;  // Don't copy data: borrow the caller's buffer.
+    }
+  }
+
+  chunk->tag_ = tag;
+  return WEBP_MUX_OK;
+}
+
+WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
+                         uint32_t nth) {
+  WebPChunk** link = NULL;  // Link that will point at the inserted node.
+  WebPChunk* node;
+
+  if (!ChunkSearchListToSet(chunk_list, nth, &link)) {
+    return WEBP_MUX_NOT_FOUND;
+  }
+  node = (WebPChunk*)malloc(sizeof(*node));
+  if (node == NULL) return WEBP_MUX_MEMORY_ERROR;
+  *node = *chunk;       // Shallow copy: the payload pointer is shared.
+  node->next_ = *link;  // Old occupant of the position follows the new node.
+  *link = node;
+  return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+// Chunk deletion method(s).
+
+WebPChunk* ChunkDelete(WebPChunk* const chunk) {
+  WebPChunk* const successor = ChunkRelease(chunk);  // Accepts NULL.
+  free(chunk);  // The node itself; ChunkRelease() dealt with its payload.
+  return successor;
+}
+
+//------------------------------------------------------------------------------
+// Chunk serialization methods.
+
+size_t ChunksListDiskSize(const WebPChunk* chunk_list) {
+  const WebPChunk* cur;
+  size_t total = 0;  // Sum of the on-disk size of every chunk in the list.
+  for (cur = chunk_list; cur != NULL; cur = cur->next_) {
+    total += ChunkDiskSize(cur);
+  }
+  return total;
+}
+// Serializes 'chunk' at 'dst' (tag, size, payload, padding); returns the end.
+static uint8_t* ChunkEmit(const WebPChunk* const chunk, uint8_t* dst) {
+  size_t chunk_size;
+  assert(chunk != NULL && chunk->tag_ != NIL_TAG);  // Checked before first use.
+  chunk_size = chunk->data_.size_;
+  assert(chunk_size == (uint32_t)chunk_size);  // Must fit the 32-bit field.
+  PutLE32(dst + 0, chunk->tag_);
+  PutLE32(dst + TAG_SIZE, (uint32_t)chunk_size);
+  if (chunk_size > 0)  // memcpy() from a NULL payload is undefined.
+    memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes_, chunk_size);
+  if (chunk_size & 1) dst[CHUNK_HEADER_SIZE + chunk_size] = 0;  // Add padding.
+  return dst + ChunkDiskSize(chunk);
+}
+
+uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst) {
+  const WebPChunk* cur;
+  for (cur = chunk_list; cur != NULL; cur = cur->next_) {
+    dst = ChunkEmit(cur, dst);  // Each call returns the next write position.
+  }
+  return dst;
+}
+
+//------------------------------------------------------------------------------
+// Manipulation of a WebPData object.
+
+void WebPDataInit(WebPData* webp_data) {
+  if (webp_data == NULL) return;  // Nothing to initialize.
+  // Zero the whole object: no bytes and a size of 0.
+  memset(webp_data, 0, sizeof(*webp_data));
+}
+
+void WebPDataClear(WebPData* webp_data) {
+  if (webp_data == NULL) return;
+  // Release the buffer, then reset the object so it no longer refers to it.
+  free((void*)webp_data->bytes_);
+  WebPDataInit(webp_data);
+}
+
+int WebPDataCopy(const WebPData* src, WebPData* dst) {
+  uint8_t* copy;
+  if (src == NULL || dst == NULL) return 0;
+  WebPDataInit(dst);
+  if (src->bytes_ == NULL || src->size_ == 0) return 1;  // 'dst' stays empty.
+  copy = (uint8_t*)malloc(src->size_);
+  if (copy == NULL) return 0;  // 'dst' is left in its initialized state.
+  memcpy(copy, src->bytes_, src->size_);
+  dst->bytes_ = copy;
+  dst->size_ = src->size_;
+  return 1;
+}
+
+//------------------------------------------------------------------------------
+// Life of a MuxImage object.
+
+void MuxImageInit(WebPMuxImage* const wpi) {
+  assert(wpi);
+  memset(wpi, 0, sizeof(WebPMuxImage));  // Zero every field of the image.
+}
+
+WebPMuxImage* MuxImageRelease(WebPMuxImage* const wpi) {
+  WebPMuxImage* successor = NULL;
+  if (wpi != NULL) {
+    ChunkDelete(wpi->header_);  // ChunkDelete() does nothing for NULL.
+    ChunkDelete(wpi->alpha_);
+    ChunkDelete(wpi->img_);
+    successor = wpi->next_;  // Saved before MuxImageInit() clears the link.
+    MuxImageInit(wpi);
+  }
+  return successor;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage search methods.
+
+int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
+  int matches = 0;
+  for (; wpi_list != NULL; wpi_list = wpi_list->next_) {
+    // Pick the component of this image that could hold a chunk of kind 'id'.
+    const WebPChunk* const candidate = *MuxImageGetListFromId(wpi_list, id);
+    // header_ may be either a frame or a tile, so compare the actual id too.
+    if (candidate != NULL && ChunkGetIdFromTag(candidate->tag_) == id) {
+      ++matches;
+    }
+  }
+  return matches;
+}
+
+// Outputs in '*location' the link ('wpi_list' itself or a 'prev_wpi->next_')
+// that points at the nth image holding a chunk of kind 'id'; nth == 0 selects
+// the last such image. Returns 1 if that image was found, 0 otherwise.
+static int SearchImageToGetOrDelete(WebPMuxImage** wpi_list, uint32_t nth,
+                                    WebPChunkId id,
+                                    WebPMuxImage*** const location) {
+  uint32_t count = 0;
+  assert(wpi_list);
+  *location = wpi_list;
+
+  // Search makes sense only for the following.
+  assert(id == WEBP_CHUNK_FRAME || id == WEBP_CHUNK_TILE ||
+         id == WEBP_CHUNK_IMAGE);
+  assert(id != WEBP_CHUNK_IMAGE || nth == 1);
+
+  if (nth == 0) {  // Turn "last" into a concrete 1-based position.
+    nth = MuxImageCount(*wpi_list, id);
+    if (nth == 0) return 0;  // Not found.
+  }
+
+  while (*wpi_list) {
+    WebPMuxImage* const cur_wpi = *wpi_list;
+    const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(cur_wpi, id);
+    if (wpi_chunk != NULL) {
+      const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
+      if (wpi_chunk_id == id) {  // Only images of the requested kind count.
+        ++count;
+        if (count == nth) return 1;  // Found.
+      }
+    }
+    wpi_list = &cur_wpi->next_;
+    *location = wpi_list;
+  }
+  return 0;  // Not found.
+}
+
+//------------------------------------------------------------------------------
+// MuxImage writer methods.
+
+WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list) {
+  WebPMuxImage* tail = *wpi_list;
+  WebPMuxImage* node;
+
+  // Find the last image of the list (stays NULL for an empty list).
+  while (tail != NULL && tail->next_ != NULL) {
+    tail = tail->next_;
+  }
+
+  node = (WebPMuxImage*)malloc(sizeof(*node));
+  if (node == NULL) return WEBP_MUX_MEMORY_ERROR;
+  *node = *wpi;        // Shallow copy: chunk pointers are shared with 'wpi'.
+  node->next_ = NULL;  // The new image becomes the end of the list.
+
+  if (tail == NULL) {
+    *wpi_list = node;  // Empty list: the new image is the head.
+  } else {
+    tail->next_ = node;
+  }
+  return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage deletion methods.
+
+WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi) {
+  // MuxImageRelease() and free() both accept NULL, so no check is needed.
+  WebPMuxImage* const successor = MuxImageRelease(wpi);
+  free(wpi);
+  return successor;
+}
+
+void MuxImageDeleteAll(WebPMuxImage** const wpi_list) {
+  WebPMuxImage* cur = *wpi_list;
+  while (cur != NULL) cur = MuxImageDelete(cur);  // Returns the next image.
+  *wpi_list = NULL;
+}
+
+WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
+                               WebPChunkId id) {
+  assert(wpi_list);
+  if (SearchImageToGetOrDelete(wpi_list, nth, id, &wpi_list)) {
+    *wpi_list = MuxImageDelete(*wpi_list);  // Unlink and free the match.
+    return WEBP_MUX_OK;
+  }
+  return WEBP_MUX_NOT_FOUND;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage reader methods.
+
+WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
+                            WebPChunkId id, WebPMuxImage** wpi) {
+  assert(wpi_list);
+  assert(wpi);
+  if (SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth, id,
+                               (WebPMuxImage***)&wpi_list)) {
+    *wpi = (WebPMuxImage*)*wpi_list;  // 'wpi_list' now addresses the match.
+    return WEBP_MUX_OK;
+  }
+  return WEBP_MUX_NOT_FOUND;
+}
+
+//------------------------------------------------------------------------------
+// MuxImage serialization methods.
+
+// Size of an image: the summed on-disk size of the chunks it actually holds
+// (header, alpha and image chunk; absent ones contribute nothing).
+size_t MuxImageDiskSize(const WebPMuxImage* const wpi) {
+  const size_t header_size = wpi->header_ ? ChunkDiskSize(wpi->header_) : 0;
+  const size_t alpha_size = wpi->alpha_ ? ChunkDiskSize(wpi->alpha_) : 0;
+  const size_t image_size = wpi->img_ ? ChunkDiskSize(wpi->img_) : 0;
+  return header_size + alpha_size + image_size;
+}
+
+size_t MuxImageListDiskSize(const WebPMuxImage* wpi_list) {
+  const WebPMuxImage* cur;
+  size_t total = 0;  // Sum of the on-disk size of every image in the list.
+  for (cur = wpi_list; cur != NULL; cur = cur->next_) {
+    total += MuxImageDiskSize(cur);
+  }
+  return total;
+}
+
+uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst) {
+  // Emits the chunks of 'wpi' at 'dst', strictly in the order below, and
+  // returns the write position after the last emitted chunk:
+  // 1. Frame/Tile chunk, 2. Alpha chunk (each only if present),
+  // 3. VP8/VP8L chunk.
+  assert(wpi);
+  if (wpi->header_ != NULL) dst = ChunkEmit(wpi->header_, dst);
+  if (wpi->alpha_ != NULL) dst = ChunkEmit(wpi->alpha_, dst);
+  if (wpi->img_ != NULL) dst = ChunkEmit(wpi->img_, dst);
+  return dst;
+}
+
+uint8_t* MuxImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst) {
+  const WebPMuxImage* cur;
+  for (cur = wpi_list; cur != NULL; cur = cur->next_) {
+    dst = MuxImageEmit(cur, dst);  // Each call returns the next write position.
+  }
+  return dst;
+}
+
+//------------------------------------------------------------------------------
+// Helper methods for mux.
+
+int MuxHasLosslessImages(const WebPMuxImage* images) {
+  // An image counts as lossless when its image chunk carries the VP8L tag;
+  // finding a single one is enough for a positive answer.
+  const uint32_t lossless_tag = kChunks[IDX_VP8L].tag;
+  for (; images != NULL; images = images->next_) {
+    assert(images->img_ != NULL);
+    if (images->img_->tag_ == lossless_tag) return 1;
+  }
+  return 0;
+}
+
+uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size) {
+  PutLE32(data + 0, MKFOURCC('R', 'I', 'F', 'F'));  // Container tag.
+  PutLE32(data + TAG_SIZE, (uint32_t)size - CHUNK_HEADER_SIZE);
+  assert(size == (uint32_t)size);  // 'size' must fit the 32-bit size field.
+  PutLE32(data + TAG_SIZE + CHUNK_SIZE_BYTES, MKFOURCC('W', 'E', 'B', 'P'));
+  return data + RIFF_HEADER_SIZE;  // First byte after the header.
+}
+
+WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
+  assert(mux != NULL);
+  // Only ids kept in mux-level chunk lists are mapped; every other id
+  // (including the image related ones) yields NULL.
+  if (id == WEBP_CHUNK_VP8X) return (WebPChunk**)&mux->vp8x_;
+  if (id == WEBP_CHUNK_ICCP) return (WebPChunk**)&mux->iccp_;
+  if (id == WEBP_CHUNK_LOOP) return (WebPChunk**)&mux->loop_;
+  if (id == WEBP_CHUNK_META) return (WebPChunk**)&mux->meta_;
+  if (id == WEBP_CHUNK_UNKNOWN) return (WebPChunk**)&mux->unknown_;
+  return NULL;
+}
+
+WebPMuxError MuxValidateForImage(const WebPMux* const mux) {
+  const int num_images = MuxImageCount(mux->images_, WEBP_CHUNK_IMAGE);
+  const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_FRAME);
+  const int num_tiles  = MuxImageCount(mux->images_, WEBP_CHUNK_TILE);
+
+  // No images in mux.
+  if (num_images == 0) return WEBP_MUX_NOT_FOUND;
+
+  // Frame/Tile case OR an invalid mux.
+  if (num_images != 1 || num_frames != 0 || num_tiles != 0) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  // Valid case (single image).
+  return WEBP_MUX_OK;
+}
+
+static int IsNotCompatible(int feature, int num_items) {
+  return (feature == 0) == (num_items > 0);  // Flag and item presence differ.
+}
+
+#define NO_FLAG 0
+
+// Counts the chunks at index 'idx' into *num, then checks that count against
+// 'max' (use -1 to skip) and against the 'feature' bit(s) of 'vp8x_flags'
+// (use NO_FLAG to skip). On success returns WEBP_MUX_OK.
+static WebPMuxError ValidateChunk(const WebPMux* const mux, CHUNK_INDEX idx,
+                                  WebPFeatureFlags feature,
+                                  WebPFeatureFlags vp8x_flags,
+                                  int max, int* num) {
+  const WebPMuxError err = WebPMuxNumChunks(mux, kChunks[idx].id, num);
+  const int has_limit = (max > -1);
+  const int has_feature = (feature != NO_FLAG);
+  if (err != WEBP_MUX_OK) return err;
+  if (has_limit && *num > max) return WEBP_MUX_INVALID_ARGUMENT;
+  if (has_feature && IsNotCompatible(vp8x_flags & feature, *num)) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+  return WEBP_MUX_OK;
+}
+
+WebPMuxError MuxValidate(const WebPMux* const mux) {
+  int num_iccp;
+  int num_meta;
+  int num_loop_chunks;
+  int num_frames;
+  int num_tiles;
+  int num_vp8x;
+  int num_images;
+  int num_alpha;  // Filled in by ValidateChunk() but not read afterwards.
+  uint32_t flags;
+  WebPMuxError err;
+
+  // Verify mux is not NULL.
+  if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+  // Verify mux has at least one image.
+  if (mux->images_ == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+  err = WebPMuxGetFeatures(mux, &flags);
+  if (err != WEBP_MUX_OK) return err;
+
+  // At most one color profile chunk.
+  err = ValidateChunk(mux, IDX_ICCP, ICCP_FLAG, flags, 1, &num_iccp);
+  if (err != WEBP_MUX_OK) return err;
+
+  // At most one XMP metadata.
+  err = ValidateChunk(mux, IDX_META, META_FLAG, flags, 1, &num_meta);
+  if (err != WEBP_MUX_OK) return err;
+
+  // Animation: ANIMATION_FLAG, loop chunk and frame chunk(s) are consistent.
+  // At most one loop chunk.
+  err = ValidateChunk(mux, IDX_LOOP, NO_FLAG, flags, 1, &num_loop_chunks);
+  if (err != WEBP_MUX_OK) return err;
+  err = ValidateChunk(mux, IDX_FRAME, NO_FLAG, flags, -1, &num_frames);
+  if (err != WEBP_MUX_OK) return err;
+
+  {
+    const int has_animation = !!(flags & ANIMATION_FLAG);
+    if (has_animation && (num_loop_chunks == 0 || num_frames == 0)) {
+      return WEBP_MUX_INVALID_ARGUMENT;  // Flag set but loop or frames missing.
+    }
+    if (!has_animation && (num_loop_chunks == 1 || num_frames > 0)) {
+      return WEBP_MUX_INVALID_ARGUMENT;  // Loop or frames without the flag.
+    }
+  }
+
+  // Tiling: TILE_FLAG and tile chunk(s) are consistent.
+  err = ValidateChunk(mux, IDX_TILE, TILE_FLAG, flags, -1, &num_tiles);
+  if (err != WEBP_MUX_OK) return err;
+
+  // Verify either VP8X chunk is present OR there is only one elem in
+  // mux->images_ (IDX_VP8 maps to WEBP_CHUNK_IMAGE in kChunks).
+  err = ValidateChunk(mux, IDX_VP8X, NO_FLAG, flags, 1, &num_vp8x);
+  if (err != WEBP_MUX_OK) return err;
+  err = ValidateChunk(mux, IDX_VP8, NO_FLAG, flags, -1, &num_images);
+  if (err != WEBP_MUX_OK) return err;
+  if (num_vp8x == 0 && num_images != 1) return WEBP_MUX_INVALID_ARGUMENT;
+
+  // ALPHA_FLAG & alpha chunk(s) are consistent.
+  if (num_vp8x > 0 && MuxHasLosslessImages(mux->images_)) {
+    // Special case: we have a VP8X chunk as well as some lossless images.
+    if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
+  } else {
+    err = ValidateChunk(mux, IDX_ALPHA, ALPHA_FLAG, flags, -1, &num_alpha);
+    if (err != WEBP_MUX_OK) return err;
+  }
+
+  // num_tiles & num_images are consistent.
+  if (num_tiles > 0 && num_images != num_tiles) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  return WEBP_MUX_OK;
+}
+
+#undef NO_FLAG
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/mux/muxread.c b/drivers/webpold/mux/muxread.c
new file mode 100644
index 0000000000..21c3cfbaeb
--- /dev/null
+++ b/drivers/webpold/mux/muxread.c
@@ -0,0 +1,411 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Read APIs for mux.
+//
+// Authors: Urvang (urvang@google.com)
+//          Vikas (vikasa@google.com)
+
+#include <assert.h>
+#include "./muxi.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Helper method(s).
+
+// Handy MACRO.
+#define SWITCH_ID_LIST(INDEX, LIST)                                           \
+  if (idx == (INDEX)) {                                                       \
+    const WebPChunk* const chunk = ChunkSearchList((LIST), nth,               \
+                                                   kChunks[(INDEX)].tag);     \
+    if (chunk) {                                                              \
+      *data = chunk->data_;                                                   \
+      return WEBP_MUX_OK;                                                     \
+    } else {                                                                  \
+      return WEBP_MUX_NOT_FOUND;                                              \
+    }                                                                         \
+  }
+
+static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
+                           uint32_t nth, WebPData* const data) {
+  assert(mux != NULL);
+  assert(!IsWPI(kChunks[idx].id));
+  WebPDataInit(data);
+
+  SWITCH_ID_LIST(IDX_VP8X, mux->vp8x_);
+  SWITCH_ID_LIST(IDX_ICCP, mux->iccp_);
+  SWITCH_ID_LIST(IDX_LOOP, mux->loop_);
+  SWITCH_ID_LIST(IDX_META, mux->meta_);
+  SWITCH_ID_LIST(IDX_UNKNOWN, mux->unknown_);
+  return WEBP_MUX_NOT_FOUND;
+}
+#undef SWITCH_ID_LIST
+
+// Fills 'chunk' from 'data', which starts at the chunk header (tag + size),
+// after checking that the chunk fits in both 'data_size' and 'riff_size'.
+static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
+                                             const uint8_t* data,
+                                             size_t data_size, size_t riff_size,
+                                             int copy_data) {
+  uint32_t chunk_size;
+  WebPData chunk_data;
+
+  // Sanity checks: the whole header must be present before the size is read.
+  if (data_size < CHUNK_HEADER_SIZE) return WEBP_MUX_NOT_ENOUGH_DATA;
+  chunk_size = GetLE32(data + TAG_SIZE);
+
+  {
+    const size_t chunk_disk_size = SizeWithPadding(chunk_size);
+    if (chunk_disk_size > riff_size) return WEBP_MUX_BAD_DATA;
+    if (chunk_disk_size > data_size) return WEBP_MUX_NOT_ENOUGH_DATA;
+  }
+
+  // Data assignment: the payload follows the header; the tag is at offset 0.
+  chunk_data.bytes_ = data + CHUNK_HEADER_SIZE;
+  chunk_data.size_ = chunk_size;
+  return ChunkAssignData(chunk, &chunk_data, copy_data, GetLE32(data + 0));
+}
+
+//------------------------------------------------------------------------------
+// Create a mux object from WebP-RIFF data.
+
+WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
+                               int version) {
+  size_t riff_size;
+  uint32_t tag;
+  const uint8_t* end;
+  WebPMux* mux = NULL;
+  WebPMuxImage* wpi = NULL;
+  const uint8_t* data;
+  size_t size;
+  WebPChunk chunk;
+  ChunkInit(&chunk);
+
+  // Sanity checks.
+  if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_MUX_ABI_VERSION)) {
+    return NULL;  // version mismatch
+  }
+  if (bitstream == NULL) return NULL;
+
+  data = bitstream->bytes_;
+  size = bitstream->size_;
+
+  if (data == NULL) return NULL;
+  if (size < RIFF_HEADER_SIZE) return NULL;
+  if (GetLE32(data + 0) != MKFOURCC('R', 'I', 'F', 'F') ||
+      GetLE32(data + CHUNK_HEADER_SIZE) != MKFOURCC('W', 'E', 'B', 'P')) {
+    return NULL;
+  }
+
+  mux = WebPMuxNew();
+  if (mux == NULL) return NULL;
+
+  if (size < RIFF_HEADER_SIZE + TAG_SIZE) goto Err;
+
+  tag = GetLE32(data + RIFF_HEADER_SIZE);
+  if (tag != kChunks[IDX_VP8].tag &&
+      tag != kChunks[IDX_VP8L].tag &&
+      tag != kChunks[IDX_VP8X].tag) {
+    goto Err;  // First chunk should be VP8, VP8L or VP8X.
+  }
+
+  riff_size = SizeWithPadding(GetLE32(data + TAG_SIZE));
+  if (riff_size > MAX_CHUNK_PAYLOAD || riff_size > size) {
+    goto Err;
+  } else {
+    if (riff_size < size) {  // Redundant data after last chunk.
+      size = riff_size;  // To make sure we don't read any data beyond mux_size.
+    }
+  }
+
+  end = data + size;
+  data += RIFF_HEADER_SIZE;
+  size -= RIFF_HEADER_SIZE;
+
+  wpi = (WebPMuxImage*)malloc(sizeof(*wpi));
+  if (wpi == NULL) goto Err;
+  MuxImageInit(wpi);
+
+  // Loop over chunks.
+  while (data != end) {
+    WebPChunkId id;
+    WebPMuxError err;
+
+    err = ChunkVerifyAndAssignData(&chunk, data, size, riff_size, copy_data);
+    if (err != WEBP_MUX_OK) goto Err;
+
+    id = ChunkGetIdFromTag(chunk.tag_);
+
+    if (IsWPI(id)) {  // An image chunk (frame/tile/alpha/vp8).
+      WebPChunk** wpi_chunk_ptr =
+          MuxImageGetListFromId(wpi, id);  // Image chunk to set.
+      assert(wpi_chunk_ptr != NULL);
+      if (*wpi_chunk_ptr != NULL) goto Err;  // Consecutive alpha chunks or
+                                             // consecutive frame/tile chunks.
+      if (ChunkSetNth(&chunk, wpi_chunk_ptr, 1) != WEBP_MUX_OK) goto Err;
+      if (id == WEBP_CHUNK_IMAGE) {
+        wpi->is_partial_ = 0;  // wpi is completely filled.
+        // Add this to mux->images_ list.
+        if (MuxImagePush(wpi, &mux->images_) != WEBP_MUX_OK) goto Err;
+        MuxImageInit(wpi);  // Reset for reading next image.
+      } else {
+        wpi->is_partial_ = 1;  // wpi is only partially filled.
+      }
+    } else {  // A non-image chunk.
+      WebPChunk** chunk_list;
+      if (wpi->is_partial_) goto Err;  // Encountered a non-image chunk before
+                                       // getting all chunks of an image.
+      chunk_list = MuxGetChunkListFromId(mux, id);  // List to add this chunk.
+      if (chunk_list == NULL) chunk_list = &mux->unknown_;
+      if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
+    }
+    {
+      const size_t data_size = ChunkDiskSize(&chunk);
+      data += data_size;
+      size -= data_size;
+    }
+    ChunkInit(&chunk);
+  }
+
+  // Validate mux if complete.
+  if (MuxValidate(mux) != WEBP_MUX_OK) goto Err;
+
+  MuxImageDelete(wpi);
+  return mux;  // All OK;
+
+ Err:  // Something bad happened.
+  ChunkRelease(&chunk);
+  MuxImageDelete(wpi);
+  WebPMuxDelete(mux);
+  return NULL;
+}
+
+//------------------------------------------------------------------------------
+// Get API(s).
+
+WebPMuxError WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags) {
+  WebPData data;
+  WebPMuxError err;
+
+  if (mux == NULL || flags == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  *flags = 0;
+
+  // Check if VP8X chunk is present.
+  err = MuxGet(mux, IDX_VP8X, 1, &data);
+  if (err == WEBP_MUX_NOT_FOUND) {
+    // Check if VP8/VP8L chunk is present.
+    err = WebPMuxGetImage(mux, &data);
+    WebPDataClear(&data);
+    return err;
+  } else if (err != WEBP_MUX_OK) {
+    return err;
+  }
+
+  if (data.size_ < CHUNK_SIZE_BYTES) return WEBP_MUX_BAD_DATA;
+
+  // All OK. Fill up flags.
+  *flags = GetLE32(data.bytes_);
+  return WEBP_MUX_OK;
+}
+
+static uint8_t* EmitVP8XChunk(uint8_t* const dst, int width,
+                              int height, uint32_t flags) {
+  const size_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
+  assert(width >= 1 && height >= 1);
+  assert(width <= MAX_CANVAS_SIZE && height <= MAX_CANVAS_SIZE);
+  assert(width * (uint64_t)height < MAX_IMAGE_AREA);
+  PutLE32(dst, MKFOURCC('V', 'P', '8', 'X'));
+  PutLE32(dst + TAG_SIZE, VP8X_CHUNK_SIZE);
+  PutLE32(dst + CHUNK_HEADER_SIZE, flags);
+  PutLE24(dst + CHUNK_HEADER_SIZE + 4, width - 1);
+  PutLE24(dst + CHUNK_HEADER_SIZE + 7, height - 1);
+  return dst + vp8x_size;
+}
+
+// Assemble a single image WebP bitstream from 'wpi'.
+static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
+                                        WebPData* const bitstream) {
+  uint8_t* dst;
+
+  // Allocate data.
+  const int need_vp8x = (wpi->alpha_ != NULL);
+  const size_t vp8x_size = need_vp8x ? CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE : 0;
+  const size_t alpha_size = need_vp8x ? ChunkDiskSize(wpi->alpha_) : 0;
+  // Note: No need to output FRM/TILE chunk for a single image.
+  const size_t size = RIFF_HEADER_SIZE + vp8x_size + alpha_size +
+                      ChunkDiskSize(wpi->img_);
+  uint8_t* const data = (uint8_t*)malloc(size);
+  if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
+
+  // Main RIFF header.
+  dst = MuxEmitRiffHeader(data, size);
+
+  if (need_vp8x) {
+    int w, h;
+    WebPMuxError err;
+    assert(wpi->img_ != NULL);
+    err = MuxGetImageWidthHeight(wpi->img_, &w, &h);
+    if (err != WEBP_MUX_OK) {
+      free(data);
+      return err;
+    }
+    dst = EmitVP8XChunk(dst, w, h, ALPHA_FLAG);  // VP8X.
+    dst = ChunkListEmit(wpi->alpha_, dst);       // ALPH.
+  }
+
+  // Bitstream.
+  dst = ChunkListEmit(wpi->img_, dst);
+  assert(dst == data + size);
+
+  // Output.
+  bitstream->bytes_ = data;
+  bitstream->size_ = size;
+  return WEBP_MUX_OK;
+}
+
+WebPMuxError WebPMuxGetImage(const WebPMux* mux, WebPData* bitstream) {
+  WebPMuxError err;
+  WebPMuxImage* wpi = NULL;
+
+  if (mux == NULL || bitstream == NULL) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  err = MuxValidateForImage(mux);
+  if (err != WEBP_MUX_OK) return err;
+
+  // All well. Get the image.
+  err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, WEBP_CHUNK_IMAGE,
+                       &wpi);
+  assert(err == WEBP_MUX_OK);  // Already tested above.
+
+  return SynthesizeBitstream(wpi, bitstream);
+}
+
+WebPMuxError WebPMuxGetMetadata(const WebPMux* mux, WebPData* metadata) {
+  if (mux == NULL || metadata == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  return MuxGet(mux, IDX_META, 1, metadata);
+}
+
+WebPMuxError WebPMuxGetColorProfile(const WebPMux* mux,
+                                    WebPData* color_profile) {
+  if (mux == NULL || color_profile == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+  return MuxGet(mux, IDX_ICCP, 1, color_profile);
+}
+
+WebPMuxError WebPMuxGetLoopCount(const WebPMux* mux, int* loop_count) {
+  WebPData loop_data;  // Payload of the first LOOP chunk.
+  WebPMuxError err;
+
+  if (mux == NULL || loop_count == NULL) return WEBP_MUX_INVALID_ARGUMENT;
+
+  err = MuxGet(mux, IDX_LOOP, 1, &loop_data);
+  if (err != WEBP_MUX_OK) return err;
+  // kChunks[] is indexed by CHUNK_INDEX (IDX_*), not by WebPChunkId.
+  if (loop_data.size_ < kChunks[IDX_LOOP].size) return WEBP_MUX_BAD_DATA;
+  *loop_count = GetLE16(loop_data.bytes_);
+  return WEBP_MUX_OK;
+}
+
+static WebPMuxError MuxGetFrameTileInternal(
+    const WebPMux* const mux, uint32_t nth, WebPData* const bitstream,
+    int* const x_offset, int* const y_offset, int* const duration,
+    uint32_t tag) {
+  const WebPData* frame_tile_data;
+  WebPMuxError err;
+  WebPMuxImage* wpi;
+  // 'tag' selects frame vs. tile; kChunks[] is indexed by CHUNK_INDEX (IDX_*).
+  const int is_frame = (tag == kChunks[IDX_FRAME].tag) ? 1 : 0;
+  const CHUNK_INDEX idx = is_frame ? IDX_FRAME : IDX_TILE;
+  const WebPChunkId id = kChunks[idx].id;
+  // 'duration' is only written for frames, so it may be NULL for tiles.
+  if (mux == NULL || bitstream == NULL ||
+      x_offset == NULL || y_offset == NULL || (is_frame && duration == NULL)) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  // Get the nth WebPMuxImage.
+  err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, id, &wpi);
+  if (err != WEBP_MUX_OK) return err;
+
+  // Get frame chunk.
+  assert(wpi->header_ != NULL);  // As MuxImageGetNth() already checked header_.
+  frame_tile_data = &wpi->header_->data_;
+  // Offsets are stored halved and the duration minus one (see below).
+  if (frame_tile_data->size_ < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
+  *x_offset = 2 * GetLE24(frame_tile_data->bytes_ + 0);
+  *y_offset = 2 * GetLE24(frame_tile_data->bytes_ + 3);
+  if (is_frame) *duration = 1 + GetLE24(frame_tile_data->bytes_ + 12);
+
+  return SynthesizeBitstream(wpi, bitstream);
+}
+
+WebPMuxError WebPMuxGetFrame(const WebPMux* mux, uint32_t nth,
+                             WebPData* bitstream,
+                             int* x_offset, int* y_offset, int* duration) {
+  return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset,
+                                 duration, kChunks[IDX_FRAME].tag);
+}
+
+WebPMuxError WebPMuxGetTile(const WebPMux* mux, uint32_t nth,
+                            WebPData* bitstream,
+                            int* x_offset, int* y_offset) {
+  return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset, NULL,
+                                 kChunks[IDX_TILE].tag);
+}
+
+// Maps a chunk id to its position in kChunks[]; IDX_NIL when there is no match.
+static CHUNK_INDEX ChunkGetIndexFromId(WebPChunkId id) {
+  int idx = 0;
+  while (kChunks[idx].id != WEBP_CHUNK_NIL && kChunks[idx].id != id) {
+    ++idx;
+  }
+  return (kChunks[idx].id == WEBP_CHUNK_NIL) ? IDX_NIL : idx;
+}
+
+// Returns how many chunks in 'chunk_list' carry the tag 'tag'.
+// Passing NIL_TAG as 'tag' counts every chunk in the list.
+static int CountChunks(const WebPChunk* const chunk_list, uint32_t tag) {
+  const int match_all = (tag == NIL_TAG);
+  const WebPChunk* node = chunk_list;
+  int num_matches = 0;
+  while (node != NULL) {
+    if (match_all || node->tag_ == tag) ++num_matches;
+    node = node->next_;
+  }
+  return num_matches;
+}
+
+WebPMuxError WebPMuxNumChunks(const WebPMux* mux,
+                              WebPChunkId id, int* num_elements) {
+  if (mux == NULL || num_elements == NULL) {
+    return WEBP_MUX_INVALID_ARGUMENT;
+  }
+
+  if (IsWPI(id)) {
+    *num_elements = MuxImageCount(mux->images_, id);
+  } else {
+    WebPChunk* const* chunk_list = MuxGetChunkListFromId(mux, id);
+    if (chunk_list == NULL) {
+      *num_elements = 0;
+    } else {
+      const CHUNK_INDEX idx = ChunkGetIndexFromId(id);
+      *num_elements = CountChunks(*chunk_list, kChunks[idx].tag);
+    }
+  }
+
+  return WEBP_MUX_OK;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/types.h b/drivers/webpold/types.h
new file mode 100644
index 0000000000..3e27190bef
--- /dev/null
+++ b/drivers/webpold/types.h
@@ -0,0 +1,45 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+//  Common types
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_WEBP_TYPES_H_
+#define WEBP_WEBP_TYPES_H_
+
+#include <stddef.h>  // for size_t
+
+#ifndef _MSC_VER
+#include <inttypes.h>
+#ifdef __STRICT_ANSI__
+#define WEBP_INLINE
+#else  /* __STRICT_ANSI__ */
+#define WEBP_INLINE inline
+#endif
+#else
+typedef signed   char int8_t;
+typedef unsigned char uint8_t;
+typedef signed   short int16_t;
+typedef unsigned short uint16_t;
+typedef signed   int int32_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long int uint64_t;
+typedef long long int int64_t;
+#define WEBP_INLINE __forceinline
+#endif  /* _MSC_VER */
+
+#ifndef WEBP_EXTERN
+// This explicitly marks library functions and allows for changing the
+// signature for e.g., Windows DLL builds.
+#define WEBP_EXTERN(type) extern type
+#endif  /* WEBP_EXTERN */
+
+// Macro to check ABI compatibility (same major revision number)
+#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8))
+
+#endif  /* WEBP_WEBP_TYPES_H_ */
diff --git a/drivers/webpold/utils/bit_reader.c b/drivers/webpold/utils/bit_reader.c
new file mode 100644
index 0000000000..1afb1db890
--- /dev/null
+++ b/drivers/webpold/utils/bit_reader.c
@@ -0,0 +1,229 @@
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include "./bit_reader.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define MK(X) (((bit_t)(X) << (BITS)) | (MASK))
+
+//------------------------------------------------------------------------------
+// VP8BitReader
+
+void VP8InitBitReader(VP8BitReader* const br,
+                      const uint8_t* const start, const uint8_t* const end) {
+  assert(br != NULL);
+  assert(start != NULL);
+  assert(start <= end);
+  br->range_   = MK(255 - 1);
+  br->buf_     = start;
+  br->buf_end_ = end;
+  br->value_   = 0;
+  br->missing_ = 8;   // to load the very first 8bits
+  br->eof_     = 0;
+}
+
+const uint8_t kVP8Log2Range[128] = {
+     7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0
+};
+
+// range = (range << kVP8Log2Range[range]) + trailing 1's
+const bit_t kVP8NewRange[128] = {
+  MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127),
+  MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127),
+  MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191),
+  MK(199), MK(207), MK(215), MK(223), MK(231), MK(239), MK(247), MK(127),
+  MK(131), MK(135), MK(139), MK(143), MK(147), MK(151), MK(155), MK(159),
+  MK(163), MK(167), MK(171), MK(175), MK(179), MK(183), MK(187), MK(191),
+  MK(195), MK(199), MK(203), MK(207), MK(211), MK(215), MK(219), MK(223),
+  MK(227), MK(231), MK(235), MK(239), MK(243), MK(247), MK(251), MK(127),
+  MK(129), MK(131), MK(133), MK(135), MK(137), MK(139), MK(141), MK(143),
+  MK(145), MK(147), MK(149), MK(151), MK(153), MK(155), MK(157), MK(159),
+  MK(161), MK(163), MK(165), MK(167), MK(169), MK(171), MK(173), MK(175),
+  MK(177), MK(179), MK(181), MK(183), MK(185), MK(187), MK(189), MK(191),
+  MK(193), MK(195), MK(197), MK(199), MK(201), MK(203), MK(205), MK(207),
+  MK(209), MK(211), MK(213), MK(215), MK(217), MK(219), MK(221), MK(223),
+  MK(225), MK(227), MK(229), MK(231), MK(233), MK(235), MK(237), MK(239),
+  MK(241), MK(243), MK(245), MK(247), MK(249), MK(251), MK(253), MK(127)
+};
+
+#undef MK
+
+void VP8LoadFinalBytes(VP8BitReader* const br) {
+  assert(br != NULL && br->buf_ != NULL);
+  if (br->buf_ >= br->buf_end_) {
+    br->eof_ = 1;  // Input exhausted: nothing left to load.
+    return;
+  }
+  br->value_ |= (bit_t)br->buf_[0] << ((BITS) - 8 + br->missing_);  // 8 bits.
+  ++br->buf_;
+  br->missing_ -= 8;
+}
+
+//------------------------------------------------------------------------------
+// Higher-level calls
+
+uint32_t VP8GetValue(VP8BitReader* const br, int bits) {
+  uint32_t result = 0;
+  int shift;  // Bits are read most-significant first.
+  for (shift = bits - 1; shift >= 0; --shift)
+    result |= VP8GetBit(br, 0x80) << shift;
+  return result;
+}
+
+int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
+  const int value = VP8GetValue(br, bits);
+  return VP8Get(br) ? -value : value;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitReader
+
+#define MAX_NUM_BIT_READ 25
+
+static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
+  0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
+  65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
+};
+
+void VP8LInitBitReader(VP8LBitReader* const br,
+                       const uint8_t* const start,
+                       size_t length) {
+  size_t i;
+  assert(br != NULL);
+  assert(start != NULL);
+  assert(length < 0xfffffff8u);   // can't happen with a RIFF chunk.
+
+  br->buf_ = start;
+  br->len_ = length;
+  br->val_ = 0;
+  br->pos_ = 0;
+  br->bit_pos_ = 0;
+  br->eos_ = 0;
+  br->error_ = 0;
+  for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
+    br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
+    ++br->pos_;
+  }
+}
+
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
+                            const uint8_t* const buf, size_t len) {
+  assert(br != NULL);
+  assert(buf != NULL);
+  assert(len < 0xfffffff8u);   // can't happen with a RIFF chunk.
+  br->eos_ = (br->pos_ >= len);
+  br->buf_ = buf;
+  br->len_ = len;
+}
+
+static void ShiftBytes(VP8LBitReader* const br) {
+  while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
+    br->val_ >>= 8;
+    br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
+    ++br->pos_;
+    br->bit_pos_ -= 8;
+  }
+}
+
+void VP8LFillBitWindow(VP8LBitReader* const br) {
+  if (br->bit_pos_ >= 32) {
+#if defined(__x86_64__) || defined(_M_X64)
+    if (br->pos_ + 8 < br->len_) {
+      // Little-endian fast path; memcpy() avoids an unaligned 64-bit load.
+      uint64_t next_bytes;
+      memcpy(&next_bytes, br->buf_ + br->pos_, sizeof(next_bytes));
+      br->val_ = (br->val_ >> 32) | (next_bytes << 32);
+      br->pos_ += 4;
+      br->bit_pos_ -= 32;
+    } else {
+      // Slow path: fewer than 9 bytes left, refill one byte at a time.
+      ShiftBytes(br);
+    }
+#else
+    // Always the slow path.
+    ShiftBytes(br);
+#endif
+  }
+  if (br->pos_ == br->len_ && br->bit_pos_ == 64) {  // All bits consumed.
+    br->eos_ = 1;
+  }
+}
+
+uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
+  const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
+  // Flag an error at end_of_stream.
+  if (!br->eos_) {
+    ++br->bit_pos_;
+    if (br->bit_pos_ >= 32) {
+      ShiftBytes(br);
+    }
+    // After this last bit is read, check if eos needs to be flagged.
+    if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
+      br->eos_ = 1;
+    }
+  } else {
+    br->error_ = 1;
+  }
+  return val;
+}
+
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
+  uint32_t val = 0;
+  assert(n_bits >= 0);
+  // Flag an error if end_of_stream or n_bits is more than allowed limit.
+  if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
+    // If this read is going to cross the read buffer, set the eos flag.
+    if (br->pos_ == br->len_) {
+      if ((br->bit_pos_ + n_bits) >= 64) {
+        br->eos_ = 1;
+        if ((br->bit_pos_ + n_bits) > 64) return val;
+      }
+    }
+    val = (br->val_ >> br->bit_pos_) & kBitMask[n_bits];
+    br->bit_pos_ += n_bits;
+    if (br->bit_pos_ >= 40) {
+      if (br->pos_ + 5 < br->len_) {
+        br->val_ >>= 40;
+        br->val_ |=
+            (((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
+            (((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
+            (((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
+            (((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
+            (((uint64_t)br->buf_[br->pos_ + 4]) << 56);
+        br->pos_ += 5;
+        br->bit_pos_ -= 40;
+      }
+      if (br->bit_pos_ >= 8) {
+        ShiftBytes(br);
+      }
+    }
+  } else {
+    br->error_ = 1;
+  }
+  return val;
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/bit_reader.h b/drivers/webpold/utils/bit_reader.h
new file mode 100644
index 0000000000..43cd948fd4
--- /dev/null
+++ b/drivers/webpold/utils/bit_reader.h
@@ -0,0 +1,198 @@
+//
+// Copyright 2010 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Boolean decoder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora (vikaas.arora@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_READER_H_
+#define WEBP_UTILS_BIT_READER_H_
+
+#include <assert.h>
+#ifdef _MSC_VER
+#include <stdlib.h>  // _byteswap_ulong
+#endif
+#include <string.h>  // For memcpy
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define BITS 32     // can be 32, 16 or 8
+#define MASK ((((bit_t)1) << (BITS)) - 1)
+#if (BITS == 32)
+typedef uint64_t bit_t;   // natural register type
+typedef uint32_t lbit_t;  // natural type for memory I/O
+#elif (BITS == 16)
+typedef uint32_t bit_t;
+typedef uint16_t lbit_t;
+#else
+typedef uint32_t bit_t;
+typedef uint8_t lbit_t;
+#endif
+
+//------------------------------------------------------------------------------
+// Bitreader and code-tree reader
+
+typedef struct VP8BitReader VP8BitReader;
+struct VP8BitReader {
+  const uint8_t* buf_;        // next byte to be read
+  const uint8_t* buf_end_;    // end of read buffer
+  int eof_;                   // true if input is exhausted
+
+  // boolean decoder
+  bit_t range_;            // current range minus 1. In [127, 254] interval.
+  bit_t value_;            // current value
+  int missing_;            // number of missing bits in value_ (8bit)
+};
+
+// Initialize the bit reader and the boolean decoder.
+void VP8InitBitReader(VP8BitReader* const br,
+                      const uint8_t* const start, const uint8_t* const end);
+
+// return the next value made of 'num_bits' bits
+uint32_t VP8GetValue(VP8BitReader* const br, int num_bits);
+static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
+  return VP8GetValue(br, 1);
+}
+
+// return the next value with sign-extension.
+int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
+
+// Read a bit with proba 'prob'. Speed-critical function!
+extern const uint8_t kVP8Log2Range[128];
+extern const bit_t kVP8NewRange[128];
+
+void VP8LoadFinalBytes(VP8BitReader* const br);    // special case for the tail
+
+static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
+  assert(br && br->buf_);
+  // Read 'BITS' bits at a time if possible.
+  if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
+    // convert memory type to register type (with some zero'ing!)
+    bit_t bits;
+    lbit_t in_bits = *(lbit_t*)br->buf_;
+    br->buf_ += (BITS) >> 3;
+#if !defined(__BIG_ENDIAN__)
+#if (BITS == 32)
+#if defined(__i386__) || defined(__x86_64__)
+    __asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits));
+    bits = (bit_t)in_bits;   // 32b -> 64b zero-extension
+#elif defined(_MSC_VER)
+    bits = _byteswap_ulong(in_bits);
+#else
+    bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
+         | ((in_bits << 8) & 0xff0000)  | (in_bits << 24);
+#endif  // x86
+#elif (BITS == 16)
+    // gcc will recognize a 'rorw $8, ...' here:
+    bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
+#endif  // NOTE(review): no BITS == 8 branch here; 'bits' would stay unset.
+#else    // BIG_ENDIAN: bytes are already in the expected order, no swap.
+    bits = (bit_t)in_bits;
+#endif
+    br->value_ |= bits << br->missing_;
+    br->missing_ -= (BITS);
+  } else {
+    VP8LoadFinalBytes(br);    // no need to be inlined
+  }
+}
+
+static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, bit_t split) {
+  const bit_t value_split = split | (MASK);
+  if (br->missing_ > 0) {  // Make sure we have at least BITS bits in 'value_'
+    VP8LoadNewBytes(br);
+  }
+  if (br->value_ > value_split) {  // Bit is 1: keep the part above the split.
+    br->range_ -= value_split + 1;
+    br->value_ -= value_split + 1;
+    return 1;
+  } else {  // Bit is 0: the split becomes the new range.
+    br->range_ = value_split;
+    return 0;
+  }
+}
+
+static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
+  // range_ is in [0..127] interval here.
+  const int idx = br->range_ >> (BITS);
+  const int shift = kVP8Log2Range[idx];
+  br->range_ = kVP8NewRange[idx];
+  br->value_ <<= shift;
+  br->missing_ += shift;
+}
+
+static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
+  // It's important to avoid generating a 64bit x 64bit multiply here.
+  // We just need an 8b x 8b after all.
+  const bit_t split =
+      (bit_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
+  const int bit = VP8BitUpdate(br, split);
+  if (br->range_ <= (((bit_t)0x7e << (BITS)) | (MASK))) {
+    VP8Shift(br);
+  }
+  return bit;
+}
+
+static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
+  const bit_t split = (br->range_ >> 1);
+  const int bit = VP8BitUpdate(br, split);
+  VP8Shift(br);
+  return bit ? -v : v;
+}
+
+
+// -----------------------------------------------------------------------------
+// Bitreader
+
+typedef struct {
+  uint64_t       val_;
+  const uint8_t* buf_;
+  size_t         len_;
+  size_t         pos_;
+  int            bit_pos_;
+  int            eos_;
+  int            error_;
+} VP8LBitReader;
+
+void VP8LInitBitReader(VP8LBitReader* const br,
+                       const uint8_t* const start,
+                       size_t length);
+
+//  Sets a new data buffer.
+void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
+                            const uint8_t* const buffer, size_t length);
+
+// Reads the specified number of bits from Read Buffer.
+// Flags an error in case end_of_stream or n_bits is more than allowed limit.
+// Flags eos if this read attempt is going to cross the read buffer.
+uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
+
+// Reads one bit from Read Buffer. Flags an error in case end_of_stream.
+// Flags eos after reading last bit from the buffer.
+uint32_t VP8LReadOneBit(VP8LBitReader* const br);
+
+// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
+// 32 times after the last VP8LFillBitWindow. Any subsequent calls
+// (without VP8LFillBitWindow) will return invalid data.
+static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
+  const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
+  ++br->bit_pos_;
+  return val;
+}
+
+// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
+void VP8LFillBitWindow(VP8LBitReader* const br);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_BIT_READER_H_ */
diff --git a/drivers/webpold/utils/bit_writer.c b/drivers/webpold/utils/bit_writer.c
new file mode 100644
index 0000000000..671159cacd
--- /dev/null
+++ b/drivers/webpold/utils/bit_writer.c
@@ -0,0 +1,284 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+//         Vikas Arora (vikaas.arora@gmail.com)
+
+#include <assert.h>
+#include <string.h>   // for memcpy()
+#include <stdlib.h>
+#include "./bit_writer.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// VP8BitWriter
+
+// Ensures room for 'extra_size' more bytes. Returns 0 (error_ set) on failure.
+static int BitWriterResize(VP8BitWriter* const bw, size_t extra_size) {
+  uint8_t* new_buf;
+  size_t new_size;
+  const uint64_t needed_size_64b = (uint64_t)bw->pos_ + extra_size;
+  const size_t needed_size = (size_t)needed_size_64b;
+  if (needed_size_64b != needed_size) {   // sum does not fit in size_t
+    bw->error_ = 1;
+    return 0;
+  }
+  if (needed_size <= bw->max_pos_) return 1;   // current buffer is big enough
+  new_size = 2 * bw->max_pos_;   // may wrap over 32bit: next test catches it
+  if (new_size < needed_size) new_size = needed_size;
+  if (new_size < 1024) new_size = 1024;
+  new_buf = (uint8_t*)malloc(new_size);
+  if (new_buf == NULL) {
+    bw->error_ = 1;
+    return 0;
+  }
+  if (bw->pos_ > 0) memcpy(new_buf, bw->buf_, bw->pos_);   // buf_ may be NULL
+  free(bw->buf_);
+  bw->buf_ = new_buf;
+  bw->max_pos_ = new_size;
+  return 1;
+}
+
+static void kFlush(VP8BitWriter* const bw) {   // emit or defer one byte
+  const int s = 8 + bw->nb_bits_;
+  const int32_t bits = bw->value_ >> s;   // output byte, plus carry in bit 8
+  assert(bw->nb_bits_ >= 0);
+  bw->value_ -= bits << s;                // drop those bits from value_
+  bw->nb_bits_ -= 8;
+  if ((bits & 0xff) != 0xff) {
+    size_t pos = bw->pos_;
+    if (!BitWriterResize(bw, bw->run_ + 1)) {   // pending run + this byte
+      return;
+    }
+    if (bits & 0x100) {  // overflow -> propagate carry over pending 0xff's
+      if (pos > 0) bw->buf_[pos - 1]++;
+    }
+    if (bw->run_ > 0) {
+      const int value = (bits & 0x100) ? 0x00 : 0xff;   // 0xff + carry = 0x00
+      for (; bw->run_ > 0; --bw->run_) bw->buf_[pos++] = value;
+    }
+    bw->buf_[pos++] = bits;   // stores the low 8 bits (buf_ is uint8_t)
+    bw->pos_ = pos;
+  } else {
+    bw->run_++;   // delay writing of bytes 0xff, pending eventual carry.
+  }
+}
+
+//------------------------------------------------------------------------------
+// renormalization
+
+static const uint8_t kNorm[128] = {  // renorm_sizes[i] = 8 - log2(i)
+     7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0
+};
+
+// range = ((range + 1) << kVP8Log2Range[range]) - 1
+static const uint8_t kNewRange[128] = {
+  127, 127, 191, 127, 159, 191, 223, 127, 143, 159, 175, 191, 207, 223, 239,
+  127, 135, 143, 151, 159, 167, 175, 183, 191, 199, 207, 215, 223, 231, 239,
+  247, 127, 131, 135, 139, 143, 147, 151, 155, 159, 163, 167, 171, 175, 179,
+  183, 187, 191, 195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239,
+  243, 247, 251, 127, 129, 131, 133, 135, 137, 139, 141, 143, 145, 147, 149,
+  151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173, 175, 177, 179,
+  181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203, 205, 207, 209,
+  211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239,
+  241, 243, 245, 247, 249, 251, 253, 127
+};
+
+int VP8PutBit(VP8BitWriter* const bw, int bit, int prob) {
+  const int split = (bw->range_ * prob) >> 8;
+  if (bit) {
+    bw->value_ += split + 1;
+    bw->range_ -= split + 1;
+  } else {
+    bw->range_ = split;
+  }
+  if (bw->range_ < 127) {   // emit 'shift' bits out and renormalize
+    const int shift = kNorm[bw->range_];
+    bw->range_ = kNewRange[bw->range_];
+    bw->value_ <<= shift;
+    bw->nb_bits_ += shift;
+    if (bw->nb_bits_ > 0) kFlush(bw);
+  }
+  return bit;
+}
+
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit) {
+  const int split = bw->range_ >> 1;
+  if (bit) {
+    bw->value_ += split + 1;
+    bw->range_ -= split + 1;
+  } else {
+    bw->range_ = split;
+  }
+  if (bw->range_ < 127) {
+    bw->range_ = kNewRange[bw->range_];
+    bw->value_ <<= 1;
+    bw->nb_bits_ += 1;
+    if (bw->nb_bits_ > 0) kFlush(bw);
+  }
+  return bit;
+}
+
+void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits) {
+  int bit_mask = 1 << (nb_bits - 1);   // MSB of the nb_bits-wide field first
+  for (; bit_mask != 0; bit_mask >>= 1)
+    VP8PutBitUniform(bw, value & bit_mask);   // any non-zero counts as 1
+}
+
+void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits) {
+  // Layout: one "non-zero" flag bit, then (only if set) the magnitude on
+  // nb_bits bits followed by one sign bit (1 = negative).
+  const int is_negative = (value < 0);
+  const int magnitude = is_negative ? -value : value;
+  if (VP8PutBitUniform(bw, value != 0)) {
+    VP8PutValue(bw, (magnitude << 1) | is_negative, nb_bits + 1);
+  }
+}
+
+//------------------------------------------------------------------------------
+
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size) {
+  bw->range_   = 255 - 1;
+  bw->value_   = 0;
+  bw->run_     = 0;    // no 0xff byte is pending
+  bw->nb_bits_ = -8;   // the value VP8BitWriterAppend() requires
+  bw->pos_     = 0;
+  bw->max_pos_ = 0;    // nothing allocated yet
+  bw->error_   = 0;
+  bw->buf_     = NULL; // allocated by BitWriterResize() when first needed
+  return (expected_size > 0) ? BitWriterResize(bw, expected_size) : 1;
+}
+
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw) {
+  VP8PutValue(bw, 0, 9 - bw->nb_bits_);
+  bw->nb_bits_ = 0;   // pad with zeroes
+  kFlush(bw);
+  return bw->buf_;
+}
+
+int VP8BitWriterAppend(VP8BitWriter* const bw,
+                       const uint8_t* data, size_t size) {
+  assert(data);
+  if (bw->nb_bits_ != -8) return 0;   // kFlush() must have been called
+  if (!BitWriterResize(bw, size)) return 0;
+  memcpy(bw->buf_ + bw->pos_, data, size);
+  bw->pos_ += size;
+  return 1;
+}
+
+void VP8BitWriterWipeOut(VP8BitWriter* const bw) {
+  if (bw) {
+    free(bw->buf_);
+    memset(bw, 0, sizeof(*bw));
+  }
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+
+// Ensures room for 'extra_size' more bytes. Returns 1 on success, 0 on error.
+static int VP8LBitWriterResize(VP8LBitWriter* const bw, size_t extra_size) {
+  uint8_t* allocated_buf;
+  size_t allocated_size;
+  const size_t current_size = VP8LBitWriterNumBytes(bw);
+  const uint64_t size_required_64b = (uint64_t)current_size + extra_size;
+  const size_t size_required = (size_t)size_required_64b;
+  if (size_required != size_required_64b) {   // sum does not fit in size_t
+    bw->error_ = 1;
+    return 0;
+  }
+  if (bw->max_bytes_ > 0 && size_required <= bw->max_bytes_) return 1;
+  allocated_size = (3 * bw->max_bytes_) >> 1;   // grow by 1.5x at least
+  if (allocated_size < size_required) allocated_size = size_required;
+  // make allocated size multiple of 1k
+  allocated_size = (((allocated_size >> 10) + 1) << 10);
+  allocated_buf = (uint8_t*)malloc(allocated_size);
+  if (allocated_buf == NULL) {
+    bw->error_ = 1;
+    return 0;
+  }
+  if (current_size > 0) memcpy(allocated_buf, bw->buf_, current_size);
+  free(bw->buf_);   // buf_ is NULL on the first call: fine for free() only
+  bw->buf_ = allocated_buf;
+  bw->max_bytes_ = allocated_size;   // unused tail is zeroed just below
+  memset(allocated_buf + current_size, 0, allocated_size - current_size);
+  return 1;
+}
+
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size) {
+  memset(bw, 0, sizeof(*bw));
+  return VP8LBitWriterResize(bw, expected_size);
+}
+
+void VP8LBitWriterDestroy(VP8LBitWriter* const bw) {
+  if (bw != NULL) {
+    free(bw->buf_);
+    memset(bw, 0, sizeof(*bw));
+  }
+}
+
+void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits) {
+  if (n_bits < 1) return;   // nothing to write
+#if !defined(__BIG_ENDIAN__)
+  // Technically, this branch of the code can write up to 25 bits at a time,
+  // but in prefix encoding, the maximum number of bits written is 18 at a time.
+  {
+    uint8_t* const p = &bw->buf_[bw->bit_pos_ >> 3];   // byte of the next bit
+    uint32_t v = *(const uint32_t*)p;   // NOTE(review): possibly unaligned load
+    v |= bits << (bw->bit_pos_ & 7);    // OR in above the bits already used
+    *(uint32_t*)p = v;                  // NOTE(review): possibly unaligned store
+    bw->bit_pos_ += n_bits;
+  }
+#else  // BIG_ENDIAN
+  {
+    uint8_t* p = &bw->buf_[bw->bit_pos_ >> 3];
+    const int bits_reserved_in_first_byte = bw->bit_pos_ & 7;
+    const int bits_left_to_write = n_bits - 8 + bits_reserved_in_first_byte;
+    // implicit & 0xff is assumed for uint8_t arithmetics
+    *p++ |= bits << bits_reserved_in_first_byte;
+    bits >>= 8 - bits_reserved_in_first_byte;
+    if (bits_left_to_write >= 1) {
+      *p++ = bits;
+      bits >>= 8;
+      if (bits_left_to_write >= 9) {
+        *p++ = bits;
+        bits >>= 8;
+      }
+    }
+    assert(n_bits <= 25);
+    *p = bits;
+    bw->bit_pos_ += n_bits;
+  }
+#endif
+  if ((bw->bit_pos_ >> 3) > (bw->max_bytes_ - 8)) {   // under 8 bytes of slack
+    const uint64_t extra_size = 32768ULL + bw->max_bytes_;
+    if (extra_size != (size_t)extra_size ||
+        !VP8LBitWriterResize(bw, (size_t)extra_size)) {
+      bw->bit_pos_ = 0;   // presumably keeps later writes inside the old buffer
+      bw->error_ = 1;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/bit_writer.h b/drivers/webpold/utils/bit_writer.h
new file mode 100644
index 0000000000..57f39b11b1
--- /dev/null
+++ b/drivers/webpold/utils/bit_writer.h
@@ -0,0 +1,123 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Bit writing and boolean coder
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_BIT_WRITER_H_
+#define WEBP_UTILS_BIT_WRITER_H_
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Bit-writing
+
+typedef struct VP8BitWriter VP8BitWriter;
+struct VP8BitWriter {
+  int32_t  range_;      // range-1
+  int32_t  value_;      // bit accumulator; output bytes are extracted from it
+  int      run_;        // number of outstanding 0xff bytes (pending a carry)
+  int      nb_bits_;    // number of pending bits
+  uint8_t* buf_;        // internal buffer. Re-allocated regularly. Not owned.
+  size_t   pos_;        // number of bytes written to buf_ so far
+  size_t   max_pos_;    // allocated size of buf_, in bytes
+  int      error_;      // true in case of error
+};
+
+// Initialize the object. Allocates some initial memory based on expected_size.
+int VP8BitWriterInit(VP8BitWriter* const bw, size_t expected_size);
+// Finalize the bitstream coding. Returns a pointer to the internal buffer.
+uint8_t* VP8BitWriterFinish(VP8BitWriter* const bw);
+// Release any pending memory and zeroes the object. Not a mandatory call.
+// Only useful in case of error, when the internal buffer hasn't been grabbed!
+void VP8BitWriterWipeOut(VP8BitWriter* const bw);
+
+int VP8PutBit(VP8BitWriter* const bw, int bit, int prob);
+int VP8PutBitUniform(VP8BitWriter* const bw, int bit);
+void VP8PutValue(VP8BitWriter* const bw, int value, int nb_bits);
+void VP8PutSignedValue(VP8BitWriter* const bw, int value, int nb_bits);
+
+// Appends some bytes to the internal buffer. Data is copied.
+int VP8BitWriterAppend(VP8BitWriter* const bw,
+                       const uint8_t* data, size_t size);
+
+// return approximate write position (in bits)
+static WEBP_INLINE uint64_t VP8BitWriterPos(const VP8BitWriter* const bw) {
+  return (uint64_t)(bw->pos_ + bw->run_) * 8 + 8 + bw->nb_bits_;
+}
+
+// Returns a pointer to the internal buffer.
+static WEBP_INLINE uint8_t* VP8BitWriterBuf(const VP8BitWriter* const bw) {
+  return bw->buf_;
+}
+// Returns the size of the internal buffer.
+static WEBP_INLINE size_t VP8BitWriterSize(const VP8BitWriter* const bw) {
+  return bw->pos_;
+}
+
+//------------------------------------------------------------------------------
+// VP8LBitWriter
+// TODO(vikasa): VP8LBitWriter is copied as-is from lossless code. There's scope
+// of re-using VP8BitWriter. Will evaluate once basic lossless encoder is
+// implemented.
+
+typedef struct {
+  uint8_t* buf_;
+  size_t bit_pos_;
+  size_t max_bytes_;
+
+  // After all bits are written, the caller must observe the state of
+  // error_. A value of 1 indicates that a memory allocation failure
+  // has happened during bit writing. A value of 0 indicates successful
+  // writing of bits.
+  int error_;
+} VP8LBitWriter;
+
+static WEBP_INLINE size_t VP8LBitWriterNumBytes(VP8LBitWriter* const bw) {
+  return (bw->bit_pos_ + 7) >> 3;
+}
+
+static WEBP_INLINE uint8_t* VP8LBitWriterFinish(VP8LBitWriter* const bw) {
+  return bw->buf_;
+}
+
+// Returns 0 in case of memory allocation error.
+int VP8LBitWriterInit(VP8LBitWriter* const bw, size_t expected_size);
+
+void VP8LBitWriterDestroy(VP8LBitWriter* const bw);
+
+// This function writes bits into bytes in increasing addresses, and within
+// a byte least-significant-bit first.
+//
+// The function can write up to 16 bits in one go with WriteBits
+// Example: let's assume that 3 bits (Rs below) have been written already:
+//
+// BYTE-0     BYTE+1       BYTE+2
+//
+// 0000 0RRR    0000 0000    0000 0000
+//
+// Now, we could write 5 or fewer bits in MSB by just shifting by 3
+// and OR'ing to BYTE-0.
+//
+// For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
+// and locate the rest in BYTE+1 and BYTE+2.
+//
+// VP8LBitWriter's error_ flag is set in case of  memory allocation error.
+void VP8LWriteBits(VP8LBitWriter* const bw, int n_bits, uint32_t bits);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_BIT_WRITER_H_ */
diff --git a/drivers/webpold/utils/color_cache.c b/drivers/webpold/utils/color_cache.c
new file mode 100644
index 0000000000..560f81db10
--- /dev/null
+++ b/drivers/webpold/utils/color_cache.c
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./color_cache.h"
+#include "../utils/utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// VP8LColorCache.
+
+int VP8LColorCacheInit(VP8LColorCache* const cc, int hash_bits) {
+  const int hash_size = 1 << hash_bits;
+  assert(cc != NULL);
+  assert(hash_bits > 0);
+  cc->colors_ = (uint32_t*)WebPSafeCalloc((uint64_t)hash_size,
+                                          sizeof(*cc->colors_));
+  if (cc->colors_ == NULL) return 0;
+  cc->hash_shift_ = 32 - hash_bits;
+  return 1;
+}
+
+void VP8LColorCacheClear(VP8LColorCache* const cc) {
+  if (cc != NULL) {
+    free(cc->colors_);
+    cc->colors_ = NULL;
+  }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
diff --git a/drivers/webpold/utils/color_cache.h b/drivers/webpold/utils/color_cache.h
new file mode 100644
index 0000000000..da5e260195
--- /dev/null
+++ b/drivers/webpold/utils/color_cache.h
@@ -0,0 +1,68 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Color Cache for WebP Lossless
+//
+// Authors: Jyrki Alakuijala (jyrki@google.com)
+//          Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_COLOR_CACHE_H_
+#define WEBP_UTILS_COLOR_CACHE_H_
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Main color cache struct.
+typedef struct {
+  uint32_t *colors_;  // color entries
+  int hash_shift_;    // Hash shift: 32 - hash_bits.
+} VP8LColorCache;
+
+static const uint32_t kHashMul = 0x1e35a7bd;
+
+static WEBP_INLINE uint32_t VP8LColorCacheLookup(
+    const VP8LColorCache* const cc, uint32_t key) {
+  assert(key <= (~0U >> cc->hash_shift_));
+  return cc->colors_[key];
+}
+
+static WEBP_INLINE void VP8LColorCacheInsert(const VP8LColorCache* const cc,
+                                             uint32_t argb) {
+  const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
+  cc->colors_[key] = argb;
+}
+
+static WEBP_INLINE int VP8LColorCacheGetIndex(const VP8LColorCache* const cc,
+                                              uint32_t argb) {
+  return (kHashMul * argb) >> cc->hash_shift_;
+}
+
+static WEBP_INLINE int VP8LColorCacheContains(const VP8LColorCache* const cc,
+                                              uint32_t argb) {
+  const uint32_t key = (kHashMul * argb) >> cc->hash_shift_;
+  return cc->colors_[key] == argb;
+}
+
+//------------------------------------------------------------------------------
+
+// Initializes the color cache with 'hash_bits' bits for the keys.
+// Returns false in case of memory error.
+int VP8LColorCacheInit(VP8LColorCache* const color_cache, int hash_bits);
+
+// Delete the memory associated to color cache.
+void VP8LColorCacheClear(VP8LColorCache* const color_cache);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif  // WEBP_UTILS_COLOR_CACHE_H_
diff --git a/drivers/webpold/utils/filters.c b/drivers/webpold/utils/filters.c
new file mode 100644
index 0000000000..08f52a3d20
--- /dev/null
+++ b/drivers/webpold/utils/filters.c
@@ -0,0 +1,229 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#include "./filters.h"
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Helpful macro.
+
+# define SANITY_CHECK(in, out)                              \
+  assert(in != NULL);                                       \
+  assert(out != NULL);                                      \
+  assert(width > 0);                                        \
+  assert(height > 0);                                       \
+  assert(bpp > 0);                                          \
+  assert(stride >= width * bpp);
+
+static WEBP_INLINE void PredictLine(const uint8_t* src, const uint8_t* pred,
+                                    uint8_t* dst, int length, int inverse) {
+  int i;
+  if (inverse) {   // unfilter: add the prediction back (wraps, dst is uint8_t)
+    for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
+  } else {         // filter: keep only the residual src - pred (same wrapping)
+    for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
+  }
+}
+
+//------------------------------------------------------------------------------
+// Horizontal filter.
+
+static WEBP_INLINE void DoHorizontalFilter(const uint8_t* in,
+    int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
+  int h;
+  const uint8_t* preds = (inverse ? out : in);
+  SANITY_CHECK(in, out);
+
+  // Filter line-by-line.
+  for (h = 0; h < height; ++h) {
+    // Leftmost pixel is predicted from above (except for topmost scanline).
+    if (h == 0) {
+      memcpy((void*)out, (const void*)in, bpp);
+    } else {
+      PredictLine(in, preds - stride, out, bpp, inverse);
+    }
+    PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
+    preds += stride;
+    in += stride;
+    out += stride;
+  }
+}
+
+static void HorizontalFilter(const uint8_t* data, int width, int height,
+                             int bpp, int stride, uint8_t* filtered_data) {
+  DoHorizontalFilter(data, width, height, bpp, stride, 0, filtered_data);
+}
+
+static void HorizontalUnfilter(const uint8_t* data, int width, int height,
+                               int bpp, int stride, uint8_t* recon_data) {
+  DoHorizontalFilter(data, width, height, bpp, stride, 1, recon_data);
+}
+
+//------------------------------------------------------------------------------
+// Vertical filter.
+
+static WEBP_INLINE void DoVerticalFilter(const uint8_t* in,
+    int width, int height, int bpp, int stride, int inverse, uint8_t* out) {
+  int h;
+  const uint8_t* preds = (inverse ? out : in);
+  SANITY_CHECK(in, out);
+
+  // Very first top-left pixel is copied.
+  memcpy((void*)out, (const void*)in, bpp);
+  // Rest of top scan-line is left-predicted.
+  PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
+
+  // Filter line-by-line.
+  for (h = 1; h < height; ++h) {
+    in += stride;
+    out += stride;
+    PredictLine(in, preds, out, bpp * width, inverse);
+    preds += stride;
+  }
+}
+
+static void VerticalFilter(const uint8_t* data, int width, int height,
+                           int bpp, int stride, uint8_t* filtered_data) {
+  DoVerticalFilter(data, width, height, bpp, stride, 0, filtered_data);
+}
+
+static void VerticalUnfilter(const uint8_t* data, int width, int height,
+                             int bpp, int stride, uint8_t* recon_data) {
+  DoVerticalFilter(data, width, height, bpp, stride, 1, recon_data);
+}
+
+//------------------------------------------------------------------------------
+// Gradient filter.
+
+static WEBP_INLINE int GradientPredictor(uint8_t a, uint8_t b, uint8_t c) {
+  const int grad = (int)a + (int)b - (int)c;   // in [-255, 510] before clamping
+  return (grad > 255) ? 255 : (grad < 0) ? 0 : grad;
+}
+
+static WEBP_INLINE
+void DoGradientFilter(const uint8_t* in, int width, int height,
+                      int bpp, int stride, int inverse, uint8_t* out) {
+  const uint8_t* preds = (inverse ? out : in);
+  int h;
+  SANITY_CHECK(in, out);
+
+  // left prediction for top scan-line
+  memcpy((void*)out, (const void*)in, bpp);
+  PredictLine(in + bpp, preds, out + bpp, bpp * (width - 1), inverse);
+
+  // Filter line-by-line.
+  for (h = 1; h < height; ++h) {
+    int w;
+    preds += stride;
+    in += stride;
+    out += stride;
+    // leftmost pixel: predict from above.
+    PredictLine(in, preds - stride, out, bpp, inverse);
+    for (w = bpp; w < width * bpp; ++w) {
+      const int pred = GradientPredictor(preds[w - bpp],
+                                         preds[w - stride],
+                                         preds[w - stride - bpp]);
+      out[w] = in[w] + (inverse ? pred : -pred);
+    }
+  }
+}
+
+static void GradientFilter(const uint8_t* data, int width, int height,
+                           int bpp, int stride, uint8_t* filtered_data) {
+  DoGradientFilter(data, width, height, bpp, stride, 0, filtered_data);
+}
+
+static void GradientUnfilter(const uint8_t* data, int width, int height,
+                             int bpp, int stride, uint8_t* recon_data) {
+  DoGradientFilter(data, width, height, bpp, stride, 1, recon_data);
+}
+
+#undef SANITY_CHECK
+
+// -----------------------------------------------------------------------------
+// Quick estimate of a potentially interesting filter mode to try, in addition
+// to the default NONE.
+
+#define SMAX 16
+#define SDIFF(a, b) (abs((a) - (b)) >> 4)   // Scoring diff, in [0..SMAX)
+
+WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
+                                    int width, int height, int stride) {
+  int i, j;
+  int bins[WEBP_FILTER_LAST][SMAX];   // bins[f][d] set: residual d seen with f
+  memset(bins, 0, sizeof(bins));
+  // We only sample every other pixel (and every other row). That's enough.
+  for (j = 2; j < height - 1; j += 2) {
+    const uint8_t* const p = data + j * stride;
+    int mean = p[0];
+    for (i = 2; i < width - 1; i += 2) {
+      const int diff0 = SDIFF(p[i], mean);
+      const int diff1 = SDIFF(p[i], p[i - 1]);        // vs. left neighbour
+      const int diff2 = SDIFF(p[i], p[i - stride]);   // vs. row above (stride!)
+      const int grad_pred =
+          GradientPredictor(p[i - 1], p[i - stride], p[i - stride - 1]);
+      const int diff3 = SDIFF(p[i], grad_pred);
+      bins[WEBP_FILTER_NONE][diff0] = 1;
+      bins[WEBP_FILTER_HORIZONTAL][diff1] = 1;
+      bins[WEBP_FILTER_VERTICAL][diff2] = 1;
+      bins[WEBP_FILTER_GRADIENT][diff3] = 1;
+      mean = (3 * mean + p[i] + 2) >> 2;   // running mean: 3/4 old, 1/4 new
+    }
+  }
+  {
+    WEBP_FILTER_TYPE filter, best_filter = WEBP_FILTER_NONE;
+    int best_score = 0x7fffffff;
+    for (filter = WEBP_FILTER_NONE; filter < WEBP_FILTER_LAST; ++filter) {
+      int score = 0;   // sum of the distinct residual levels seen; lower wins
+      for (i = 0; i < SMAX; ++i) {
+        if (bins[filter][i] > 0) {
+          score += i;
+        }
+      }
+      if (score < best_score) {   // strict '<': ties keep the earlier filter
+        best_score = score;
+        best_filter = filter;
+      }
+    }
+    return best_filter;
+  }
+}
+
+#undef SMAX
+#undef SDIFF
+
+//------------------------------------------------------------------------------
+
+const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST] = {
+  NULL,              // WEBP_FILTER_NONE
+  HorizontalFilter,  // WEBP_FILTER_HORIZONTAL
+  VerticalFilter,    // WEBP_FILTER_VERTICAL
+  GradientFilter     // WEBP_FILTER_GRADIENT
+};
+
+const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST] = {
+  NULL,                // WEBP_FILTER_NONE
+  HorizontalUnfilter,  // WEBP_FILTER_HORIZONTAL
+  VerticalUnfilter,    // WEBP_FILTER_VERTICAL
+  GradientUnfilter     // WEBP_FILTER_GRADIENT
+};
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/filters.h b/drivers/webpold/utils/filters.h
new file mode 100644
index 0000000000..db886be29a
--- /dev/null
+++ b/drivers/webpold/utils/filters.h
@@ -0,0 +1,54 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Spatial prediction using various filters
+//
+// Author: Urvang (urvang@google.com)
+
+#ifndef WEBP_UTILS_FILTERS_H_
+#define WEBP_UTILS_FILTERS_H_
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Filters.
+typedef enum {
+  WEBP_FILTER_NONE = 0,
+  WEBP_FILTER_HORIZONTAL,
+  WEBP_FILTER_VERTICAL,
+  WEBP_FILTER_GRADIENT,
+  WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1,  // end marker
+  WEBP_FILTER_BEST,
+  WEBP_FILTER_FAST
+} WEBP_FILTER_TYPE;
+
+typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
+                               int bpp, int stride, uint8_t* out);
+
+// Filter the given data using the given predictor.
+// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
+// in raster order.
+// 'bpp' is number of bytes per pixel, and
+// 'stride' is number of bytes per scan line (with possible padding).
+// 'out' should be pre-allocated.
+extern const WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
+
+// Reconstruct the original data from the given filtered data.
+extern const WebPFilterFunc WebPUnfilters[WEBP_FILTER_LAST];
+
+// Fast estimate of a potentially good filter.
+extern WEBP_FILTER_TYPE EstimateBestFilter(const uint8_t* data,
+                                           int width, int height, int stride);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_FILTERS_H_ */
diff --git a/drivers/webpold/utils/huffman.c b/drivers/webpold/utils/huffman.c
new file mode 100644
index 0000000000..1cc1cfd355
--- /dev/null
+++ b/drivers/webpold/utils/huffman.c
@@ -0,0 +1,238 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./huffman.h"
+#include "../utils/utils.h"
+#include "../format_constants.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NON_EXISTENT_SYMBOL (-1)
+
+static void TreeNodeInit(HuffmanTreeNode* const node) {
+  node->children_ = -1;   // negative offset means: 'unassigned so far'
+}
+
+static int NodeIsEmpty(const HuffmanTreeNode* const node) {
+  return (node->children_ < 0);   // still unassigned: no children, not a leaf.
+}
+
+static int IsFull(const HuffmanTree* const tree) {
+  return (tree->num_nodes_ == tree->max_nodes_);   // every node slot is used.
+}
+
+static void AssignChildren(HuffmanTree* const tree,
+                           HuffmanTreeNode* const node) {
+  HuffmanTreeNode* const children = tree->root_ + tree->num_nodes_;  // free
+  node->children_ = (int)(children - node);   // relative offset, not an index.
+  assert(children - node == (int)(children - node));   // must fit in an int.
+  tree->num_nodes_ += 2;   // both children are taken from the array at once.
+  TreeNodeInit(children + 0);
+  TreeNodeInit(children + 1);
+}
+
+static int TreeInit(HuffmanTree* const tree, int num_leaves) {
+  assert(tree != NULL);
+  if (num_leaves == 0) return 0;   // an empty tree is reported as failure.
+  // We allocate maximum possible nodes in the tree at once.
+  // Note that a Huffman tree is a full binary tree; and in a full binary tree
+  // with L leaves, the total number of nodes N = 2 * L - 1.
+  tree->max_nodes_ = 2 * num_leaves - 1;
+  tree->root_ = (HuffmanTreeNode*)WebPSafeMalloc((uint64_t)tree->max_nodes_,
+                                                 sizeof(*tree->root_));
+  if (tree->root_ == NULL) return 0;   // no memory was obtained.
+  TreeNodeInit(tree->root_);  // Initialize root.
+  tree->num_nodes_ = 1;   // only the root is in use so far.
+  return 1;
+}
+
+void HuffmanTreeRelease(HuffmanTree* const tree) {
+  if (tree != NULL) {   // a NULL 'tree' is accepted and ignored.
+    free(tree->root_);
+    tree->root_ = NULL;   // a second release then only calls free(NULL).
+    tree->max_nodes_ = 0;
+    tree->num_nodes_ = 0;
+  }
+}
+
+int HuffmanCodeLengthsToCodes(const int* const code_lengths,
+                              int code_lengths_size, int* const huff_codes) {
+  int symbol;
+  int code_len;
+  int code_length_hist[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  int curr_code;
+  int next_codes[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+  int max_code_length = 0;
+
+  assert(code_lengths != NULL);
+  assert(code_lengths_size > 0);
+  assert(huff_codes != NULL);
+
+  // Validate the code lengths and calculate the max code length.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    const int len = code_lengths[symbol];
+    if (len < 0) return 0;   // would index code_length_hist[] out of bounds.
+    if (len > max_code_length) max_code_length = len;
+  }
+  if (max_code_length > MAX_ALLOWED_CODE_LENGTH) return 0;
+
+  // Calculate code length histogram.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    ++code_length_hist[code_lengths[symbol]];
+  }
+  code_length_hist[0] = 0;   // length 0 means 'no code': not counted.
+
+  // Calculate the initial values of 'next_codes' for each code length.
+  // next_codes[code_len] denotes the code to be assigned to the next symbol
+  // of code length 'code_len'.
+  curr_code = 0;
+  next_codes[0] = -1;  // Unused, as code length = 0 implies code doesn't exist.
+  for (code_len = 1; code_len <= max_code_length; ++code_len) {
+    curr_code = (curr_code + code_length_hist[code_len - 1]) << 1;
+    next_codes[code_len] = curr_code;
+  }
+
+  // Get symbols: codes of equal length are handed out in symbol order.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > 0) {
+      huff_codes[symbol] = next_codes[code_lengths[symbol]]++;
+    } else {
+      huff_codes[symbol] = NON_EXISTENT_SYMBOL;
+    }
+  }
+  return 1;   // success; 0 is only returned for an invalid code length.
+}
+
+static int TreeAddSymbol(HuffmanTree* const tree,
+                         int symbol, int code, int code_length) {
+  HuffmanTreeNode* node = tree->root_;
+  const HuffmanTreeNode* const max_node = tree->root_ + tree->max_nodes_;
+  while (code_length-- > 0) {   // consume 'code' from its top bit downwards.
+    if (node >= max_node) {
+      return 0;   // walked past the allocated nodes.
+    }
+    if (NodeIsEmpty(node)) {
+      if (IsFull(tree)) return 0;    // error: too many symbols.
+      AssignChildren(tree, node);
+    } else if (HuffmanTreeNodeIsLeaf(node)) {
+      return 0;  // leaf is already occupied.
+    }
+    node += node->children_ + ((code >> code_length) & 1);  // bit picks child.
+  }
+  if (NodeIsEmpty(node)) {
+    node->children_ = 0;      // turn newly created node into a leaf.
+  } else if (!HuffmanTreeNodeIsLeaf(node)) {
+    return 0;   // trying to assign a symbol to already used code.
+  }
+  node->symbol_ = symbol;  // Add symbol in this node.
+  return 1;
+}
+
+int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
+                             const int* const code_lengths,
+                             int code_lengths_size) {
+  int symbol;
+  int num_symbols = 0;
+  int root_symbol = 0;
+
+  assert(tree != NULL);
+  assert(code_lengths != NULL);
+
+  // Find out number of symbols and the root symbol.
+  for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+    if (code_lengths[symbol] > 0) {
+      // Note: code length = 0 indicates non-existent symbol.
+      ++num_symbols;
+      root_symbol = symbol;   // ends up as the last symbol that has a code.
+    }
+  }
+
+  // Initialize the tree. Will fail for num_symbols = 0
+  if (!TreeInit(tree, num_symbols)) return 0;
+
+  // Build tree.
+  if (num_symbols == 1) {  // Trivial case.
+    const int max_symbol = code_lengths_size;
+    if (root_symbol < 0 || root_symbol >= max_symbol) {
+      HuffmanTreeRelease(tree);
+      return 0;
+    }
+    return TreeAddSymbol(tree, root_symbol, 0, 0);   // root becomes the leaf.
+  } else {  // Normal case.
+    int ok = 0;
+
+    // Get Huffman codes from the code lengths.
+    int* const codes =
+        (int*)WebPSafeMalloc((uint64_t)code_lengths_size, sizeof(*codes));
+    if (codes == NULL) goto End;
+
+    if (!HuffmanCodeLengthsToCodes(code_lengths, code_lengths_size, codes)) {
+      goto End;
+    }
+
+    // Add symbols one-by-one.
+    for (symbol = 0; symbol < code_lengths_size; ++symbol) {
+      if (code_lengths[symbol] > 0) {
+        if (!TreeAddSymbol(tree, symbol, codes[symbol], code_lengths[symbol])) {
+          goto End;
+        }
+      }
+    }
+    ok = 1;
+ End:
+    free(codes);   // 'codes' may be NULL here; free(NULL) does nothing.
+    ok = ok && IsFull(tree);   // a tree with unused node slots is rejected.
+    if (!ok) HuffmanTreeRelease(tree);   // no nodes are kept on failure.
+    return ok;
+  }
+}
+
+int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
+                             const int* const code_lengths,
+                             const int* const codes,
+                             const int* const symbols, int max_symbol,
+                             int num_symbols) {
+  int ok = 0;
+  int i;
+
+  assert(tree != NULL);
+  assert(code_lengths != NULL);
+  assert(codes != NULL);
+  assert(symbols != NULL);
+
+  // Initialize the tree. Will fail if num_symbols = 0.
+  if (!TreeInit(tree, num_symbols)) return 0;
+
+  // Add symbols one-by-one.
+  for (i = 0; i < num_symbols; ++i) {
+    if (codes[i] != NON_EXISTENT_SYMBOL) {   // entries without a code are skipped.
+      if (symbols[i] < 0 || symbols[i] >= max_symbol) {
+        goto End;   // symbol outside [0, max_symbol).
+      }
+      if (!TreeAddSymbol(tree, symbols[i], codes[i], code_lengths[i])) {
+        goto End;
+      }
+    }
+  }
+  ok = 1;
+ End:
+  ok = ok && IsFull(tree);   // a tree with unused node slots is rejected.
+  if (!ok) HuffmanTreeRelease(tree);   // no nodes are kept on failure.
+  return ok;
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/huffman.h b/drivers/webpold/utils/huffman.h
new file mode 100644
index 0000000000..f16447e649
--- /dev/null
+++ b/drivers/webpold/utils/huffman.h
@@ -0,0 +1,78 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Utilities for building and looking up Huffman trees.
+//
+// Author: Urvang Joshi (urvang@google.com)
+
+#ifndef WEBP_UTILS_HUFFMAN_H_
+#define WEBP_UTILS_HUFFMAN_H_
+
+#include <assert.h>
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// A node of a Huffman tree.
+typedef struct {
+  int symbol_;
+  int children_;  // delta offset to both children (contiguous) or 0 if leaf.
+} HuffmanTreeNode;
+
+// Huffman Tree.
+typedef struct HuffmanTree HuffmanTree;
+struct HuffmanTree {
+  HuffmanTreeNode* root_;   // all the nodes, starting at root.
+  int max_nodes_;           // max number of nodes
+  int num_nodes_;           // number of currently occupied nodes
+};
+
+// Returns true if the given node is a leaf of the Huffman tree.
+static WEBP_INLINE int HuffmanTreeNodeIsLeaf(
+    const HuffmanTreeNode* const node) {
+  return (node->children_ == 0);
+}
+
+// Go down one level. Most critical function. 'right_child' must be 0 or 1.
+static WEBP_INLINE const HuffmanTreeNode* HuffmanTreeNextNode(
+    const HuffmanTreeNode* node, int right_child) {
+  return node + node->children_ + right_child;
+}
+
+// Releases the nodes of the Huffman tree.
+// Note: It does NOT free 'tree' itself.
+void HuffmanTreeRelease(HuffmanTree* const tree);
+
+// Builds Huffman tree assuming code lengths are implicitly in symbol order.
+// Returns false in case of error (invalid tree or memory error).
+int HuffmanTreeBuildImplicit(HuffmanTree* const tree,
+                             const int* const code_lengths,
+                             int code_lengths_size);
+
+// Build a Huffman tree with explicitly given lists of code lengths, codes
+// and symbols. Verifies that all symbols added are smaller than max_symbol.
+// Returns false in case of an invalid symbol, invalid tree or memory error.
+int HuffmanTreeBuildExplicit(HuffmanTree* const tree,
+                             const int* const code_lengths,
+                             const int* const codes,
+                             const int* const symbols, int max_symbol,
+                             int num_symbols);
+
+// Utility: converts Huffman code lengths to corresponding Huffman codes.
+// 'huff_codes' should be pre-allocated.
+// Returns false in case of error (invalid code lengths); it allocates nothing.
+int HuffmanCodeLengthsToCodes(const int* const code_lengths,
+                              int code_lengths_size, int* const huff_codes);
+
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  // WEBP_UTILS_HUFFMAN_H_
diff --git a/drivers/webpold/utils/huffman_encode.c b/drivers/webpold/utils/huffman_encode.c
new file mode 100644
index 0000000000..e172b10a85
--- /dev/null
+++ b/drivers/webpold/utils/huffman_encode.c
@@ -0,0 +1,439 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless.
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "./huffman_encode.h"
+#include "../utils/utils.h"
+#include "../format_constants.h"
+
+// -----------------------------------------------------------------------------
+// Util function to optimize the symbol map for RLE coding
+
+// Heuristics for selecting the stride ranges to collapse.
+static int ValuesShouldBeCollapsedToStrideAverage(int a, int b) {
+  return abs(a - b) < 4;   // 'close enough': the values differ by at most 3.
+}
+
+// Change the population counts in place so that the subsequent Huffman tree
+// compression, especially its RLE-part, gives smaller output. Returns 0/1.
+static int OptimizeHuffmanForRle(int length, int* const counts) {
+  uint8_t* good_for_rle;
+  // 1) Trim trailing zeros: only counts[0..length - 1] is processed below.
+  int i;
+  for (; length >= 0; --length) {
+    if (length == 0) {
+      return 1;  // All zeros.
+    }
+    if (counts[length - 1] != 0) {
+      // Now counts[0..length - 1] does not have trailing zeros.
+      break;
+    }
+  }
+  // 2) Let's mark all population counts that already can be encoded
+  // with an rle code.
+  good_for_rle = (uint8_t*)calloc(length, 1);
+  if (good_for_rle == NULL) {
+    return 0;   // out of memory; 'counts' has not been modified.
+  }
+  {
+    // Let's not spoil any of the existing good rle codes.
+    // Mark any run of at least 5 zero counts as good_for_rle.
+    // Mark any run of at least 7 equal non-zero counts as good_for_rle.
+    int symbol = counts[0];
+    int stride = 0;
+    for (i = 0; i < length + 1; ++i) {   // i == length flushes the last run.
+      if (i == length || counts[i] != symbol) {
+        if ((symbol == 0 && stride >= 5) ||
+            (symbol != 0 && stride >= 7)) {
+          int k;
+          for (k = 0; k < stride; ++k) {
+            good_for_rle[i - k - 1] = 1;
+          }
+        }
+        stride = 1;   // counts[i] starts the next run.
+        if (i != length) {
+          symbol = counts[i];
+        }
+      } else {
+        ++stride;
+      }
+    }
+  }
+  // 3) Let's replace those population counts that lead to more rle codes.
+  {
+    int stride = 0;
+    int limit = counts[0];
+    int sum = 0;
+    for (i = 0; i < length + 1; ++i) {   // i == length flushes the last stride.
+      if (i == length || good_for_rle[i] ||
+          (i != 0 && good_for_rle[i - 1]) ||
+          !ValuesShouldBeCollapsedToStrideAverage(counts[i], limit)) {
+        if (stride >= 4 || (stride >= 3 && sum == 0)) {
+          int k;
+          // The stride must end, collapse what we have, if we have enough (4).
+          int count = (sum + stride / 2) / stride;   // rounded average.
+          if (count < 1) {
+            count = 1;
+          }
+          if (sum == 0) {
+            // Don't make an all zeros stride to be upgraded to ones.
+            count = 0;
+          }
+          for (k = 0; k < stride; ++k) {
+            // We don't want to change value at counts[i],
+            // that is already belonging to the next stride. Thus - 1.
+            counts[i - k - 1] = count;
+          }
+        }
+        stride = 0;
+        sum = 0;
+        if (i < length - 3) {
+          // All interesting strides have a count of at least 4,
+          // at least when non-zeros.
+          limit = (counts[i] + counts[i + 1] +
+                   counts[i + 2] + counts[i + 3] + 2) / 4;
+        } else if (i < length) {
+          limit = counts[i];
+        } else {
+          limit = 0;
+        }
+      }
+      ++stride;
+      if (i != length) {
+        sum += counts[i];
+        if (stride >= 4) {
+          limit = (sum + stride / 2) / stride;   // running rounded average.
+        }
+      }
+    }
+  }
+  free(good_for_rle);
+  return 1;
+}
+
+typedef struct {
+  int total_count_;        // population count of the leaf or merged node.
+  int value_;              // symbol for a leaf, -1 for a merged node.
+  int pool_index_left_;    // children, as indices into the node pool;
+  int pool_index_right_;   // both are -1 for a leaf.
+} HuffmanTree;
+
+// A comparer function for two Huffman trees: sorts first by 'total count'
+// (more comes first), and then by 'value' (less comes first).
+static int CompareHuffmanTrees(const void* ptr1, const void* ptr2) {
+  const HuffmanTree* const t1 = (const HuffmanTree*)ptr1;
+  const HuffmanTree* const t2 = (const HuffmanTree*)ptr2;
+  if (t1->total_count_ > t2->total_count_) {
+    return -1;
+  } else if (t1->total_count_ < t2->total_count_) {
+    return 1;
+  } else {
+    if (t1->value_ < t2->value_) {   // equal counts: smaller value sorts first.
+      return -1;
+    }
+    if (t1->value_ > t2->value_) {
+      return 1;
+    }
+    return 0;
+  }
+}
+
+static void SetBitDepths(const HuffmanTree* const tree,
+                         const HuffmanTree* const pool,
+                         uint8_t* const bit_depths, int level) {
+  if (tree->pool_index_left_ >= 0) {   // merged node: recurse into children.
+    SetBitDepths(&pool[tree->pool_index_left_], pool, bit_depths, level + 1);
+    SetBitDepths(&pool[tree->pool_index_right_], pool, bit_depths, level + 1);
+  } else {
+    bit_depths[tree->value_] = level;   // leaf: record its depth in the tree.
+  }
+}
+
+// Create an optimal Huffman tree.
+//
+// (histogram,histogram_size): population counts.
+// tree_depth_limit: maximum bit depth (inclusive) of the codes.
+// bit_depths[]: receives the code length of every symbol with a non-zero
+//               count; entries of the other symbols are not written.
+//
+// Returns 0 on memory error, 1 otherwise (also for an all-zero histogram).
+//
+// The catch here is that the tree cannot be arbitrarily deep:
+// count_min is the value that is to be faked as the minimum population
+// count, and it is doubled until the tree matches the maximum depth
+// requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_depth_limit, but
+// we are not planning to use this with extremely long blocks.
+// See http://en.wikipedia.org/wiki/Huffman_coding
+static int GenerateOptimalTree(const int* const histogram, int histogram_size,
+                               int tree_depth_limit,
+                               uint8_t* const bit_depths) {
+  int count_min;
+  HuffmanTree* tree_pool;
+  HuffmanTree* tree;
+  int tree_size_orig = 0;
+  int i;
+
+  for (i = 0; i < histogram_size; ++i) {
+    if (histogram[i] != 0) {
+      ++tree_size_orig;
+    }
+  }
+
+  // Nothing to build; this also avoids a zero-sized allocation request.
+  if (tree_size_orig == 0) return 1;
+
+  // 'tree' holds tree_size_orig entries and 'tree_pool', placed right after
+  // it, needs 2 * (tree_size_orig - 1): 3 * tree_size_orig covers both.
+  tree = (HuffmanTree*)WebPSafeMalloc(3ULL * tree_size_orig, sizeof(*tree));
+  if (tree == NULL) return 0;
+  tree_pool = tree + tree_size_orig;
+
+  // For block sizes with less than 64k symbols we never need to do a
+  // second iteration of this loop.
+  // If we actually start running inside this loop a lot, we would perhaps
+  // be better off with the Katajainen algorithm.
+  assert(tree_size_orig <= (1 << (tree_depth_limit - 1)));
+  for (count_min = 1; ; count_min *= 2) {
+    int tree_size = tree_size_orig;
+    // We need to pack the Huffman tree in tree_depth_limit bits.
+    // So, we try by faking histogram entries to be at least 'count_min'.
+    int idx = 0;
+    int j;
+    for (j = 0; j < histogram_size; ++j) {
+      if (histogram[j] != 0) {
+        const int count =
+            (histogram[j] < count_min) ? count_min : histogram[j];
+        tree[idx].total_count_ = count;
+        tree[idx].value_ = j;
+        tree[idx].pool_index_left_ = -1;
+        tree[idx].pool_index_right_ = -1;
+        ++idx;
+      }
+    }
+
+    // Build the Huffman tree.
+    qsort(tree, tree_size, sizeof(*tree), CompareHuffmanTrees);
+
+    if (tree_size > 1) {  // Normal case.
+      int tree_pool_size = 0;
+      while (tree_size > 1) {  // Finish when we have only one root.
+        int count;
+        tree_pool[tree_pool_size++] = tree[tree_size - 1];
+        tree_pool[tree_pool_size++] = tree[tree_size - 2];
+        count = tree_pool[tree_pool_size - 1].total_count_ +
+            tree_pool[tree_pool_size - 2].total_count_;
+        tree_size -= 2;
+        {
+          // Search for the insertion point.
+          int k;
+          for (k = 0; k < tree_size; ++k) {
+            if (tree[k].total_count_ <= count) {
+              break;
+            }
+          }
+          memmove(tree + (k + 1), tree + k, (tree_size - k) * sizeof(*tree));
+          tree[k].total_count_ = count;
+          tree[k].value_ = -1;
+
+          tree[k].pool_index_left_ = tree_pool_size - 1;
+          tree[k].pool_index_right_ = tree_pool_size - 2;
+          tree_size = tree_size + 1;
+        }
+      }
+      SetBitDepths(&tree[0], tree_pool, bit_depths, 0);
+    } else if (tree_size == 1) {  // Trivial case: only one element.
+      bit_depths[tree[0].value_] = 1;
+    }
+
+    {
+      // Test if this Huffman tree satisfies our 'tree_depth_limit' criteria.
+      int max_depth = bit_depths[0];
+      for (j = 1; j < histogram_size; ++j) {
+        if (max_depth < bit_depths[j]) {
+          max_depth = bit_depths[j];
+        }
+      }
+      if (max_depth <= tree_depth_limit) {
+        break;
+      }
+    }
+  }
+  free(tree);
+  return 1;
+}
+
+// -----------------------------------------------------------------------------
+// Coding of the Huffman tree values
+
+static HuffmanTreeToken* CodeRepeatedValues(int repetitions,
+                                            HuffmanTreeToken* tokens,
+                                            int value, int prev_value) {
+  assert(value <= MAX_ALLOWED_CODE_LENGTH);
+  if (value != prev_value) {   // a new value is always written literally once.
+    tokens->code = value;
+    tokens->extra_bits = 0;
+    ++tokens;
+    --repetitions;
+  }
+  while (repetitions >= 1) {
+    if (repetitions < 3) {   // 1 or 2 left: written as literal tokens.
+      int i;
+      for (i = 0; i < repetitions; ++i) {
+        tokens->code = value;
+        tokens->extra_bits = 0;
+        ++tokens;
+      }
+      break;
+    } else if (repetitions < 7) {   // 3..6 left: one token with code 16.
+      tokens->code = 16;
+      tokens->extra_bits = repetitions - 3;
+      ++tokens;
+      break;
+    } else {   // 7 or more left: emit a token for 6 of them and loop.
+      tokens->code = 16;
+      tokens->extra_bits = 3;
+      ++tokens;
+      repetitions -= 6;
+    }
+  }
+  return tokens;   // points just past the last token written.
+}
+
+static HuffmanTreeToken* CodeRepeatedZeros(int repetitions,
+                                           HuffmanTreeToken* tokens) {
+  while (repetitions >= 1) {
+    if (repetitions < 3) {   // 1 or 2 zeros: written as literal tokens.
+      int i;
+      for (i = 0; i < repetitions; ++i) {
+        tokens->code = 0;   // 0-value
+        tokens->extra_bits = 0;
+        ++tokens;
+      }
+      break;
+    } else if (repetitions < 11) {   // 3..10 zeros: one token with code 17.
+      tokens->code = 17;
+      tokens->extra_bits = repetitions - 3;
+      ++tokens;
+      break;
+    } else if (repetitions < 139) {   // 11..138 zeros: one token with code 18.
+      tokens->code = 18;
+      tokens->extra_bits = repetitions - 11;
+      ++tokens;
+      break;
+    } else {   // 139 or more: emit a token for 138 of them and loop.
+      tokens->code = 18;
+      tokens->extra_bits = 0x7f;  // 138 repeated 0s
+      ++tokens;
+      repetitions -= 138;
+    }
+  }
+  return tokens;   // points just past the last token written.
+}
+
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
+                                    HuffmanTreeToken* tokens, int max_tokens) {
+  HuffmanTreeToken* const starting_token = tokens;
+  HuffmanTreeToken* const ending_token = tokens + max_tokens;
+  const int depth_size = tree->num_symbols;
+  int prev_value = 8;  // 8 is the initial value for rle.
+  int i = 0;
+  assert(tokens != NULL);
+  while (i < depth_size) {
+    const int value = tree->code_lengths[i];
+    int k = i + 1;
+    int runs;
+    while (k < depth_size && tree->code_lengths[k] == value) ++k;
+    runs = k - i;   // length of the run of equal code lengths starting at i.
+    if (value == 0) {
+      tokens = CodeRepeatedZeros(runs, tokens);
+    } else {
+      tokens = CodeRepeatedValues(runs, tokens, value, prev_value);
+      prev_value = value;   // zero runs do not change the remembered value.
+    }
+    i += runs;
+    assert(tokens <= ending_token);   // 'max_tokens' is only checked here.
+  }
+  (void)ending_token;    // suppress 'unused variable' warning
+  return (int)(tokens - starting_token);   // number of tokens written.
+}
+
+// -----------------------------------------------------------------------------
+
+// Pre-reversed 4-bit values.
+static const uint8_t kReversedBits[16] = {
+  0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+  0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
+};
+
+static uint32_t ReverseBits(int num_bits, uint32_t bits) {
+  uint32_t retval = 0;
+  int i = 0;
+  while (i < num_bits) {   // consumes 'bits' four at a time, lowest first.
+    i += 4;
+    retval |= kReversedBits[bits & 0xf] << (MAX_ALLOWED_CODE_LENGTH + 1 - i);
+    bits >>= 4;
+  }
+  retval >>= (MAX_ALLOWED_CODE_LENGTH + 1 - num_bits);   // drop the padding.
+  return retval;
+}
+
+// Get the actual bit values for a tree of bit depths.
+static void ConvertBitDepthsToSymbols(HuffmanTreeCode* const tree) {
+  // 0 bit-depth means that the symbol does not exist.
+  int i;
+  int len;
+  uint32_t next_code[MAX_ALLOWED_CODE_LENGTH + 1];
+  int depth_count[MAX_ALLOWED_CODE_LENGTH + 1] = { 0 };
+
+  assert(tree != NULL);
+  len = tree->num_symbols;
+  for (i = 0; i < len; ++i) {   // histogram of the code lengths.
+    const int code_length = tree->code_lengths[i];
+    assert(code_length <= MAX_ALLOWED_CODE_LENGTH);
+    ++depth_count[code_length];
+  }
+  depth_count[0] = 0;  // ignore unused symbol
+  next_code[0] = 0;
+  {
+    uint32_t code = 0;   // first code of each length, from the shorter ones.
+    for (i = 1; i <= MAX_ALLOWED_CODE_LENGTH; ++i) {
+      code = (code + depth_count[i - 1]) << 1;
+      next_code[i] = code;
+    }
+  }
+  for (i = 0; i < len; ++i) {   // codes are stored with their bits reversed.
+    const int code_length = tree->code_lengths[i];
+    tree->codes[i] = ReverseBits(code_length, next_code[code_length]++);
+  }
+}
+
+// -----------------------------------------------------------------------------
+// Main entry point
+
+int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
+                          HuffmanTreeCode* const tree) {
+  const int num_symbols = tree->num_symbols;
+  if (!OptimizeHuffmanForRle(num_symbols, histogram)) {   // edits 'histogram'.
+    return 0;
+  }
+  if (!GenerateOptimalTree(histogram, num_symbols,
+                           tree_depth_limit, tree->code_lengths)) {
+    return 0;
+  }
+  // Create the actual bit codes for the bit lengths.
+  ConvertBitDepthsToSymbols(tree);
+  return 1;   // tree->code_lengths and tree->codes are now filled in.
+}
diff --git a/drivers/webpold/utils/huffman_encode.h b/drivers/webpold/utils/huffman_encode.h
new file mode 100644
index 0000000000..7f4aedc102
--- /dev/null
+++ b/drivers/webpold/utils/huffman_encode.h
@@ -0,0 +1,47 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Author: Jyrki Alakuijala (jyrki@google.com)
+//
+// Entropy encoding (Huffman) for webp lossless
+
+#ifndef WEBP_UTILS_HUFFMAN_ENCODE_H_
+#define WEBP_UTILS_HUFFMAN_ENCODE_H_
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Struct for holding the tree header in coded form.
+typedef struct {
+  uint8_t code;         // value (0..15) or escape code (16,17,18)
+  uint8_t extra_bits;   // extra bits for escape codes
+} HuffmanTreeToken;
+
+// Struct to represent the tree codes (depth and bits array).
+typedef struct {
+  int       num_symbols;   // Number of symbols.
+  uint8_t*  code_lengths;  // Code lengths of the symbols.
+  uint16_t* codes;         // Symbol Codes.
+} HuffmanTreeCode;
+
+// Turn the Huffman tree into a token sequence.
+// Returns the number of tokens used.
+int VP8LCreateCompressedHuffmanTree(const HuffmanTreeCode* const tree,
+                                    HuffmanTreeToken* tokens, int max_tokens);
+
+// Create an optimized tree (code lengths and codes). 'histogram' is modified.
+int VP8LCreateHuffmanTree(int* const histogram, int tree_depth_limit,
+                          HuffmanTreeCode* const tree);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif  // WEBP_UTILS_HUFFMAN_ENCODE_H_
diff --git a/drivers/webpold/utils/quant_levels.c b/drivers/webpold/utils/quant_levels.c
new file mode 100644
index 0000000000..f6884392aa
--- /dev/null
+++ b/drivers/webpold/utils/quant_levels.c
@@ -0,0 +1,154 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Quantize levels for specified number of quantization-levels ([2, 256]).
+// Min and max values are preserved (usual 0 and 255 for alpha plane).
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+
+#include "./quant_levels.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define NUM_SYMBOLS     256
+
+#define MAX_ITER  6             // Maximum number of convergence steps.
+#define ERROR_THRESHOLD 1e-4    // MSE stopping criterion.
+
+// -----------------------------------------------------------------------------
+// Quantize levels: remaps 'data' in place to at most 'num_levels' values.
+// Returns 0 on invalid arguments, 1 otherwise. '*sse' gets the squared error.
+int QuantizeLevels(uint8_t* const data, int width, int height,
+                   int num_levels, uint64_t* const sse) {
+  int freq[NUM_SYMBOLS] = { 0 };
+  int q_level[NUM_SYMBOLS] = { 0 };
+  double inv_q_level[NUM_SYMBOLS] = { 0 };
+  int min_s = 255, max_s = 0;
+  const size_t data_size = (size_t)height * (size_t)width;  // no int overflow.
+  int i, num_levels_in, iter;
+  double last_err = 1.e38, err = 0.;
+  const double err_threshold = ERROR_THRESHOLD * data_size;
+
+  if (data == NULL) {
+    return 0;
+  }
+
+  if (width <= 0 || height <= 0) {
+    return 0;
+  }
+
+  if (num_levels < 2 || num_levels > 256) {
+    return 0;
+  }
+
+  {
+    size_t n;
+    num_levels_in = 0;
+    for (n = 0; n < data_size; ++n) {
+      num_levels_in += (freq[data[n]] == 0);
+      if (min_s > data[n]) min_s = data[n];
+      if (max_s < data[n]) max_s = data[n];
+      ++freq[data[n]];
+    }
+  }
+
+  if (num_levels_in <= num_levels) goto End;  // nothing to do!
+
+  // Start with uniformly spread centroids.
+  for (i = 0; i < num_levels; ++i) {
+    inv_q_level[i] = min_s + (double)(max_s - min_s) * i / (num_levels - 1);
+  }
+
+  // Fixed values. Won't be changed.
+  q_level[min_s] = 0;
+  q_level[max_s] = num_levels - 1;
+  assert(inv_q_level[0] == min_s);
+  assert(inv_q_level[num_levels - 1] == max_s);
+
+  // k-Means iterations.
+  for (iter = 0; iter < MAX_ITER; ++iter) {
+    double q_sum[NUM_SYMBOLS] = { 0 };
+    double q_count[NUM_SYMBOLS] = { 0 };
+    int s, slot = 0;
+
+    // Assign classes to representatives.
+    for (s = min_s; s <= max_s; ++s) {
+      // Keep track of the nearest neighbour 'slot'
+      while (slot < num_levels - 1 &&
+             2 * s > inv_q_level[slot] + inv_q_level[slot + 1]) {
+        ++slot;
+      }
+      if (freq[s] > 0) {
+        q_sum[slot] += (double)s * freq[s];   // product is done in double.
+        q_count[slot] += freq[s];
+      }
+      q_level[s] = slot;
+    }
+
+    // Assign new representatives to classes.
+    if (num_levels > 2) {
+      for (slot = 1; slot < num_levels - 1; ++slot) {
+        const double count = q_count[slot];
+        if (count > 0.) {
+          inv_q_level[slot] = q_sum[slot] / count;
+        }
+      }
+    }
+
+    // Compute convergence error.
+    err = 0.;
+    for (s = min_s; s <= max_s; ++s) {
+      const double error = s - inv_q_level[q_level[s]];
+      err += freq[s] * error * error;
+    }
+
+    // Check for convergence: we stop as soon as the error is no
+    // longer improving.
+    if (last_err - err < err_threshold) break;
+    last_err = err;
+  }
+
+  // Remap the alpha plane to quantized values.
+  {
+    // double->int rounding operation can be costly, so we do it
+    // once for all before remapping. We also perform the data[] -> slot
+    // mapping, while at it (avoid one indirection in the final loop).
+    uint8_t map[NUM_SYMBOLS];
+    int s;
+    size_t n;
+    for (s = min_s; s <= max_s; ++s) {
+      const int slot = q_level[s];
+      map[s] = (uint8_t)(inv_q_level[slot] + .5);
+    }
+    // Final pass.
+    for (n = 0; n < data_size; ++n) {
+      data[n] = map[data[n]];
+    }
+  }
+ End:
+  // Store sum of squared error if needed.
+  if (sse != NULL) *sse = (uint64_t)err;   // 0 when 'data' was left as is.
+
+  return 1;
+}
+
+int DequantizeLevels(uint8_t* const data, int width, int height) {
+  if (data == NULL || width <= 0 || height <= 0) return 0;   // bad arguments.
+  // TODO(skal): implement gradient smoothing.
+  (void)data;
+  (void)width;
+  (void)height;
+  return 1;   // placeholder: 'data' is not modified yet.
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/quant_levels.h b/drivers/webpold/utils/quant_levels.h
new file mode 100644
index 0000000000..4f165fd230
--- /dev/null
+++ b/drivers/webpold/utils/quant_levels.h
@@ -0,0 +1,39 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Alpha plane quantization utility
+//
+// Author:  Vikas Arora (vikasa@google.com)
+
+#ifndef WEBP_UTILS_QUANT_LEVELS_H_
+#define WEBP_UTILS_QUANT_LEVELS_H_
+
+#include <stdlib.h>
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// Replace the input 'data' of size 'width'x'height' with 'num_levels'
+// quantized values. If not NULL, 'sse' will contain the sum of squared error.
+// Valid range for 'num_levels' is [2, 256].
+// Returns false in case of error (data is NULL, or parameters are invalid).
+int QuantizeLevels(uint8_t* const data, int width, int height, int num_levels,
+                   uint64_t* const sse);
+
+// Apply post-processing to input 'data' of size 'width'x'height' assuming
+// that the source was quantized to a reduced number of levels.
+// Returns false in case of error (data is NULL, invalid parameters, ...).
+int DequantizeLevels(uint8_t* const data, int width, int height);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_QUANT_LEVELS_H_ */
diff --git a/drivers/webpold/utils/rescaler.c b/drivers/webpold/utils/rescaler.c
new file mode 100644
index 0000000000..9825dcbc5f
--- /dev/null
+++ b/drivers/webpold/utils/rescaler.c
@@ -0,0 +1,152 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <assert.h>
+#include <stdlib.h>
+#include "./rescaler.h"
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define RFIX 30
+#define MULT_FIX(x,y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
+
+void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+                      uint8_t* const dst, int dst_width, int dst_height,
+                      int dst_stride, int num_channels, int x_add, int x_sub,
+                      int y_add, int y_sub, int32_t* const work) {
+  wrk->x_expand = (src_width < dst_width);  // horizontal upscaling?
+  wrk->src_width = src_width;
+  wrk->src_height = src_height;
+  wrk->dst_width = dst_width;
+  wrk->dst_height = dst_height;
+  wrk->dst = dst;
+  wrk->dst_stride = dst_stride;
+  wrk->num_channels = num_channels;
+  // for 'x_expand', we use bilinear interpolation
+  wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub;
+  wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub;
+  wrk->y_accum = y_add;
+  wrk->y_add = y_add;
+  wrk->y_sub = y_sub;
+  wrk->fx_scale = (1 << RFIX) / x_sub;   // x_sub must be non-zero
+  wrk->fy_scale = (1 << RFIX) / y_sub;   // y_sub must be non-zero
+  wrk->fxy_scale = wrk->x_expand ?       // divisor widened: no int overflow
+      ((int64_t)dst_height << RFIX) / ((int64_t)x_sub * src_height) :
+      ((int64_t)dst_height << RFIX) / ((int64_t)x_add * src_height);
+  wrk->irow = work;   // 'work' must hold 2 * num_channels * dst_width entries
+  wrk->frow = work + num_channels * dst_width;
+}
+
+void WebPRescalerImportRow(WebPRescaler* const wrk,
+                           const uint8_t* const src, int channel) {
+  const int x_stride = wrk->num_channels;   // step between samples of a channel
+  const int x_out_max = wrk->dst_width * wrk->num_channels;
+  int x_in = channel;    // read position in 'src', starts at this channel
+  int x_out;
+  int accum = 0;
+  if (!wrk->x_expand) {  // not expanding: accumulate source samples
+    int sum = 0;
+    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+      accum += wrk->x_add;
+      for (; accum > 0; accum -= wrk->x_sub) {
+        sum += src[x_in];
+        x_in += x_stride;
+      }
+      {        // Emit next horizontal pixel.
+        const int32_t base = src[x_in];
+        const int32_t frac = base * (-accum);   // accum <= 0 here
+        x_in += x_stride;
+        wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac;
+        // fresh fractional start for next pixel
+        sum = (int)MULT_FIX(frac, wrk->fx_scale);
+      }
+    }
+  } else {        // simple bilinear interpolation
+    int left = src[channel], right = src[channel];
+    for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+      if (accum < 0) {   // move on to the next pair of source samples
+        left = right;
+        x_in += x_stride;
+        right = src[x_in];
+        accum += wrk->x_add;
+      }
+      wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum;
+      accum -= wrk->x_sub;
+    }
+  }
+  // Accumulate the new row's contribution
+  for (x_out = channel; x_out < x_out_max; x_out += x_stride) {
+    wrk->irow[x_out] += wrk->frow[x_out];   // frow: this row, irow: running sum
+  }
+}
+
+uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk) {
+  if (wrk->y_accum <= 0) {   // an output row is pending
+    int x_out;
+    uint8_t* const dst = wrk->dst;   // row written (and returned) by this call
+    int32_t* const irow = wrk->irow;
+    const int32_t* const frow = wrk->frow;
+    const int yscale = wrk->fy_scale * (-wrk->y_accum);
+    const int x_out_max = wrk->dst_width * wrk->num_channels;
+
+    for (x_out = 0; x_out < x_out_max; ++x_out) {
+      const int frac = (int)MULT_FIX(frow[x_out], yscale);
+      const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
+      dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;   // clamp to [0,255]
+      irow[x_out] = frac;   // new fractional start
+    }
+    wrk->y_accum += wrk->y_add;
+    wrk->dst += wrk->dst_stride;   // next call writes the following row
+    return dst;
+  } else {
+    return NULL;   // nothing to export yet
+  }
+}
+
+#undef MULT_FIX
+#undef RFIX
+
+//------------------------------------------------------------------------------
+// all-in-one calls
+
+int WebPRescalerImport(WebPRescaler* const wrk, int num_lines,
+                       const uint8_t* src, int src_stride) {
+  int total_imported = 0;
+  while (total_imported < num_lines && wrk->y_accum > 0) {   // stop once a row is pending
+    int channel;
+    for (channel = 0; channel < wrk->num_channels; ++channel) {
+      WebPRescalerImportRow(wrk, src, channel);   // one pass per channel
+    }
+    src += src_stride;   // advance to the next source row
+    ++total_imported;
+    wrk->y_accum -= wrk->y_sub;
+  }
+  return total_imported;   // number of source rows actually consumed
+}
+
+int WebPRescalerExport(WebPRescaler* const rescaler) {
+  int total_exported = 0;
+  while (WebPRescalerHasPendingOutput(rescaler)) {   // i.e. y_accum <= 0
+    WebPRescalerExportRow(rescaler);   // writes one row and advances 'dst'
+    ++total_exported;
+  }
+  return total_exported;   // number of rows written
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/rescaler.h b/drivers/webpold/utils/rescaler.h
new file mode 100644
index 0000000000..9c9133d19b
--- /dev/null
+++ b/drivers/webpold/utils/rescaler.h
@@ -0,0 +1,76 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Rescaling functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_RESCALER_H_
+#define WEBP_UTILS_RESCALER_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include "../types.h"
+
+// Structure used for on-the-fly rescaling
+typedef struct {
+  int x_expand;               // true if we're expanding in the x direction
+  int num_channels;           // bytes to jump between pixels
+  int fy_scale, fx_scale;     // fixed-point scaling factor
+  int64_t fxy_scale;          // ''
+  // we need hpel-precise add/sub increments, for the downsampled U/V planes.
+  int y_accum;                // vertical accumulator
+  int y_add, y_sub;           // vertical increments (add ~= src, sub ~= dst)
+  int x_add, x_sub;           // horizontal increments (add ~= src, sub ~= dst)
+  int src_width, src_height;  // source dimensions
+  int dst_width, dst_height;  // destination dimensions
+  uint8_t* dst;               // next output row to be written
+  int dst_stride;             // added to 'dst' after each exported row
+  int32_t* irow, *frow;       // work buffer
+} WebPRescaler;
+
+// Initialize a rescaler given scratch area 'work' and dimensions of src & dst.
+void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height,
+                      uint8_t* const dst,
+                      int dst_width, int dst_height, int dst_stride,
+                      int num_channels,
+                      int x_add, int x_sub,
+                      int y_add, int y_sub,
+                      int32_t* const work);
+
+// Import a row of data and save its contribution in the rescaler.
+// 'channel' denotes the channel number to be imported.
+void WebPRescalerImportRow(WebPRescaler* const rescaler,
+                           const uint8_t* const src, int channel);
+
+// Import multiple rows over all channels, until at least one row is ready to
+// be exported. Returns the actual number of lines that were imported.
+int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows,
+                       const uint8_t* src, int src_stride);
+
+// Returns true if there are pending output rows ready.
+static WEBP_INLINE
+int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) {
+  return (rescaler->y_accum <= 0);   // accumulator used up: a row can be exported
+}
+
+// Export one row from rescaler. Returns the pointer where output was written,
+// or NULL if no row was pending.
+uint8_t* WebPRescalerExportRow(WebPRescaler* const wrk);
+
+// Export as many rows as possible. Returns the number of rows written.
+int WebPRescalerExport(WebPRescaler* const wrk);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_RESCALER_H_ */
diff --git a/drivers/webpold/utils/thread.c b/drivers/webpold/utils/thread.c
new file mode 100644
index 0000000000..ce89cf9dc7
--- /dev/null
+++ b/drivers/webpold/utils/thread.c
@@ -0,0 +1,247 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <assert.h>
+#include <string.h>   // for memset()
+#include "./thread.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef WEBP_USE_THREAD
+
+#if defined(_WIN32)
+
+//------------------------------------------------------------------------------
+// simplistic pthread emulation layer
+
+#include <process.h>
+
+// _beginthreadex requires __stdcall
+#define THREADFN unsigned int __stdcall
+#define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val)
+
+static int pthread_create(pthread_t* const thread, const void* attr,
+                          unsigned int (__stdcall *start)(void*), void* arg) {
+  (void)attr;   // thread attributes are not supported by this emulation
+  *thread = (pthread_t)_beginthreadex(NULL,   /* void *security */
+                                      0,      /* unsigned stack_size */
+                                      start,
+                                      arg,
+                                      0,      /* unsigned initflag */
+                                      NULL);  /* unsigned *thrdaddr */
+  if (*thread == NULL) return 1;   // creation failed
+  SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL);   // result ignored
+  return 0;   // 0 on success, like pthread_create()
+}
+
+static int pthread_join(pthread_t thread, void** value_ptr) {
+  (void)value_ptr;   // the thread's return value is not retrieved
+  return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 ||
+          CloseHandle(thread) == 0);   // non-zero if the wait or the close failed
+}
+
+// Mutex
+static int pthread_mutex_init(pthread_mutex_t* const mutex, void* mutexattr) {
+  (void)mutexattr;   // attributes are ignored
+  InitializeCriticalSection(mutex);   // mutex is a CRITICAL_SECTION on Win32
+  return 0;   // always reports success
+}
+
+static int pthread_mutex_lock(pthread_mutex_t* const mutex) {
+  EnterCriticalSection(mutex);   // blocks until the critical section is owned
+  return 0;   // always reports success
+}
+
+static int pthread_mutex_unlock(pthread_mutex_t* const mutex) {
+  LeaveCriticalSection(mutex);   // releases ownership of the critical section
+  return 0;   // always reports success
+}
+
+static int pthread_mutex_destroy(pthread_mutex_t* const mutex) {
+  DeleteCriticalSection(mutex);   // frees the critical section's resources
+  return 0;   // always reports success
+}
+
+// Condition
+static int pthread_cond_destroy(pthread_cond_t* const condition) {
+  int ok = 1;   // stays 1 only if every CloseHandle() below succeeds
+  ok &= (CloseHandle(condition->waiting_sem_) != 0);
+  ok &= (CloseHandle(condition->received_sem_) != 0);
+  ok &= (CloseHandle(condition->signal_event_) != 0);   // all three are always attempted
+  return !ok;   // 0 on success, 1 if any handle failed to close
+}
+
+static int pthread_cond_init(pthread_cond_t* const condition, void* cond_attr) {
+  (void)cond_attr;   // attributes are ignored
+  condition->waiting_sem_ = CreateSemaphore(NULL, 0, 1, NULL);    // count 0, max 1
+  condition->received_sem_ = CreateSemaphore(NULL, 0, 1, NULL);   // count 0, max 1
+  condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL);   // auto-reset, unsignaled
+  if (condition->waiting_sem_ == NULL ||
+      condition->received_sem_ == NULL ||
+      condition->signal_event_ == NULL) {
+    pthread_cond_destroy(condition);   // NOTE(review): also closes the NULL handle(s)
+    return 1;   // failure
+  }
+  return 0;   // success
+}
+
+static int pthread_cond_signal(pthread_cond_t* const condition) {
+  int ok = 1;   // with no waiter there is nothing to do: report success
+  if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) {
+    // a thread is waiting in pthread_cond_wait: allow it to be notified
+    ok = (SetEvent(condition->signal_event_) != 0);
+    // wait until the event is consumed so the signaler cannot consume
+    // the event via its own pthread_cond_wait.
+    ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) ==
+           WAIT_OBJECT_0);
+  }
+  return !ok;   // 0 on success, non-zero on failure (pthread convention)
+}
+
+static int pthread_cond_wait(pthread_cond_t* const condition,
+                             pthread_mutex_t* const mutex) {
+  int ok;
+  // note that there is a consumer available so the signal isn't dropped in
+  // pthread_cond_signal
+  if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL))
+    return 1;   // failure; 'mutex' is still held by the caller
+  // now unlock the mutex so pthread_cond_signal may be issued
+  pthread_mutex_unlock(mutex);
+  ok = (WaitForSingleObject(condition->signal_event_, INFINITE) ==
+        WAIT_OBJECT_0);
+  ok &= (ReleaseSemaphore(condition->received_sem_, 1, NULL) != 0);   // ack to signaler
+  pthread_mutex_lock(mutex);   // re-acquire before returning, as pthreads does
+  return !ok;   // 0 on success, non-zero on failure
+}
+
+#else  // _WIN32
+# define THREADFN void*
+# define THREAD_RETURN(val) val
+#endif
+
+//------------------------------------------------------------------------------
+
+static THREADFN WebPWorkerThreadLoop(void *ptr) {    // thread loop
+  WebPWorker* const worker = (WebPWorker*)ptr;
+  int done = 0;
+  while (!done) {
+    pthread_mutex_lock(&worker->mutex_);
+    while (worker->status_ == OK) {   // wait in idling mode
+      pthread_cond_wait(&worker->condition_, &worker->mutex_);
+    }
+    if (worker->status_ == WORK) {    // a job was launched
+      if (worker->hook) {             // the hook runs with mutex_ held
+        worker->had_error |= !worker->hook(worker->data1, worker->data2);
+      }
+      worker->status_ = OK;           // back to idle
+    } else if (worker->status_ == NOT_OK) {   // finish the worker
+      done = 1;
+    }
+    // signal to the main thread that we're done (for Sync())
+    pthread_cond_signal(&worker->condition_);
+    pthread_mutex_unlock(&worker->mutex_);
+  }
+  return THREAD_RETURN(NULL);    // Thread is finished
+}
+
+// main thread state control
+static void WebPWorkerChangeState(WebPWorker* const worker,
+                                  WebPWorkerStatus new_status) {
+  // no-op when attempting to change state on a thread that didn't come up
+  if (worker->status_ < OK) return;   // read without holding mutex_
+
+  pthread_mutex_lock(&worker->mutex_);
+  // wait for the worker to finish
+  while (worker->status_ != OK) {
+    pthread_cond_wait(&worker->condition_, &worker->mutex_);
+  }
+  // assign new status and release the working thread if needed
+  if (new_status != OK) {   // OK means "just synchronize": nothing to signal
+    worker->status_ = new_status;
+    pthread_cond_signal(&worker->condition_);
+  }
+  pthread_mutex_unlock(&worker->mutex_);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+
+void WebPWorkerInit(WebPWorker* const worker) {
+  memset(worker, 0, sizeof(*worker));   // clears hook, data1/data2 and had_error
+  worker->status_ = NOT_OK;             // no thread yet: WebPWorkerReset() starts it
+}
+
+int WebPWorkerSync(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+  WebPWorkerChangeState(worker, OK);   // blocks until status_ is OK again
+#endif
+  assert(worker->status_ <= OK);
+  return !worker->had_error;   // true if no hook call has failed
+}
+
+int WebPWorkerReset(WebPWorker* const worker) {
+  int ok = 1;
+  worker->had_error = 0;   // forget errors from previous jobs
+  if (worker->status_ < OK) {   // not started yet: create sync objects + thread
+#ifdef WEBP_USE_THREAD
+    if (pthread_mutex_init(&worker->mutex_, NULL) ||
+        pthread_cond_init(&worker->condition_, NULL)) {
+      return 0;   // NOTE(review): mutex_ is not destroyed if only cond_init failed
+    }
+    pthread_mutex_lock(&worker->mutex_);
+    ok = !pthread_create(&worker->thread_, NULL, WebPWorkerThreadLoop, worker);
+    if (ok) worker->status_ = OK;   // NOTE(review): on failure mutex_/condition_ are not destroyed here
+    pthread_mutex_unlock(&worker->mutex_);
+#else
+    worker->status_ = OK;   // no thread to spawn in the non-threaded build
+#endif
+  } else if (worker->status_ > OK) {   // a job is in flight: wait for it
+    ok = WebPWorkerSync(worker);
+  }
+  assert(!ok || (worker->status_ == OK));
+  return ok;   // false if the worker could not be started or the job failed
+}
+
+void WebPWorkerLaunch(WebPWorker* const worker) {
+#ifdef WEBP_USE_THREAD
+  WebPWorkerChangeState(worker, WORK);   // waits for the previous job, then wakes the thread
+#else
+  if (worker->hook)   // non-threaded build: run the hook synchronously
+    worker->had_error |= !worker->hook(worker->data1, worker->data2);
+#endif
+}
+
+void WebPWorkerEnd(WebPWorker* const worker) {
+  if (worker->status_ >= OK) {   // only a worker that was started needs teardown
+#ifdef WEBP_USE_THREAD
+    WebPWorkerChangeState(worker, NOT_OK);   // ask the thread loop to exit
+    pthread_join(worker->thread_, NULL);
+    pthread_mutex_destroy(&worker->mutex_);
+    pthread_cond_destroy(&worker->condition_);
+#else
+    worker->status_ = NOT_OK;
+#endif
+  }
+  assert(worker->status_ == NOT_OK);
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/thread.h b/drivers/webpold/utils/thread.h
new file mode 100644
index 0000000000..3191890b76
--- /dev/null
+++ b/drivers/webpold/utils/thread.h
@@ -0,0 +1,86 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Multi-threaded worker
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_THREAD_H_
+#define WEBP_UTILS_THREAD_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#ifdef WEBP_USE_THREAD   /* same test as thread.c, so both always agree */
+
+#if defined(_WIN32)
+
+#include <windows.h>
+typedef HANDLE pthread_t;
+typedef CRITICAL_SECTION pthread_mutex_t;
+typedef struct {
+  HANDLE waiting_sem_;    // released by a waiter before it blocks
+  HANDLE received_sem_;   // released by a waiter once it has been woken
+  HANDLE signal_event_;   // set by the signaler to wake a waiter
+} pthread_cond_t;
+
+#else
+
+#include <pthread.h>
+
+#endif    /* _WIN32 */
+#endif    /* WEBP_USE_THREAD */
+
+// State of the worker thread object
+typedef enum {
+  NOT_OK = 0,   // object is unusable
+  OK,           // ready to work
+  WORK          // busy finishing the current task
+} WebPWorkerStatus;
+
+// Function to be called by the worker thread. Takes two opaque pointers as
+// arguments (data1 and data2), and should return false in case of error.
+typedef int (*WebPWorkerHook)(void*, void*);
+
+// Synchronize object used to launch job in the worker thread
+typedef struct {
+#ifdef WEBP_USE_THREAD
+  pthread_mutex_t mutex_;
+  pthread_cond_t  condition_;
+  pthread_t       thread_;
+#endif
+  WebPWorkerStatus status_;
+  WebPWorkerHook hook;    // hook to call
+  void* data1;            // first argument passed to 'hook'
+  void* data2;            // second argument passed to 'hook'
+  int had_error;          // set once a 'hook' call fails; cleared by Reset()
+} WebPWorker;
+
+// Must be called first, before any other method.
+void WebPWorkerInit(WebPWorker* const worker);
+// Must be called to initialize the object and spawn the thread. Re-entrant.
+// Will potentially launch the thread. Returns false in case of error.
+int WebPWorkerReset(WebPWorker* const worker);
+// Make sure the previous work is finished. Returns true if worker->had_error
+// was not set and no error condition was triggered by the working thread.
+int WebPWorkerSync(WebPWorker* const worker);
+// Trigger the thread to call hook() with data1 and data2 argument. These
+// hook/data1/data2 can be changed at any time before calling this function,
+// but not be changed afterward until the next call to WebPWorkerSync().
+void WebPWorkerLaunch(WebPWorker* const worker);
+// Kill the thread and terminate the object. To use the object again, one
+// must call WebPWorkerReset() again.
+void WebPWorkerEnd(WebPWorker* const worker);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_THREAD_H_ */
diff --git a/drivers/webpold/utils/utils.c b/drivers/webpold/utils/utils.c
new file mode 100644
index 0000000000..673b7e284c
--- /dev/null
+++ b/drivers/webpold/utils/utils.c
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#include <stdlib.h>
+#include "./utils.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Checked memory allocation
+
+static int CheckSizeArguments(uint64_t nmemb, size_t size) {
+  const uint64_t total_size = nmemb * size;   // only trusted after the checks below
+  if (nmemb == 0) return 1;   // accepted; also keeps the division below safe
+  if ((uint64_t)size > WEBP_MAX_ALLOCABLE_MEMORY / nmemb) return 0;   // over the cap
+  if (total_size != (size_t)total_size) return 0;   // does not fit in size_t
+  return 1;   // nmemb * size is a valid allocation size
+}
+
+void* WebPSafeMalloc(uint64_t nmemb, size_t size) {
+  if (!CheckSizeArguments(nmemb, size)) return NULL;   // size rejected
+  return malloc((size_t)(nmemb * size));   // product was validated just above
+}
+
+void* WebPSafeCalloc(uint64_t nmemb, size_t size) {
+  if (!CheckSizeArguments(nmemb, size)) return NULL;   // size rejected
+  return calloc((size_t)nmemb, size);   // zero-initialized, unlike WebPSafeMalloc
+}
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
diff --git a/drivers/webpold/utils/utils.h b/drivers/webpold/utils/utils.h
new file mode 100644
index 0000000000..316ac90612
--- /dev/null
+++ b/drivers/webpold/utils/utils.h
@@ -0,0 +1,44 @@
+// Copyright 2012 Google Inc. All Rights Reserved.
+//
+// This code is licensed under the same terms as WebM:
+//  Software License Agreement:  http://www.webmproject.org/license/software/
+//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
+// -----------------------------------------------------------------------------
+//
+// Misc. common utility functions
+//
+// Author: Skal (pascal.massimino@gmail.com)
+
+#ifndef WEBP_UTILS_UTILS_H_
+#define WEBP_UTILS_UTILS_H_
+
+#include "../types.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+//------------------------------------------------------------------------------
+// Memory allocation
+
+// This is the maximum memory amount that libwebp will ever try to allocate.
+#define WEBP_MAX_ALLOCABLE_MEMORY (1ULL << 40)
+
+// size-checking safe malloc/calloc: verify that the requested size is not too
+// large, or return NULL. You don't need to call these for constructs like
+// malloc(sizeof(foo)), but only if there's picture-dependent size involved
+// somewhere (like: malloc(num_pixels * sizeof(*something))). That's why this
+// safe malloc() borrows the signature from calloc(), pointing at the dangerous
+// underlying multiply involved.
+void* WebPSafeMalloc(uint64_t nmemb, size_t size);
+// Note that WebPSafeCalloc() expects the second argument type to be 'size_t'
+// in order to favor the "calloc(num_foo, sizeof(foo))" pattern.
+void* WebPSafeCalloc(uint64_t nmemb, size_t size);
+
+//------------------------------------------------------------------------------
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}    // extern "C"
+#endif
+
+#endif  /* WEBP_UTILS_UTILS_H_ */
diff --git a/drivers/windows/SCsub b/drivers/windows/SCsub
index bcd231579c..9fbb467baa 100644
--- a/drivers/windows/SCsub
+++ b/drivers/windows/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.drivers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/godot_icon.png b/godot_icon.png
new file mode 100644
index 0000000000..013632ddf1
--- /dev/null
+++ b/godot_icon.png
diff --git a/godot_icon.svg b/godot_icon.svg
new file mode 100644
index 0000000000..6e32074d89
--- /dev/null
+++ b/godot_icon.svg
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="1024"
+   height="1024"
+   id="svg3030"
+   version="1.1"
+   inkscape:version="0.48.4 r9939"
+   sodipodi:docname="godot_icon.svg">
+  <defs
+     id="defs3032" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="0.24748737"
+     inkscape:cx="340.91041"
+     inkscape:cy="224.06536"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="false"
+     inkscape:window-width="1366"
+     inkscape:window-height="748"
+     inkscape:window-x="-2"
+     inkscape:window-y="-3"
+     inkscape:window-maximized="1" />
+  <metadata
+     id="metadata3035">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title />
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(0,-28.362183)">
+    <rect
+       style="fill:#a39f9f;fill-opacity:1;fill-rule:nonzero;stroke:none"
+       id="rect4009"
+       width="990"
+       height="990"
+       x="20"
+       y="47.362183"
+       ry="187.81349" />
+    <path
+       style="fill:#ffffff;fill-opacity:1;stroke:none"
+       d="m 116.99388,715.36604 43.13957,-74.51381 75.99672,-171.42666 271.088,-13.63746 282.06373,14.1696 138.45065,255.56931 -25.0756,66.96734 -376.12685,53.39482 -367.70391,-40.32222 z"
+       id="path3239"
+       inkscape:connector-curvature="0"
+       sodipodi:nodetypes="cccccccccc" />
+    <g
+       id="g3412"
+       transform="matrix(12.995388,0,0,-12.995388,898.37246,704.73082)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 0,-3.942 c 0,-0.39 -0.25,-0.734 -0.621,-0.852 L -6.835,-6.8 c -0.273,-0.091 -0.57,-0.042 -0.8,0.128 -0.232,0.168 -0.37,0.437 -0.37,0.721 l 0,4.305 -5.818,-1.108 0,-4.381 c 0,-0.447 -0.332,-0.824 -0.775,-0.885 l -8.41,-1.152 c -0.039,-0.003 -0.081,-0.008 -0.121,-0.008 -0.214,0 -0.424,0.078 -0.588,0.22 -0.195,0.172 -0.306,0.416 -0.306,0.676 l 0,4.638 -4.341,-0.018 0,-10e-4 -0.318,10e-4 -0.319,-10e-4 0,10e-4 -4.34,0.018 0,-4.638 c 0,-0.26 -0.112,-0.504 -0.307,-0.676 -0.164,-0.142 -0.374,-0.22 -0.587,-0.22 -0.041,0 -0.082,0.005 -0.123,0.008 l -8.41,1.152 c -0.442,0.061 -0.774,0.438 -0.774,0.885 l 0,4.381 -5.819,1.108 0,-4.305 c 0,-0.284 -0.137,-0.553 -0.368,-0.721 -0.232,-0.17 -0.529,-0.219 -0.802,-0.128 l -6.215,2.006 c -0.369,0.118 -0.619,0.462 -0.619,0.852 l 0,3.942 -3.837,1.29 c -0.19,-0.811 -0.295,-1.642 -0.295,-2.481 0,-10.301 14.512,-18.252 32.448,-18.309 l 0.022,0 0.023,0 c 17.936,0.057 32.448,8.008 32.448,18.309 0,0.766 -0.088,1.521 -0.247,2.266 L 0,0 z"
+         style="fill:#478cbf;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3414" />
+    </g>
+    <g
+       id="g3416"
+       transform="matrix(12.995388,0,0,-12.995388,140.10982,467.34929)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 0,-16.047 2.163,-0.729 c 0.364,-0.122 0.61,-0.462 0.61,-0.847 l 0,-3.936 4.426,-1.428 0,4.154 c 0,0.27 0.118,0.52 0.323,0.689 0.206,0.172 0.474,0.241 0.739,0.192 l 7.608,-1.452 c 0.422,-0.079 0.728,-0.448 0.728,-0.877 l 0,-4.338 6.62,-0.904 0,4.509 c 0,0.241 0.096,0.467 0.264,0.635 0.167,0.166 0.394,0.259 0.633,0.259 l 0.002,0 5.551,-0.022 5.549,0.022 c 0.245,-10e-4 0.468,-0.093 0.635,-0.259 0.169,-0.168 0.264,-0.394 0.264,-0.635 l 0,-4.509 6.621,0.904 0,4.338 c 0,0.429 0.304,0.798 0.726,0.877 l 7.609,1.452 c 0.262,0.049 0.533,-0.02 0.738,-0.192 0.205,-0.169 0.325,-0.419 0.325,-0.689 l 0,-4.154 4.425,1.428 0,3.936 c 0,0.385 0.245,0.725 0.609,0.847 l 1.475,0.497 0,16.279 0.04,0 c 1.437,1.834 2.767,3.767 4.042,5.828 -1.694,2.883 -3.768,5.459 -5.986,7.846 -2.057,-1.035 -4.055,-2.208 -5.942,-3.456 -0.944,0.938 -2.008,1.706 -3.052,2.509 -1.027,0.824 -2.183,1.428 -3.281,2.132 0.327,2.433 0.489,4.828 0.554,7.327 -2.831,1.424 -5.85,2.369 -8.903,3.047 -1.219,-2.048 -2.334,-4.267 -3.304,-6.436 -1.152,0.192 -2.309,0.264 -3.467,0.277 l 0,0.002 c -0.008,0 -0.015,-0.002 -0.022,-0.002 -0.008,0 -0.015,0.002 -0.022,0.002 l 0,-0.002 c -1.16,-0.013 -2.316,-0.085 -3.468,-0.277 -0.97,2.169 -2.084,4.388 -3.305,6.436 C 19.475,24.555 16.456,23.61 13.626,22.186 13.69,19.687 13.852,17.292 14.18,14.859 13.081,14.155 11.925,13.551 10.898,12.727 9.855,11.924 8.79,11.156 7.846,10.218 5.958,11.466 3.961,12.639 1.904,13.674 -0.314,11.287 -2.388,8.711 -4.082,5.828 -2.807,3.767 -1.477,1.834 -0.04,0 L 0,0 z"
+         style="fill:#478cbf;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3418" />
+    </g>
+    <g
+       id="g3420"
+       transform="matrix(12.995388,0,0,-12.995388,411.4457,567.42812)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 c 0,-3.611 -2.926,-6.537 -6.537,-6.537 -3.608,0 -6.535,2.926 -6.535,6.537 0,3.609 2.927,6.533 6.535,6.533 C -2.926,6.533 0,3.609 0,0"
+         style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3422" />
+    </g>
+    <g
+       id="g3424"
+       transform="matrix(12.995388,0,0,-12.995388,391.00655,572.46636)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 c 0,-2.396 -1.941,-4.337 -4.339,-4.337 -2.396,0 -4.339,1.941 -4.339,4.337 0,2.396 1.943,4.339 4.339,4.339 C -1.941,4.339 0,2.396 0,0"
+         style="fill:#414042;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3426" />
+    </g>
+    <g
+       id="g3428"
+       transform="matrix(12.995388,0,0,-12.995388,526.30933,660.10985)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 c -1.162,0 -2.104,0.856 -2.104,1.912 l 0,6.018 c 0,1.054 0.942,1.912 2.104,1.912 1.162,0 2.106,-0.858 2.106,-1.912 l 0,-6.018 C 2.106,0.856 1.162,0 0,0"
+         style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3430" />
+    </g>
+    <g
+       id="g3432"
+       transform="matrix(12.995388,0,0,-12.995388,641.18731,567.42812)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 c 0,-3.611 2.926,-6.537 6.537,-6.537 3.609,0 6.535,2.926 6.535,6.537 0,3.609 -2.926,6.533 -6.535,6.533 C 2.926,6.533 0,3.609 0,0"
+         style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3434" />
+    </g>
+    <g
+       id="g3436"
+       transform="matrix(12.995388,0,0,-12.995388,661.63165,572.46636)">
+      <path
+         inkscape:connector-curvature="0"
+         d="m 0,0 c 0,-2.396 1.941,-4.337 4.336,-4.337 2.398,0 4.339,1.941 4.339,4.337 0,2.396 -1.941,4.339 -4.339,4.339 C 1.941,4.339 0,2.396 0,0"
+         style="fill:#414042;fill-opacity:1;fill-rule:nonzero;stroke:none"
+         id="path3438" />
+    </g>
+  </g>
+</svg>
diff --git a/main/SCsub b/main/SCsub
index 795c427c8d..fa60ffc3e8 100644
--- a/main/SCsub
+++ b/main/SCsub
@@ -8,5 +8,3 @@ Export('env')
 lib = env.Library("main",env.main_sources)
 
 env.Prepend(LIBS=[lib])
-
-
diff --git a/main/main.cpp b/main/main.cpp
index 9cd190a0e8..a060dbd232 100644
--- a/main/main.cpp
+++ b/main/main.cpp
@@ -432,7 +432,6 @@ Error Main::setup(const char *execpath,int argc, char *argv[],bool p_second_phas
 		} else if (I->get()=="-e" || I->get()=="-editor") { // fonud editor
 
 			editor=true;
-			init_maximized=true;
 		} else if (I->get()=="-nowindow") { // fullscreen
 
 			OS::get_singleton()->set_no_window_mode(true);
@@ -649,6 +648,7 @@ Error Main::setup(const char *execpath,int argc, char *argv[],bool p_second_phas
 
 	if (editor) {
 		main_args.push_back("-editor");
+		init_maximized=true;
 		use_custom_res=false;
 	}
 
@@ -796,7 +796,6 @@ Error Main::setup(const char *execpath,int argc, char *argv[],bool p_second_phas
 	main_args.clear();
 	
 	print_help(execpath);
-	
 
 	if (performance)
 		memdelete(performance);
@@ -812,6 +811,8 @@ Error Main::setup(const char *execpath,int argc, char *argv[],bool p_second_phas
 		memdelete(packed_data);
 	if (file_access_network_client)
 		memdelete(file_access_network_client);
+	if(path_remap)
+		memdelete(path_remap);
 
 // Note 1: *zip_packed_data live into *packed_data
 // Note 2: PackedData::~PackedData destroy this.
@@ -820,7 +821,7 @@ Error Main::setup(const char *execpath,int argc, char *argv[],bool p_second_phas
 //		memdelete( zip_packed_data );
 //#endif
 
-
+	unregister_core_driver_types();
 	unregister_core_types();
 	
 	OS::get_singleton()->_cmdline.clear();
@@ -869,21 +870,14 @@ Error Main::setup2() {
 		String boot_logo_path=GLOBAL_DEF("application/boot_splash",String());
 		bool boot_logo_scale=GLOBAL_DEF("application/boot_splash_fullsize",true);
 		Globals::get_singleton()->set_custom_property_info("application/boot_splash",PropertyInfo(Variant::STRING,"application/boot_splash",PROPERTY_HINT_FILE,"*.png"));
-		print_line("BOOT SPLASH: "+boot_logo_path);
 
 		Image boot_logo;
 
 		boot_logo_path = boot_logo_path.strip_edges();
-		print_line("BOOT SPLASH IS : "+boot_logo_path);
 
 		if (boot_logo_path!=String() /*&& FileAccess::exists(boot_logo_path)*/) {
+			print_line("Boot splash path: "+boot_logo_path);
 			Error err = boot_logo.load(boot_logo_path);
-			if (err!=OK) {
-				print_line("ËRROR LOADING BOOT LOGO SPLASH :"+boot_logo_path);
-			} else {
-				print_line("BOOT SPLASH OK!");
-
-			}
 		}
 
 		if (!boot_logo.empty()) {
@@ -900,7 +894,7 @@ Error Main::setup2() {
 		} else {
 #ifndef NO_DEFAULT_BOOT_LOGO
 
-			MAIN_PRINT("Main: Create botsplash");
+			MAIN_PRINT("Main: Create bootsplash");
 			Image splash(boot_splash_png);
 
 			MAIN_PRINT("Main: ClearColor");
@@ -943,10 +937,10 @@ Error Main::setup2() {
 
 	if (String(Globals::get_singleton()->get("display/custom_mouse_cursor"))!=String()) {
 
-		print_line("use custom cursor");
+		//print_line("use custom cursor");
 		Ref<Texture> cursor=ResourceLoader::load(Globals::get_singleton()->get("display/custom_mouse_cursor"));
 		if (cursor.is_valid()) {
-			print_line("loaded ok");
+		//	print_line("loaded ok");
 			Vector2 hotspot = Globals::get_singleton()->get("display/custom_mouse_cursor_hotspot");
 			Input::get_singleton()->set_custom_mouse_cursor(cursor,hotspot);
 		}
@@ -1007,8 +1001,21 @@ bool Main::start() {
 	bool export_debug=false;
 	List<String> args = OS::get_singleton()->get_cmdline_args();
 	for (int i=0;i<args.size();i++) {
+		//parameters that do not have an argument to the right
+		if (args[i]=="-nodocbase") {
+			doc_base=false;
+		} else if (args[i]=="-noquit") {
+			noquit=true;
+		} else if (args[i]=="-convert_old") {
+			convert_old=true;
+		} else if (args[i]=="-editor" || args[i]=="-e") {
+			editor=true;
+		} else if (args[i].length() && args[i][0] != '-' && game_path == "") {
+			game_path=args[i];
+		}
 		//parameters that have an argument to the right
-		if (i < (args.size()-1)) {
+		else if (i < (args.size()-1)) {
+			bool parsed_pair=true;
 			if (args[i]=="-doctool") {
 				doc_tool=args[i+1];
 			} else if (args[i]=="-script" || args[i]=="-s") {
@@ -1037,20 +1044,13 @@ bool Main::start() {
 			} else if (args[i]=="-dumpstrings") {
 				editor=true; //needs editor
 				dumpstrings=args[i+1];
+			} else {
+				// The parameter does not match anything known, don't skip the next argument
+				parsed_pair=false;
+			}
+			if (parsed_pair) {
+				i++;
 			}
-			i++;
-		}
-		//parameters that do not have an argument to the right
-		if (args[i]=="-nodocbase") {
-			doc_base=false;
-		} else if (args[i]=="-noquit") {
-			noquit=true;
-		} else if (args[i]=="-convert_old") {
-			convert_old=true;
-		} else if (args[i]=="-editor" || args[i]=="-e") {
-			editor=true;
-		} else if (args[i].length() && args[i][0] != '-' && game_path == "") {
-			game_path=args[i];
 		}
 	}
 
@@ -1086,7 +1086,18 @@ bool Main::start() {
 
 #endif
 
-	if(script=="" && game_path=="" && !editor && String(GLOBAL_DEF("application/main_scene",""))!="") {
+	if (_export_platform!="") {
+		if (game_path=="") {
+			String err="Command line param ";
+			err+=export_debug?"-export_debug":"-export";
+			err+=" passed but no destination path given.\n";
+			err+="Please specify the binary's file path to export to. Aborting export.";
+			ERR_PRINT(err.utf8().get_data());
+			return false;
+		}
+	}
+
+	if(script=="" && game_path=="" && String(GLOBAL_DEF("application/main_scene",""))!="") {
 		game_path=GLOBAL_DEF("application/main_scene","");
 	}
 
diff --git a/main/splash.h b/main/splash.h
index 6ad0062e24..f69383cf00 100644
--- a/main/splash.h
+++ b/main/splash.h
@@ -34,7 +34,7 @@ static const unsigned char boot_splash_png[]={
 };
 
 static const unsigned char app_icon_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x80,0x0,0x0,0x0,0x80,0x8,0x6,0x0,0x0,0x0,0xc3,0x3e,0x61,0xcb,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0x32,0xb8,0x0,0x0,0x32,0xb8,0x1,0x28,0xf3,0x26,0x89,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x1,0x19,0x15,0x3b,0x3a,0x14,0xc2,0xb1,0x4b,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x20,0x0,0x49,0x44,0x41,0x54,0x78,0xda,0xed,0xbd,0x77,0x9c,0x64,0x55,0x99,0x3e,0xfe,0xbc,0xe7,0xdc,0x50,0xb1,0xf3,0x4c,0x4f,0x64,0x98,0x61,0x40,0xc2,0x24,0x92,0xc0,0x22,0x12,0x5c,0x1,0x15,0x50,0x82,0xee,0x2a,0x6,0x14,0x14,0x17,0xdd,0x9f,0x12,0x6,0x56,0x24,0x8c,0xc3,0xa,0xae,0x20,0xa8,0xdf,0x75,0xd7,0x35,0xb0,0xe6,0x48,0x46,0x10,0x90,0xc,0x4a,0x1e,0xc2,0xc,0x99,0xc9,0xb9,0xa7,0x63,0xc5,0x1b,0xce,0x39,0xef,0xef,0x8f,0x5b,0x55,0x5d,0x55,0x5d,0xdd,0x53,0x3d,0x33,0xc0,0xe0,0xce,0xfd,0x30,0x1f,0xaa,0xaa,0x6f,0xdd,0xba,0xf7,0xbc,0xcf,0x79,0xc3,0xf3,0xbe,0xe7,0x3d,0xc0,0xae,0x63,0xd7,0xb1,0xeb,0xd8,0x75,0xec,0x3a,0x76,0x1d,0xef,0xcc,0xe3,0xc0,0xcb,0x1f,0xa1,0x3,0x16,0x3d,0x22,0xde,0xf2,0xdf,0xfd,0xc6,0xa3,0xe2,0xc0,0xcb,0x1f,0xa5,0x77,0xfa,0xf8,0x59,0xef,0xc4,0x9b,0x3e,0xe0,0xf2,0x7,0x25,0x20,0xa4,0xed,0xc4,0xf5,0x13,0x5f,0x3f,0x58,0x3,0xe0,0x93,0x2e,0xfc,0x9d,0x58,0xeb,0x4e,0xec,0x78,0xf6,0x8a,0x63,0x7a,0xdf,0xcc,0xdf,0xde,0xff,0xd2,0x7,0x5b,0x1c,0xe9,0xa9,0x27,0x2e,0x7f,0x4f,0x1,0x0,0xe,0x59,0xfc,0x92,0x8,0x4d,0x9f,0xcd,0x86,0xd5,0xb3,0x8b,0xdf,0xab,0x77,0x1,0xe0,0x2d,0x38,0x58,0x17,0xc,0x5b,0x49,0x14,0xbd,0xdc,0x1e,0x7,0x5c,0xfe,0xc8,0x47,0x41,0xf8,0xe7,0xf5,0x24,0xe6,0x8,0x60,0x25,0x80,0x59,0x6f,0xa,0xe8,0x16,0x3d,0x2a,0x96,0x2c,0x7a,0x8f,0x11,0x96,0x7d,0x95,0x82,0x7d,0xce,0x81,0xdf,0x78,0x74,0x2d,0x1b,0xfc,0x2e,0xd4,0x3d,0xbf,0xd3,0xda,0x7d,0x5e,0x88,0x22,0xbf,0x13,0xc7,0x72,0xa7,0x56,0x61,0xfb,0x5f,0x7a,0xbf,0x90,0xd2,0xa2,0xa7,0x17,0x45,0x33,0xeb,0xe0,0x45
,0xf,0xed,0x19,0x6a,0x3a,0x88,0x61,0x8e,0x13,0x24,0x4e,0xb0,0x6d,0xd9,0xe9,0x38,0x36,0x2c,0x5b,0x20,0x11,0x77,0xbc,0xcd,0x3d,0x59,0x9b,0xc0,0x9f,0x7b,0xfa,0xf2,0xf7,0xfc,0x62,0x47,0xdf,0xcb,0x41,0x8b,0x9e,0xb0,0x35,0x79,0xdd,0x82,0xad,0xb5,0xed,0xed,0x71,0x5f,0x8,0x69,0x17,0x8b,0xbe,0x8,0x2,0x85,0x30,0xd4,0x5a,0x69,0x73,0x33,0x11,0xee,0x76,0x2c,0x7a,0xea,0xa9,0xcb,0xdf,0xfb,0x3c,0x0,0x1c,0x76,0xe5,0x33,0xe4,0x7b,0x19,0xb1,0x64,0xf1,0xd1,0x7a,0x17,0x0,0xc6,0x33,0xdb,0x2e,0xbd,0x5f,0x40,0xa,0xb9,0x64,0xd1,0x51,0x21,0x0,0x1c,0x74,0xf9,0x43,0x5f,0x30,0x10,0xff,0x62,0xb4,0x99,0x11,0x8f,0x5b,0xed,0xb1,0x98,0x8d,0x58,0xcc,0x81,0x94,0x92,0xa1,0x3d,0xb0,0x56,0x44,0x96,0x8d,0x82,0x4f,0x18,0x18,0x2c,0xbe,0x24,0x89,0xe,0x66,0xe6,0xe0,0x99,0x6f,0xbc,0x47,0xed,0x10,0x7b,0x7f,0xc5,0x5f,0xc5,0x33,0x97,0x1e,0x6e,0xe,0xfc,0xc6,0x5f,0x97,0xa4,0x92,0xee,0xfe,0x2d,0x29,0x1,0x55,0xcc,0xc3,0x8a,0x25,0x98,0x85,0xd,0xa3,0xd,0x5,0x81,0x86,0xe7,0x85,0xc8,0xe4,0xfc,0x9c,0x14,0xb4,0x5e,0x4a,0xfa,0xb9,0x9,0x6,0xae,0x59,0xf2,0xcd,0x93,0xc2,0x3,0x17,0x3f,0x62,0x99,0x20,0xe4,0x67,0xff,0xfd,0x18,0xbd,0xb,0x0,0xcd,0xcc,0xb6,0xcb,0xef,0xb3,0x95,0x11,0x33,0xc1,0xbc,0x50,0x5a,0xf2,0x2c,0x22,0x20,0x91,0x70,0xc3,0x8e,0xd6,0x98,0xd0,0xc6,0x8,0xa3,0x42,0xd2,0x61,0x0,0x1d,0x78,0x10,0xc2,0x2a,0x3d,0x5,0xc1,0x4e,0xb7,0xa3,0xa7,0x27,0xb,0x15,0xea,0x4f,0x2f,0x59,0x7c,0xe4,0x2f,0xf,0xbc,0xec,0x61,0x7a,0x66,0xf1,0x7b,0x77,0x88,0x6a,0x3e,0xe8,0x8a,0x47,0x4f,0x86,0xc1,0x4d,0x53,0xa6,0xb4,0xea,0x20,0x3b,0x28,0x99,0x35,0xc0,0xc,0x66,0x3,0x21,0x6d,0x8,0xcb,0x81,0xb4,0x1d,0x80,0x84,0xf6,0xfc,0xd0,0xc,0xc,0x16,0x6d,0x36,0xc,0x63,0xcc,0xad,0x6c,0xf8,0x4a,0x5b,0xe2,0xb9,0xa7,0x16,0x1f,0x13,0xec,0x2,0xc0,0xd6,0x66,0xdb,0x65,0xf7,0x7f,0x42,0x69,0x3e,0x83,0x4,0xbd,0x3f,0x9d,0x8c,0x21,0x95,0x8a,0x29,0x4b,0x30,0x19,0x1d,0xca,0xd0,0xf7,0x0,0x36,0x60,0x36,0x20,0x8a,0x1c,0x7f,0xa2,0xf2,0x23,0x30,0xa4,0x1d,0x87,0x22,0x57,0xf5,0xf5,0x17,0xe4,0x33,0x8b,0xde,0x23,0xde,0xfd,0xef,0x7f,0x13,0x4f,0x5e,0xf2,0xf,0x66,0xfb,0x6c,0xff,0xc3,0x9
4,0x10,0x6c,0xe5,0xb5,0x78,0xa2,0xad,0x25,0xbe,0x20,0x19,0x7,0x85,0x85,0x1c,0x80,0x5a,0x5c,0xb1,0x31,0x60,0x66,0x48,0xdb,0x1,0x9,0x9,0x27,0x16,0x33,0x45,0x9f,0x74,0x18,0x86,0x76,0x2e,0xe7,0xc1,0xf,0xd4,0x93,0x8e,0x25,0x7f,0x96,0x63,0xff,0xc7,0xaf,0x5c,0x71,0x9c,0xda,0x5,0x80,0xba,0x63,0xc1,0x45,0x77,0x4d,0x11,0x8e,0xfb,0x32,0x11,0x25,0xdb,0x5a,0x62,0x32,0x16,0xb7,0x1,0xd6,0x50,0xc5,0x2c,0x18,0x4,0x30,0x57,0xdd,0x31,0x55,0x5e,0x12,0x8,0x24,0xa2,0x3f,0x33,0x80,0x58,0xaa,0x8d,0x37,0xf4,0x14,0x88,0x8d,0xb9,0xfe,0xe9,0x45,0x47,0x9c,0xb9,0x3d,0xf7,0x34,0xff,0x8a,0x47,0xc8,0xd1,0x24,0x14,0xf1,0x89,0x92,0xc4,0xcd,0x53,0xba,0xd3,0xf0,0x73,0x83,0x20,0x36,0x15,0x0,0x94,0x61,0xc0,0x5c,0xd,0x88,0xe8,0xb5,0x20,0x9,0x61,0x3b,0x10,0x4e,0x1c,0xc5,0x62,0x80,0xfe,0x81,0x2,0x8,0xf0,0xc0,0xe6,0x48,0x8,0xf1,0xf4,0x92,0xc5,0x47,0x9b,0xb7,0x7b,0xdc,0xc5,0xce,0x2,0x0,0x19,0xb3,0x93,0x52,0x8a,0x96,0x69,0x93,0x5b,0xb4,0x63,0x19,0xa8,0x42,0x6,0x41,0x7e,0x30,0x92,0x7b,0x69,0x70,0x2b,0x2,0x7,0x20,0x8,0x20,0x2a,0x6b,0x0,0x82,0x10,0x11,0x28,0x42,0xdf,0xa3,0xce,0x8e,0x84,0x56,0x6,0x1f,0x39,0x78,0xf1,0x23,0xd3,0xb7,0x2b,0x44,0x32,0xc2,0x7a,0x6a,0xd1,0x7b,0x34,0x18,0xd7,0xb5,0xa4,0x63,0xd0,0x61,0x0,0x18,0x5d,0xf9,0x5d,0xa2,0xd2,0x7d,0x60,0xf8,0x5f,0xf5,0x61,0x8c,0x86,0xf2,0x8b,0xf0,0x33,0xbd,0x70,0xa4,0xc1,0x6e,0xd3,0xda,0x8c,0x25,0x45,0xcc,0x80,0x12,0x11,0xaa,0xdf,0xfe,0x63,0xe7,0x1,0x80,0x41,0x3f,0xc0,0x60,0xa3,0x84,0xf2,0xf2,0x60,0xa3,0x21,0x48,0x80,0x4a,0x23,0x55,0x2d,0xf0,0xb2,0xd0,0x89,0xa2,0xd9,0x5f,0x16,0x82,0x14,0x4,0x15,0x78,0x88,0x39,0x42,0xba,0x8e,0xe8,0xd0,0x9a,0x3f,0xb9,0xe0,0x92,0xfb,0x68,0xff,0xcb,0x1f,0xd8,0xa6,0xc1,0x7e,0xe6,0xf2,0xc3,0xc3,0x3,0x16,0x3d,0x7c,0xb2,0x25,0xc5,0xee,0x89,0xb8,0xa5,0x95,0x9f,0x87,0x65,0x9,0x88,0x92,0xf0,0x6b,0xee,0xa3,0x1,0x20,0x2a,0x6a,0x96,0x8,0x61,0x61,0x8,0x5a,0x69,0x61,0x59,0x4,0xa3,0x4d,0x8,0xde,0x39,0xa2,0xc6,0x9d,0x2,0x0,0x87,0x5d,0xf9,0x98,0x98,0xd0,0x6a,0x86,0x2a,0xfa,0x93,0xa8,0x4a,0xe0,0xf5,0x3,0x1d,0xd,0xb2,0x24,0x82,0xa8,0x39,0xaf,0xf4,0x5e,
0x0,0x41,0xb1,0x80,0x8e,0xf6,0x4,0xc,0xe3,0x6b,0x52,0xda,0x52,0x90,0x65,0x6f,0xb3,0x8d,0x24,0xf1,0x83,0x64,0xc2,0xd,0xa0,0x3,0x29,0x45,0xe9,0xb7,0x4,0x22,0x10,0xd4,0x0,0xb3,0xf1,0x7d,0x56,0x0,0x41,0xd1,0x97,0x99,0x19,0x60,0xe,0xeb,0x7d,0x88,0xff,0xd3,0x44,0x90,0xe7,0x7,0xe2,0x8e,0x8b,0xdf,0xaf,0xe,0xbc,0xec,0x7e,0x4,0x5a,0x44,0x3,0x59,0x1a,0xd4,0x61,0x41,0xa0,0x34,0xb8,0xb5,0xef,0x6b,0xfe,0x6,0x40,0xa,0x81,0x30,0xf4,0x10,0x73,0x13,0x1c,0x8f,0xd9,0x69,0x3f,0x8,0xbf,0xbc,0x64,0xd1,0x11,0xdf,0x3d,0x60,0xd1,0xc3,0x82,0x19,0xc,0x88,0x8a,0xdf,0x48,0x4c,0xc4,0x15,0xd0,0x45,0x57,0x63,0xd6,0x6c,0x10,0xe2,0xb9,0x6f,0xbc,0x8f,0xf,0xfa,0xc6,0xa3,0xff,0xcc,0xc0,0xc4,0x96,0xb4,0x43,0x41,0x6e,0x8,0x8e,0x14,0x15,0xcb,0xcf,0xe0,0xd2,0x2f,0x96,0x74,0x39,0x51,0xc9,0xf,0xe1,0x1a,0xed,0xce,0x5c,0xf3,0x1c,0x1c,0x9d,0xc6,0x1,0x68,0xe7,0x40,0xc0,0xce,0xc1,0x4,0x6a,0x4d,0xd1,0x60,0x51,0xc5,0xc7,0xa3,0xd2,0xc,0xab,0xc,0x1e,0x51,0x8d,0xc0,0xcb,0x52,0xac,0xd7,0xed,0x96,0x24,0x28,0x23,0xa0,0xfc,0x3c,0xa5,0x92,0xf1,0x20,0x8,0xf5,0x35,0x0,0xbe,0x2b,0x44,0x4c,0x3c,0x7d,0xd9,0xbb,0x15,0x9a,0x98,0x7a,0x7,0x7e,0xe3,0x9,0xfb,0x80,0x45,0x8f,0xda,0xca,0xf0,0xe7,0x3b,0x5a,0xe3,0xd2,0xa8,0x0,0x16,0x19,0x0,0x2,0x54,0xa5,0xd6,0x6b,0xdc,0xbe,0x92,0xa0,0xa9,0xc,0xf,0x2e,0x81,0xb8,0xa,0x10,0xd1,0xe4,0x47,0xc0,0x8c,0x80,0xd,0x76,0x69,0x80,0x3a,0xa7,0x19,0x60,0x86,0x89,0xb4,0x66,0x5d,0x88,0x37,0xfc,0x5a,0x94,0xa7,0x6a,0x55,0x20,0x53,0x85,0xd,0x10,0x4,0x6c,0x69,0x10,0x6a,0x85,0x64,0x4a,0x58,0x3,0x3,0x44,0x7,0x2e,0x7a,0xe4,0x8a,0x40,0x15,0x7f,0x33,0xff,0xd2,0x87,0xe,0x20,0x12,0x7b,0x13,0x51,0x37,0x11,0xd2,0x82,0xa8,0x8d,0x99,0x8,0xe0,0x21,0x63,0x38,0x3,0x42,0x3f,0x1b,0xf3,0x9a,0xd6,0xde,0x72,0x66,0xd2,0xb6,0x65,0x1d,0x93,0x48,0x38,0xd0,0x85,0x41,0x38,0x56,0xad,0xb5,0x64,0xae,0xb7,0xf3,0xe5,0xc7,0xa0,0xaa,0x59,0x5f,0x3a,0xc9,0xc,0x23,0x99,0x19,0x1,0x18,0x3b,0xd,0x1f,0xb0,0x93,0xe4,0x2,0x2a,0x1e,0x51,0x3f,0x80,0x8e,0x8a,0x83,0x42,0x75,0x61,0x1f,0x35,0x0,0x6,0xaa,0xcd,0x45,0xf4,0xb9,0x2d,0x5,0x42,0xa5,0x10,0xf8,0x45,
0x31,0x61,0x42,0x12,0xbd,0x7d,0xf9,0x4b,0x92,0xae,0x7d,0x89,0x63,0xcb,0xca,0x39,0x8e,0x2d,0x61,0xd9,0x2,0x0,0xc1,0x68,0x86,0xef,0x87,0xe0,0x92,0xc0,0x7c,0x5f,0x83,0xd9,0x20,0x9d,0x4e,0x68,0x56,0xbe,0xb4,0x25,0x41,0xa,0xaa,0xdc,0x25,0x33,0x81,0x88,0xc1,0xc4,0xd1,0xf5,0x38,0xa,0x3,0xcb,0xb7,0x20,0x41,0x30,0xe0,0x61,0x30,0x80,0x21,0x2c,0x7,0x44,0x40,0xae,0x10,0x7a,0xb1,0xb8,0xed,0x6b,0xbd,0xcb,0x7,0x18,0xa1,0x2,0x88,0x78,0x39,0x8,0x1d,0xcc,0x25,0x69,0xb,0x54,0x5,0x7e,0xc3,0x66,0xa1,0x6,0x4,0x54,0x4b,0x69,0x44,0x5f,0x23,0xc4,0x5d,0x9,0x2f,0xf4,0x61,0xb9,0x12,0xdd,0xdd,0x2d,0x0,0x6b,0x86,0x51,0x95,0x2f,0x19,0xe5,0x83,0x3,0x5,0x10,0x41,0x92,0x40,0x3a,0xe9,0x44,0x6a,0x9b,0x19,0xe9,0x64,0x1c,0x44,0x4,0xa3,0x7c,0x69,0xfc,0x22,0xe2,0xae,0x15,0x81,0x3,0x25,0xb2,0xa1,0xca,0xe7,0xa8,0xd0,0x13,0x54,0xa5,0xc8,0xc0,0x10,0x15,0xe5,0x4f,0x30,0x64,0x4a,0x7f,0x64,0x8,0x20,0x4b,0x8c,0x5c,0x1d,0x71,0xf0,0x7f,0x1d,0x0,0x91,0x2f,0xa6,0x14,0xaf,0x28,0x16,0x82,0x83,0x53,0xf1,0x4,0xd8,0x2f,0x40,0x40,0x56,0x9,0x96,0xaa,0x9c,0xa9,0x61,0xe1,0x47,0x76,0x37,0x52,0xcf,0x86,0x19,0x6c,0x0,0x9f,0x35,0x42,0x1d,0x9d,0xa0,0xfc,0x2,0xb8,0x98,0x7,0x88,0x22,0xcc,0x70,0xc5,0x2d,0x2f,0xcd,0x5e,0x3,0x86,0x81,0xd1,0x61,0x5,0x8b,0x91,0x68,0x86,0xd9,0xc6,0x9c,0xaf,0x60,0x4b,0x82,0x25,0x4,0x88,0x86,0x85,0x5b,0xb6,0xf8,0x28,0x5d,0x97,0xc0,0x25,0xf5,0x3f,0xac,0x2d,0x22,0x5,0x21,0x0,0x62,0x10,0x11,0x33,0x90,0x85,0xad,0xf2,0x46,0xd3,0x2e,0xd,0xd0,0xc0,0x4,0xac,0x28,0x49,0x29,0x9a,0x3b,0x44,0x23,0x54,0x3e,0x4a,0xcc,0x5f,0xf9,0x33,0x63,0x80,0xd0,0x18,0x68,0xc3,0x30,0xcc,0xd,0xdc,0x42,0x2,0x89,0xba,0x70,0x82,0xea,0x9d,0x8f,0x3a,0x50,0x45,0xcc,0x44,0xcd,0xdf,0x43,0xcd,0x8,0xb5,0x8e,0x42,0x3f,0x1,0x58,0x44,0x90,0x92,0x20,0x84,0x28,0xf9,0x2e,0x1c,0x81,0x80,0x2a,0x91,0x2c,0xc0,0x54,0x72,0x8,0x19,0xc2,0x76,0xe0,0x79,0xa,0x24,0x28,0xf3,0xd4,0xa5,0xff,0xb8,0xcb,0x7,0xa8,0x61,0xcc,0xa2,0xe9,0x4,0xc3,0xbc,0xbc,0xc2,0xe9,0x46,0x33,0xa6,0x62,0xe3,0xab,0xed,0xbd,0x61,0x46,0xa8,0xd,0x42,0xc5,0x15,0x11,0x55,0xfb,0x0,0x6f,0x26,0xeb,0xc
d,0x0,0xb4,0x1,0x34,0xc,0x58,0x45,0x7e,0x8a,0x2d,0x5,0x6c,0x19,0xdd,0x1b,0x57,0x81,0x90,0xc1,0x20,0x8e,0x62,0x7f,0x49,0x2,0x61,0xa8,0x41,0x8c,0x21,0xec,0x44,0xc7,0x4e,0x1,0x0,0x22,0x83,0x3,0x2f,0x7b,0x44,0x86,0xa1,0xb7,0x32,0x8,0x14,0xd2,0xc9,0x38,0x74,0x50,0x80,0xa0,0xf2,0xa0,0x46,0x33,0x5d,0x19,0xd,0x55,0x46,0xb,0x46,0x46,0x8,0x4d,0x9,0xbf,0x19,0x1c,0x50,0x79,0xf2,0xd3,0x18,0x51,0x63,0xe4,0x8,0x1a,0x6,0x7c,0x65,0xe0,0x2b,0x40,0xa,0xc0,0x12,0x54,0x22,0xa9,0x50,0xd1,0x3c,0x86,0x1,0x9,0x2a,0xa9,0x8,0x6c,0xd8,0x99,0x0,0xb0,0x53,0x30,0x81,0xcf,0x7d,0xf3,0x58,0x36,0x26,0x4c,0x68,0xc6,0x4b,0xc6,0x70,0x44,0xb5,0x31,0x21,0xd4,0x6,0x5,0x5f,0xa3,0xe0,0x6b,0x14,0x95,0x46,0x58,0x27,0x7c,0xda,0xaa,0xf0,0x6b,0xe2,0xc3,0xf1,0x29,0x81,0xba,0x8,0x64,0x74,0x10,0xc,0x9f,0xa1,0x4d,0x4,0x86,0x42,0xa8,0x51,0x8,0x34,0xfc,0xd0,0x54,0x7c,0x82,0xa,0x53,0x60,0xf0,0xa,0x0,0xcc,0xbb,0xf8,0x2f,0xbb,0x72,0x1,0x35,0x37,0xc2,0xa1,0xb1,0x85,0xf8,0x7f,0x65,0x16,0x90,0x29,0x1a,0x4c,0xc3,0x5c,0x37,0x7,0xab,0x8,0xa0,0x51,0x85,0x3f,0x5e,0x69,0xa3,0x79,0xd,0xd2,0xe8,0x6f,0x34,0xf2,0x5c,0x53,0xf2,0x1b,0xf2,0xbe,0xaa,0x70,0xca,0x44,0x60,0x8,0x7c,0x6a,0xff,0x8b,0xef,0x9d,0xf6,0xc2,0x95,0xef,0xe7,0x77,0x1c,0x0,0xf6,0x3b,0xff,0x8e,0xca,0xf9,0xf3,0x2e,0xfc,0x93,0xbd,0x3d,0x3f,0x3c,0xff,0xe2,0x7b,0x2a,0xd7,0xda,0xff,0x92,0xfb,0xbe,0x1c,0x18,0xd1,0x9f,0x4c,0x3a,0xa7,0x75,0x74,0x24,0x58,0xfb,0x85,0x8a,0x7,0x5e,0x33,0xc0,0x15,0x76,0x90,0x1a,0x8,0x9f,0x1a,0xcf,0xf8,0x1d,0x86,0x85,0xb1,0x2e,0x46,0x75,0xb7,0x43,0x75,0x0,0x21,0x28,0xbf,0x88,0x74,0xca,0xa6,0x96,0x74,0xec,0x0,0x26,0x5a,0xbb,0xff,0x25,0xf7,0x2e,0x3e,0xe8,0xe2,0x7b,0xe3,0x0,0xb0,0xe0,0x6b,0x77,0x6d,0xd7,0x44,0x9c,0x77,0xe1,0x9d,0xb2,0xf2,0xfa,0x82,0xdb,0xc5,0x8e,0x82,0x76,0x63,0xc1,0x5d,0xf0,0xa7,0xf,0x42,0x40,0x3e,0xff,0xed,0x13,0x6e,0x9f,0x77,0xe1,0x9d,0x36,0xb4,0x51,0x2f,0x7c,0xe7,0x84,0x71,0xa1,0x79,0xc1,0xd7,0xee,0xa2,0xe7,0xae,0x3a,0x9e,0x17,0x7c,0xfd,0x9e,0x29,0x80,0xbc,0x4f,0x4a,0xda,0xbb,0xab,0x23,0x9,0xdb,0x96,0x50,0x7e,0x1e,0x6c,0x4c,0x43,0xfa,
0x77,0xf4,0xdb,0xa5,0xed,0x78,0xa2,0xed,0x60,0x2e,0x47,0xf3,0xf,0x2a,0x5a,0x8b,0x6b,0xcf,0x25,0x82,0xed,0xc6,0x11,0x28,0xa0,0x7f,0xa0,0x0,0x15,0xea,0x8d,0x82,0xcd,0x7,0x97,0x5c,0x75,0xec,0x73,0xb,0x2e,0xbe,0xc7,0x7a,0xee,0xca,0x63,0xc7,0x55,0x28,0x32,0xef,0x82,0x5b,0x8,0xc2,0xb1,0x5f,0xf8,0xf6,0x7,0x83,0x5,0x17,0xdd,0xf1,0x1e,0x66,0x6e,0x7f,0xfe,0xdb,0x27,0xdc,0x3e,0x7f,0xe1,0xed,0xe2,0xf9,0xab,0x4f,0x34,0x3b,0x4c,0x3,0xcc,0x5b,0x78,0x2b,0x1,0xc0,0xdc,0x85,0xb7,0xed,0xcd,0x82,0x7e,0xb,0x12,0xb7,0xcd,0xb9,0xe0,0xf6,0x27,0x40,0x66,0xaf,0xb2,0xf0,0xe7,0x2d,0xbc,0x73,0xab,0x43,0xbf,0xe0,0xdf,0xee,0x26,0x0,0x90,0xaa,0x40,0xf3,0xbf,0x76,0xcf,0xa7,0x1,0xb1,0x3e,0x95,0x72,0x67,0x75,0x77,0xa7,0x99,0x38,0x44,0x90,0x1f,0x2,0x1b,0x1e,0xe,0xf0,0x41,0x4d,0xc8,0xf4,0x2d,0x16,0x7e,0x33,0xf3,0x87,0xaa,0xee,0xbc,0x5a,0x35,0x30,0x23,0x28,0x16,0x20,0xa0,0x30,0xa9,0x3b,0x85,0xd6,0xb6,0x44,0x87,0x11,0xe2,0xd9,0xf9,0x5f,0xbb,0xe7,0xf2,0x89,0x7b,0x17,0x75,0x34,0x39,0xb6,0x3e,0x8e,0xf3,0x17,0xde,0x41,0x0,0xf0,0xc2,0x35,0x1f,0x61,0x66,0x33,0x69,0xee,0xc2,0x3b,0x7e,0xf,0xa2,0x47,0x40,0xe2,0xb6,0x79,0xb,0x6f,0x3f,0xa6,0x59,0xe1,0x37,0x3d,0x64,0xef,0xfe,0xda,0xc3,0x94,0xd7,0xbd,0x6d,0xc2,0xd8,0xaf,0xa5,0xd3,0xf1,0xae,0x64,0xd2,0xd5,0xf9,0xbc,0x27,0x33,0x59,0x1f,0x44,0x74,0xa3,0x6d,0xe3,0xd2,0x25,0x57,0x7e,0xe8,0xe5,0xf9,0x17,0xfe,0xd9,0x79,0xfe,0xdb,0x1f,0x68,0x18,0xe3,0xce,0xb9,0xe0,0xe,0x5a,0x76,0xcd,0x87,0xf8,0xa0,0xb,0xef,0x68,0x51,0x96,0xfd,0x6b,0x40,0x9c,0xd0,0xd5,0x99,0x32,0xf1,0x98,0x14,0x7e,0x31,0x7,0x30,0xf,0xab,0x7d,0xaa,0x21,0x7a,0x47,0x51,0xf7,0x78,0x1b,0x5,0xdf,0x9c,0x26,0x18,0x51,0x25,0x54,0x62,0x3,0xd9,0x18,0x80,0x8,0x4e,0x3c,0x89,0x20,0x44,0xd8,0x3f,0x90,0xb7,0xc3,0x40,0x3f,0x2f,0x85,0x39,0x71,0xc9,0x95,0xc7,0xad,0x5d,0xf0,0xb5,0xbb,0xac,0xe7,0xae,0x3a,0xbe,0xa1,0x36,0x98,0x7f,0xe1,0x9d,0xf6,0xf3,0xdf,0xfe,0x60,0x78,0xd0,0xbf,0xfd,0x79,0x52,0x60,0x78,0x31,0x80,0xcf,0xa7,0x92,0x2e,0xd2,0x29,0x57,0xf9,0x7e,0x68,0xb6,0xf4,0xe6,0x1d,0xd7,0x11,0xb,0x42,0xdf,0x5a,0xba,0xf4,0xba,0x63,
0xcd,0xe,0x1,0xc0,0x81,0xe7,0xdf,0xe6,0x84,0x42,0x2c,0x4f,0x24,0x9c,0xc9,0x6d,0x6d,0x71,0x19,0xe6,0xb3,0xb0,0xe2,0x9,0x10,0x59,0x18,0x1c,0x2a,0x22,0x57,0x8,0x20,0x80,0x9f,0xbf,0x70,0xf5,0x87,0xce,0x58,0xf0,0xb5,0xbf,0x88,0xe7,0xae,0x7a,0x7f,0xcd,0xf,0xcf,0xbd,0xe0,0x2e,0xb2,0x62,0x52,0xb0,0xe2,0x59,0x20,0x3c,0x66,0xdb,0x56,0x67,0x77,0x77,0xb,0x54,0xe8,0xc3,0x84,0x7e,0xad,0x27,0x3f,0x9c,0xfe,0x1b,0xc3,0xd1,0xa3,0xb7,0x57,0xf0,0xcd,0x82,0xa0,0xc6,0x81,0xad,0x35,0x7,0xa5,0xba,0x0,0x8,0xcb,0x81,0xb0,0x5d,0xf4,0xf5,0x17,0xd8,0xf7,0x43,0x3,0x56,0x1f,0x79,0xee,0xaa,0xf,0xfc,0x69,0x14,0xe1,0xcb,0xe7,0xbf,0xfd,0x41,0xbd,0xe0,0xc2,0x3b,0x16,0x19,0x88,0x8b,0x85,0x84,0x3d,0xa1,0x2b,0x5,0x61,0xc,0x54,0x58,0x84,0x1d,0x4b,0x20,0x97,0xb,0x30,0x38,0xe4,0x5,0x42,0x6,0x93,0xc1,0xf1,0xec,0xb,0x57,0x7f,0x20,0xdc,0x66,0x13,0x30,0xe7,0xbc,0xdb,0x6d,0x0,0xf0,0x81,0xdb,0xa5,0xa4,0x69,0x1d,0xed,0x71,0x19,0xe4,0x33,0x0,0x31,0xc2,0x62,0x16,0xa1,0x97,0x45,0x7b,0xab,0x83,0xc9,0xdd,0xe9,0xc0,0x71,0xe5,0x67,0xe6,0x2d,0xbc,0x33,0x6f,0x54,0x70,0xd2,0xb0,0xaa,0xba,0x93,0x16,0x5c,0xf8,0x67,0x5a,0x7a,0xcd,0xf1,0x6c,0xb4,0x3e,0x8d,0xc1,0xaf,0xa5,0xd3,0x6e,0xcb,0xc4,0x89,0x49,0x84,0xc5,0x1c,0x4c,0xe0,0x8d,0x82,0xc7,0x77,0x88,0xf0,0xeb,0xb4,0x55,0x3,0x82,0x63,0xd4,0x68,0x21,0xaa,0x66,0x22,0x18,0x15,0x40,0x79,0x79,0x4c,0x9c,0x90,0xa0,0xd6,0x96,0x98,0x26,0x92,0xb7,0xcf,0xbf,0xe8,0xae,0xcb,0x0,0xe0,0x80,0xcb,0x1e,0x10,0xf3,0x16,0xfe,0xa9,0xf2,0x2d,0x41,0x38,0x74,0xee,0xc2,0x3b,0x5e,0x96,0xb6,0xbc,0xbc,0xb3,0x33,0xc1,0x53,0x27,0xa5,0x0,0xe5,0x21,0xf4,0x73,0x0,0x1b,0x84,0xc5,0x2c,0x52,0x29,0x1b,0xad,0xad,0xae,0x80,0xb1,0x5f,0x4c,0x4e,0x8b,0xeb,0x6d,0xd6,0x0,0xfb,0x9d,0x77,0x9b,0xf5,0xe2,0xb5,0x27,0xa9,0x39,0xe7,0xdf,0xf6,0x1d,0xd7,0xb5,0xce,0xeb,0xea,0x4c,0x41,0xfb,0x51,0xa9,0xd6,0xf0,0xc3,0x94,0x58,0x2e,0xdb,0x86,0xb4,0x5d,0xf6,0x3,0xa6,0xfe,0x81,0x2,0x0,0x7a,0x48,0x12,0xce,0x58,0xf2,0xad,0xf,0xac,0x2,0x80,0xf9,0x17,0xdd,0xb5,0x58,0x48,0x71,0x69,0x5b,0x6b,0x42,0x27,0xe2,0x32,0xaa,0xee,0x8d,0x72,0xa4,0xc3,0x82
,0xae,0x9a,0xf9,0xa3,0x3b,0x7d,0xb4,0x73,0x2f,0x65,0xe1,0xd1,0xcc,0x1,0x97,0xc8,0x6e,0x1e,0x71,0x4e,0x59,0x13,0x80,0x19,0x56,0x2c,0xe,0x65,0x4,0x6f,0xd9,0x92,0x25,0x86,0xb9,0xf1,0xb9,0x2b,0x8f,0x3f,0xd,0x0,0x8e,0xb8,0xe8,0x6,0x3b,0x83,0xe4,0x6f,0x94,0x32,0xa7,0x75,0x75,0x26,0x10,0x4f,0x38,0x86,0x43,0x5f,0xa8,0xc0,0x3,0x95,0xe8,0x85,0xb2,0xd3,0x49,0x24,0xe0,0x24,0x52,0xbc,0x71,0x53,0x86,0xb4,0x36,0x7f,0x7e,0xe1,0xea,0x13,0x3f,0x38,0xf7,0xdc,0x9b,0xc5,0xd2,0xeb,0x4e,0x36,0xe3,0x36,0x1,0xf3,0xce,0xbf,0xf5,0xb3,0x20,0x71,0x7d,0x47,0x67,0x52,0x5b,0x8,0x24,0x6b,0x55,0x11,0x42,0x39,0x1c,0x33,0x55,0x6a,0xcd,0x72,0x13,0x80,0xb4,0x30,0x38,0xe0,0x1b,0xcf,0xf,0x5,0x1b,0xfe,0x22,0x8,0x87,0x4a,0x4b,0x9e,0x31,0xa9,0xbb,0x85,0xd9,0x68,0x32,0xa1,0x5f,0xf5,0xcb,0xc3,0xf5,0x7d,0xa0,0x77,0xb8,0xf0,0xb7,0x2,0x82,0x1a,0x7f,0x80,0xb9,0x72,0xce,0xf0,0xe7,0x11,0x8,0x48,0x5a,0x20,0xcb,0xc5,0xc6,0x4d,0x19,0x80,0x71,0xaf,0xd1,0xea,0x67,0x42,0xca,0x5f,0x39,0x8e,0xd4,0x5d,0x1d,0x71,0x61,0xb4,0x26,0xed,0x17,0xa2,0x6c,0x85,0x88,0x18,0x52,0x80,0x4b,0x72,0x28,0x5f,0xc3,0x81,0xe5,0xba,0xd8,0xb4,0x39,0x7,0xa3,0xf4,0xd7,0x5f,0xf8,0xce,0x49,0x57,0xce,0x3f,0xef,0x36,0xf1,0xfc,0xb5,0x27,0x99,0xa6,0x0,0x30,0xef,0x5f,0xfe,0x48,0x94,0x70,0xf6,0x63,0xc6,0xd2,0x8e,0xce,0x54,0x10,0x73,0xc9,0x9,0xbd,0x42,0x2d,0xfb,0x45,0x91,0x5,0x11,0xa5,0x34,0xa8,0x66,0x6,0x31,0x40,0xd2,0x82,0xe5,0xc6,0x10,0x86,0x8,0xf3,0x85,0xd0,0x26,0x22,0xa4,0xd3,0xae,0x22,0x68,0xcb,0x84,0x7e,0xc4,0xf2,0x95,0x63,0xfa,0xea,0x59,0x4f,0x4d,0x38,0x7d,0xef,0x18,0x0,0x34,0x2,0x1,0x57,0xd5,0x81,0x56,0x52,0x8e,0xc3,0x0,0xa8,0xbc,0x37,0x51,0xed,0x80,0xb0,0x38,0x97,0xf,0x48,0x6b,0x83,0x98,0x6b,0x85,0xf1,0x98,0xb4,0x2,0xaf,0x48,0xd0,0x1,0xa4,0x94,0x11,0x4d,0xe,0x86,0x36,0x5c,0x52,0x20,0x8c,0x8a,0xc7,0x61,0x18,0x64,0xd9,0x20,0xcb,0x55,0x9b,0x36,0x65,0x2c,0x0,0x27,0x2f,0xfd,0xce,0x49,0xb7,0xcc,0x39,0xef,0x16,0x5a,0x76,0xed,0x47,0x78,0x4c,0x0,0xcc,0x3d,0xf7,0x16,0x77,0xe9,0x75,0x1f,0xf1,0xf7,0xfb,0xea,0x2d,0xcb,0x5b,0x5b,0x63,0xb3,0xd2,0x29,0x1b,0xa1,0x57
,0xa8,0x2a,0xc1,0xa2,0x92,0xc,0xa9,0x54,0x1d,0x5b,0xfa,0xbc,0x84,0x46,0x6d,0x22,0x95,0x24,0xa4,0x5,0x69,0xd9,0xc,0x12,0x64,0x54,0x8,0x63,0x74,0x55,0xe,0x5f,0x54,0x9,0xba,0xc9,0xd9,0xff,0x4e,0x1,0xc0,0xa8,0x20,0xe0,0x2a,0x3f,0xb0,0x4a,0xe8,0xa5,0x3a,0x42,0x54,0x69,0x82,0xe1,0x22,0x12,0x1b,0x24,0x88,0xb5,0x52,0x64,0x54,0x8,0x1,0x86,0x63,0xc9,0x92,0x8c,0xb9,0x94,0xfe,0x2e,0x55,0x2a,0x70,0x4,0x1e,0xae,0xd2,0x42,0x96,0x13,0x43,0xc1,0x63,0x35,0x34,0x54,0xc,0x60,0xf4,0xbc,0x17,0xae,0x3b,0x79,0xf9,0x98,0x1a,0x60,0xee,0x79,0xb7,0xd0,0xd2,0x6b,0x3f,0xc2,0x73,0xcf,0xbd,0xf9,0x7f,0x1d,0xc7,0x3e,0xa3,0x6b,0x42,0x92,0x43,0xaf,0x50,0xb1,0x30,0xd5,0x2c,0x5c,0x54,0xd,0x2b,0x50,0x6d,0xc2,0x5,0x9,0x30,0x3,0x81,0x2e,0xd7,0xc3,0x50,0x94,0x3b,0xad,0x66,0xcb,0xca,0xdf,0xa9,0x16,0x3c,0x8d,0x45,0xef,0xbe,0xc3,0x84,0xdf,0x94,0x16,0xe0,0x11,0xe6,0xa2,0x42,0x7e,0x55,0x9f,0x63,0x4c,0xe5,0xa,0x8e,0x45,0xb0,0x5,0x45,0x42,0xaf,0x9a,0xf1,0xc6,0x94,0x5f,0x97,0x0,0x50,0xc1,0x52,0x24,0x1,0x3b,0x9e,0x44,0x7f,0x5f,0x11,0x85,0x82,0xff,0xb0,0xe3,0xf6,0x1c,0x15,0x86,0x13,0xc4,0xd2,0x6b,0x4f,0xd6,0xd,0xa3,0x80,0xa5,0xd7,0x7e,0x84,0xe7,0x5d,0x70,0xcb,0x67,0xc,0xe3,0x8c,0xce,0xce,0x64,0xa8,0x43,0x8f,0x50,0xaa,0x5e,0xa4,0xaa,0xd9,0x29,0xaa,0x8b,0x32,0xa9,0x36,0x29,0x63,0x49,0x1,0x5b,0x8a,0x6,0x74,0x2d,0x37,0x20,0x6d,0x68,0x64,0x61,0xdd,0x9b,0x4d,0xeb,0xbf,0x8d,0xfc,0x50,0x75,0x94,0x33,0x56,0xc4,0x50,0x99,0xe,0x42,0x54,0xde,0x97,0x6b,0x12,0x9,0xb5,0x45,0xb1,0x95,0x88,0xa2,0x21,0x25,0xcd,0x8,0x8b,0x79,0x74,0x76,0x25,0x61,0xd9,0xe2,0xbd,0x41,0x30,0xe1,0xf2,0xa5,0xd7,0x9e,0xac,0xe7,0x9e,0x77,0x93,0x1c,0x71,0xab,0x73,0xce,0xbb,0x49,0x90,0xa0,0xa9,0x46,0x99,0x25,0x5d,0x5d,0xe9,0x4e,0xc7,0x26,0xd2,0x41,0xb1,0x62,0xe7,0x87,0x85,0x3f,0x5c,0x60,0x41,0x55,0xdc,0x3c,0x89,0xa8,0x14,0xcb,0x30,0xc1,0x53,0x7a,0x84,0x73,0x57,0x4d,0xf2,0x10,0x44,0x1d,0x0,0xe8,0xef,0x43,0xf5,0x37,0xe5,0x10,0xd6,0x6b,0x81,0x6a,0x5f,0x80,0x4b,0xb,0x4e,0x47,0x6a,0x6,0x4b,0x0,0x6e,0x9,0x4,0xcc,0xc3,0x4e,0x9f,0x31,0x5c,0x71,0xc4,0x4d,0x49,0x3,0x80,0x
cb,0xc5,0x31,0xa5,0xe1,0x93,0x36,0xc,0x49,0xdd,0xbb,0x25,0x2b,0x99,0xcd,0x81,0x4b,0xaf,0x3d,0x75,0x49,0x8d,0x6,0x98,0x73,0xee,0x4d,0xd6,0xb2,0x6b,0x4f,0x31,0x3a,0x34,0x57,0xba,0xae,0xd5,0xe5,0xba,0x16,0x69,0xdf,0xab,0x72,0xca,0xc6,0x9e,0xf9,0xd1,0xeb,0xe8,0x73,0x3f,0x54,0xa0,0x6a,0x89,0x31,0x46,0x26,0x4a,0x68,0x1c,0x34,0xd4,0x3b,0xb9,0x9,0xb,0x8d,0xf7,0x81,0x46,0xaa,0xbb,0xf2,0x78,0xab,0x92,0x7f,0x55,0xbd,0x2c,0x8d,0x51,0x5b,0x2a,0x57,0xad,0xa5,0xab,0xaf,0x62,0x74,0x8,0x4b,0xb0,0x4c,0xa5,0x62,0x3e,0x1b,0xba,0xa1,0x86,0x8,0xda,0xef,0xab,0x37,0xd0,0xb2,0xeb,0x4e,0x51,0x73,0xbf,0x7a,0xd3,0x7,0x2c,0x4b,0x7c,0xb2,0xb3,0xb3,0x45,0x6b,0xbf,0x58,0x15,0xea,0x45,0x97,0xdb,0x9a,0xf0,0x89,0x0,0x2f,0x34,0xc3,0x2b,0x2c,0xaa,0x9e,0x89,0x6b,0x66,0x32,0x35,0xa9,0x16,0xe9,0x9d,0x2d,0xfc,0xca,0x24,0xa6,0xf1,0x9b,0x83,0xfa,0x4,0x38,0x45,0x55,0x31,0x5e,0xa8,0x2b,0xbe,0xa2,0x68,0x20,0xf8,0xea,0xb5,0x14,0x54,0x37,0x7c,0xca,0xf7,0x91,0x4e,0xbb,0x2e,0x1,0x33,0xe7,0x9d,0x7b,0xd3,0x5,0x0,0x30,0xe7,0x2b,0x37,0x46,0x97,0x58,0x70,0xfe,0x8d,0xb6,0x32,0xdc,0xdf,0x92,0x4e,0x38,0xc9,0x84,0x74,0x74,0x18,0x94,0x99,0x27,0x0,0x2,0x52,0xa0,0xa6,0x3e,0x2f,0xf2,0xe5,0xa8,0x52,0xb2,0x45,0x14,0x39,0x7e,0x4a,0x73,0xc5,0xb3,0xa7,0x1a,0x7,0x8f,0xaa,0x56,0xfb,0x88,0x11,0xa1,0x5f,0x35,0xd2,0xff,0xae,0x0,0x30,0x8a,0x19,0xa8,0x8d,0xfd,0xab,0xa8,0xe1,0x4a,0x64,0x50,0x15,0xd2,0x95,0x4c,0x43,0x94,0x24,0x63,0x8,0x2,0x12,0xae,0x55,0x71,0x4,0xeb,0x9d,0x42,0x66,0x94,0x22,0x83,0xe8,0x9a,0xcc,0x11,0xe5,0x6,0x63,0x20,0x2c,0x17,0x6,0x12,0x9b,0x36,0xf,0x42,0xa,0x74,0x32,0x28,0x27,0x22,0x3b,0x43,0x9d,0x0,0x14,0x9,0xaa,0xc3,0x25,0x41,0x94,0x6a,0xf3,0x6a,0x66,0xbe,0xa8,0x9d,0xf9,0xda,0x44,0x35,0x7a,0xcd,0xd,0x8,0x37,0xaf,0x3b,0xf9,0xef,0x41,0xf8,0xe3,0xb1,0x75,0x3c,0x86,0x8f,0x38,0x5c,0x5e,0xe6,0x87,0xa6,0x2a,0x82,0xa6,0x9a,0xd9,0x3f,0x96,0x53,0xc8,0x6c,0x0,0x18,0x8,0x41,0x60,0x70,0xa7,0x61,0xa9,0xc5,0x9c,0xaf,0xde,0x40,0xcf,0x5f,0x77,0xca,0x26,0x30,0x4e,0xcd,0x64,0x8a,0x8e,0x66,0xa9,0xcb,0xd4,0x62,0xf9,0xc2,0x3c,0x86,0xda,0x7,0x
0,0x2f,0xd4,0x8d,0xe5,0xdc,0x50,0xcb,0x11,0xc6,0x25,0x5d,0x7e,0x7,0x2,0x61,0x9b,0xee,0xb9,0x7e,0xc0,0xa8,0x66,0xb2,0x54,0x5f,0x2e,0xd4,0x51,0x15,0x74,0x65,0x61,0x6a,0x95,0x16,0x15,0x55,0x8b,0x67,0x6a,0x1c,0x6b,0x21,0xe1,0xc4,0x63,0xd8,0xb2,0x25,0xb,0x62,0x5c,0xb0,0xec,0xba,0x8f,0xbe,0x2e,0x28,0x34,0x14,0xd9,0x82,0x1b,0xe4,0xb2,0xef,0x9d,0xa6,0xf7,0xfd,0xca,0x1f,0xaf,0x4f,0x27,0x63,0x9f,0x6d,0x6b,0x4f,0x22,0x28,0xe6,0x40,0x44,0xb0,0xa5,0x84,0x10,0xd5,0xc2,0xa7,0xa,0x19,0x44,0x44,0x28,0xf8,0x3a,0xf2,0x38,0xcb,0x37,0x52,0x9d,0xc7,0x17,0xc3,0x55,0x34,0xe5,0xb0,0xb0,0x11,0xfb,0x37,0xaa,0x9,0x78,0xa7,0x3a,0x82,0x3c,0xf6,0x7,0x8d,0x68,0xe1,0x46,0x8c,0x60,0x3d,0x39,0x84,0xba,0x5,0x85,0x49,0x57,0x56,0x4e,0x89,0x4c,0x41,0xf4,0xc6,0x98,0xe1,0xd7,0xda,0x18,0x68,0x6,0x6c,0xd7,0x45,0x5f,0x5f,0x51,0x5,0x41,0xf8,0xfa,0xb,0xd7,0x9e,0xba,0x6f,0x4d,0x14,0xb0,0xec,0x7b,0xa7,0xe9,0x39,0x5f,0xf9,0xa3,0x74,0x6c,0x79,0x41,0x26,0x5b,0xec,0x2b,0x16,0x7c,0x63,0x39,0x6e,0xe4,0x3c,0x68,0x53,0x61,0xfc,0x2a,0x33,0xbf,0xb4,0x4c,0xda,0xf,0x4d,0x4d,0xb8,0x31,0x52,0x50,0xcd,0x55,0xeb,0x52,0x73,0xa3,0xf8,0xe,0xf5,0xfe,0x79,0xac,0x90,0x7f,0x94,0x91,0x6a,0xb0,0xbe,0xa1,0xc1,0x77,0x8a,0x81,0xae,0x72,0x4,0x47,0x3a,0x85,0xe5,0x70,0x51,0x48,0xb,0x61,0x88,0xd0,0xf7,0x42,0x8b,0x98,0x3f,0x5c,0x13,0x5,0x94,0x5f,0x2c,0xfb,0xde,0x47,0xf5,0x73,0xd7,0x9c,0xd2,0xef,0xd8,0xf2,0xa2,0xc1,0xc1,0xbc,0x20,0x61,0xe9,0x48,0xfd,0xf3,0x70,0xf8,0x51,0xfe,0x11,0x1e,0xa7,0xdd,0xaf,0x8b,0x79,0x77,0x1d,0x5b,0xf1,0x1,0x9a,0x3c,0xc,0x33,0x42,0x65,0xaa,0xbc,0xfe,0x5a,0x7f,0x40,0x95,0x8,0x2,0x12,0x36,0xfa,0x7a,0xb3,0x36,0xb3,0x39,0xff,0xf9,0xeb,0x4e,0x7b,0x7d,0xce,0x57,0xff,0x28,0x1a,0x32,0x81,0x73,0xcf,0xbd,0x41,0x3e,0x7f,0xed,0xa9,0x3f,0x35,0x8c,0xfb,0x7b,0xfb,0xb2,0x52,0xba,0x9,0x6,0xa2,0xf2,0xec,0x9a,0x75,0xee,0x14,0xa1,0x6f,0x5b,0xf4,0x62,0x8d,0x7,0x5c,0xc3,0x8c,0xff,0x3d,0x82,0xa3,0x41,0x56,0xb0,0x3a,0x29,0x54,0x9d,0x15,0xac,0x29,0x18,0x69,0x4e,0x21,0x32,0xa2,0xca,0xe9,0xca,0xd2,0xf4,0x2a,0x10,0x84,0x3a,0x8a,0x26,0xa4,0x1b,0x47,
0x2e,0xef,0x21,0xd4,0xfa,0x59,0x41,0xe2,0x7,0x73,0xbe,0xfa,0x47,0x6b,0xd9,0x77,0x3f,0x6a,0x46,0x55,0x2c,0x73,0xce,0xbd,0xd1,0xb1,0x94,0x92,0x21,0x89,0x37,0xda,0xda,0x93,0x53,0x12,0x71,0x1b,0x5a,0xf9,0x20,0x0,0x49,0xd7,0x82,0x20,0x81,0x7c,0x10,0x96,0xd8,0x27,0xaa,0x8b,0xda,0xea,0x7c,0x0,0xa2,0x51,0xc2,0x42,0x34,0x99,0x6,0x7e,0x87,0x32,0x81,0x4d,0x67,0x4,0xeb,0x26,0x46,0x7d,0x9a,0x78,0x2b,0x3e,0x40,0x19,0x34,0x4,0x20,0x19,0xb3,0x2a,0x93,0x2b,0x54,0x6,0x81,0xd2,0x10,0x96,0x8d,0x20,0x84,0xb7,0x65,0x4b,0x26,0x66,0xbb,0xd6,0x3e,0xcf,0x5f,0x7d,0xf2,0x2b,0x4d,0x1b,0xe9,0xb9,0xe7,0xdd,0x78,0xb4,0x56,0xe6,0x2f,0xdd,0xdd,0xad,0x24,0x85,0x11,0xac,0x15,0x2c,0x11,0x51,0xba,0x61,0x25,0x71,0xb1,0x15,0x0,0x88,0x3a,0x30,0x34,0x2,0x40,0x15,0x41,0xf4,0x77,0x9f,0xc,0xaa,0xae,0x16,0xae,0xe2,0x8,0x86,0x69,0xe0,0x3a,0xaa,0xb8,0x49,0x0,0x44,0x54,0x31,0x21,0xe6,0x8,0x18,0x3,0x14,0x7c,0x5,0x12,0x4,0x16,0x8e,0xd9,0xd2,0x33,0x24,0x58,0xe3,0xcc,0xa5,0xdf,0x3b,0xf5,0xfa,0x46,0xb7,0x3a,0x6a,0x49,0xd8,0xd2,0x6b,0x4f,0x7d,0xc0,0xb2,0xc4,0x35,0x5b,0xb6,0x64,0x84,0x90,0xb6,0x26,0x12,0x50,0x65,0xbb,0xdf,0xac,0xb6,0x66,0xde,0x8a,0x2e,0xe3,0xbf,0x17,0xb7,0xaf,0x9,0x9d,0x5d,0xb7,0xc0,0x85,0xc7,0x78,0x62,0x6a,0x30,0x76,0xd,0xc6,0xb2,0x86,0xe9,0x33,0xc,0xa5,0x19,0x5e,0xa8,0xc0,0xc,0x58,0x4e,0x1c,0xfd,0x7d,0x39,0xa1,0x35,0xff,0x6e,0xe9,0xf7,0x4e,0xbd,0x7e,0xce,0x57,0x6e,0xb4,0xc7,0x45,0x4c,0xef,0xf7,0x95,0x1b,0xe5,0x8b,0xdf,0x3b,0x55,0xef,0xf7,0x95,0x1b,0x1e,0x8e,0xc7,0xed,0x23,0xda,0xdb,0x93,0xd0,0x41,0xb1,0xae,0x72,0x77,0x6b,0x26,0xa0,0x81,0xda,0xaf,0x37,0x11,0x95,0x13,0xff,0x4e,0x52,0xc2,0x5b,0x2d,0xb,0x43,0xdd,0xcc,0xe6,0x91,0xfe,0x80,0xe1,0x11,0x13,0x65,0xe4,0x67,0xf5,0x26,0x65,0xf8,0x77,0x6d,0x27,0x86,0xc1,0xa1,0x22,0x67,0x32,0xde,0xaa,0x98,0x23,0xe6,0x4,0x1,0xfb,0xcb,0xbe,0x7f,0xaa,0x1e,0x6f,0x66,0x2,0x73,0xfe,0xbf,0x1b,0x1c,0x69,0xe9,0x64,0x10,0x60,0xe5,0x94,0xa9,0x1d,0xad,0x30,0x61,0x49,0xd,0xd1,0xe8,0x0,0x88,0x74,0x7f,0x6d,0xb8,0x43,0xd5,0x35,0x1,0xf5,0x66,0xa0,0x19,0x4e,0xe0,0x1d,0x2,0x82,0x31,0x2
a,0x84,0x1b,0xd1,0xbf,0x35,0x9f,0xd7,0xd8,0xff,0x5a,0x2d,0xcb,0x65,0xd5,0xcf,0x5b,0x7,0x0,0x9,0x1,0xa5,0x84,0xea,0xeb,0xcf,0x59,0x0,0x1f,0xbe,0xf4,0xba,0xd3,0xfe,0x36,0xd6,0x2d,0x8f,0x59,0x15,0xcc,0x42,0x85,0xcf,0x5f,0xfb,0x4f,0x3,0x6c,0x70,0xe3,0xf8,0x38,0xfa,0xb1,0xd4,0xd7,0xc8,0x72,0x28,0x29,0xa2,0xe6,0xb,0xb6,0x45,0xb0,0x2d,0x1,0x4b,0x10,0x2c,0x49,0xa3,0x84,0x92,0xef,0x40,0xd5,0xcf,0x6,0x44,0xd1,0x33,0x59,0x32,0x7a,0x4e,0x4b,0x46,0xcf,0x29,0x88,0x86,0x5,0x5c,0x4f,0x12,0x6d,0x8b,0x7d,0x8c,0x16,0xa3,0x32,0x18,0x43,0x4b,0xaf,0x3b,0xed,0x6f,0x73,0xbe,0x7a,0xa3,0x1c,0xeb,0xf4,0x31,0x97,0x87,0x13,0x8b,0xf2,0x35,0x9d,0x5a,0x1,0x6e,0x5,0x4,0xcc,0x63,0x2c,0xdc,0x1c,0x3e,0x47,0x48,0x1,0x5b,0x48,0xf4,0x67,0xb,0x8,0x2,0xd,0x5d,0x6e,0xa3,0x22,0x24,0x6c,0x49,0xe8,0x68,0x4b,0x82,0x88,0xa0,0xb4,0xa9,0x25,0x48,0x78,0x27,0xd3,0x4,0x3c,0x3a,0xe3,0x27,0x4,0xc1,0xb6,0x24,0x72,0x79,0xf,0xf9,0x42,0x0,0x63,0x18,0x9a,0x4d,0xa5,0xcf,0x60,0x2a,0x6e,0x23,0x95,0x74,0xa1,0x94,0x8e,0xca,0xe9,0xea,0x2e,0xc3,0xdb,0xd9,0x4e,0x8c,0xb6,0x2,0x1d,0xab,0xf9,0x27,0xe4,0x6,0x56,0x63,0xc,0x49,0xd4,0x83,0xa0,0x42,0x17,0xb,0x18,0x30,0x72,0x85,0x0,0xad,0x31,0x9,0x57,0x12,0xce,0x3c,0x7c,0x6,0xde,0x35,0xa5,0x15,0x93,0xda,0x13,0x88,0x39,0x16,0x36,0xf6,0x17,0xb1,0xae,0x3f,0x8f,0x3b,0x96,0xac,0xc5,0xda,0xfe,0x22,0xf2,0x6,0x70,0x5d,0x1b,0xb6,0x25,0xab,0x54,0x26,0xed,0xd4,0xb,0x43,0x4,0x11,0x72,0x5e,0x0,0x28,0x8d,0x16,0x57,0xe0,0xa8,0xd9,0x9d,0xf8,0x87,0x3d,0xbb,0xd0,0xdd,0x1e,0xc7,0xc4,0x74,0xc,0x5b,0x32,0x1e,0x36,0xf,0x16,0xf0,0xd8,0xeb,0x5b,0xf0,0xd8,0x2b,0x9b,0x51,0xd0,0x80,0x26,0x42,0x2a,0xe1,0xc0,0x68,0x1e,0x5b,0x1b,0x6c,0xe5,0x86,0x18,0x6,0xd5,0x1d,0x8b,0x76,0x0,0x0,0x46,0x13,0x35,0x8d,0x7d,0x23,0x6,0x35,0xed,0x59,0x4,0x0,0x3f,0x8,0x90,0xc9,0x14,0x71,0xd6,0x51,0xb3,0x71,0xfc,0x82,0xa9,0x98,0x39,0xb9,0xad,0xe2,0xe4,0x98,0x92,0x7b,0x31,0xa5,0x3d,0x8e,0x3,0xf7,0xe8,0xc2,0x87,0xf,0x9e,0x81,0x4c,0x31,0xc0,0x3,0xcb,0x36,0xe1,0xbf,0xee,0x7d,0xd,0x83,0x79,0x1f,0x13,0x3a,0x52,0x8,0x94,0x1e,0xd6,0x44,0x6f,0x97,0x5f,
0x30,0x86,0xbd,0xb7,0xa5,0xc0,0xaa,0x8d,0x3,0x78,0xcf,0xec,0x2e,0x9c,0x79,0xd4,0x1e,0x38,0x60,0x56,0x67,0xc9,0x79,0x36,0x30,0xa5,0x44,0x4e,0x57,0xca,0xc1,0x7e,0xd3,0x5a,0xf1,0xbe,0xf9,0x53,0x61,0x94,0xc2,0xd3,0x6f,0xf4,0xe2,0x37,0x7f,0x5b,0x89,0xfb,0x5e,0xd9,0x82,0x19,0x93,0x5a,0xe1,0x7,0xaa,0xf9,0x5b,0x68,0xf4,0x79,0x6d,0x47,0xcd,0x6d,0x3,0x0,0x57,0xdb,0x5d,0x6e,0xd6,0x75,0x1c,0xe9,0xfc,0x50,0xa9,0x45,0xaa,0x1f,0x6a,0xa4,0x49,0xe1,0xf,0x17,0x1d,0x83,0xb8,0x6b,0xc1,0x92,0x12,0xbe,0x1f,0x56,0xf5,0xff,0x8d,0x1c,0x49,0xa5,0xa2,0xb4,0x25,0x11,0x21,0xe9,0x48,0x9c,0x70,0xe0,0x34,0x7c,0xf8,0xe0,0xe9,0xf8,0xb7,0xdf,0x2c,0xc1,0x43,0xaf,0xf7,0xa1,0xbd,0x35,0x31,0x1c,0x3b,0x97,0xb5,0xc1,0x5b,0x9,0x4,0x1e,0x9b,0xe7,0x5f,0xbe,0x7a,0xb,0x7e,0xf0,0xb9,0x43,0x70,0xd4,0xbe,0xdd,0x30,0xcc,0xd0,0x3a,0x7a,0x1e,0xae,0xf2,0x7d,0x34,0x0,0xad,0x19,0x26,0x8c,0x56,0x6e,0xcd,0x9d,0xd1,0x81,0x2b,0xa6,0xb5,0xe2,0xc3,0xcb,0xfb,0x70,0xde,0xcf,0x1e,0xc7,0xc4,0x49,0x1d,0xd0,0xc6,0x6c,0xd5,0x92,0x36,0x52,0xfa,0xc4,0x80,0xa0,0x72,0xc7,0xf2,0xb1,0x35,0x80,0x18,0xbf,0x9,0x18,0xbf,0x23,0x26,0x4,0xe0,0xf9,0xa,0x7b,0x75,0x38,0xf8,0xdd,0xb9,0x47,0xc3,0xb1,0x4,0xa4,0x10,0x35,0xde,0xbe,0xd6,0x1a,0x5a,0xeb,0xa,0x60,0x8c,0x31,0x95,0xf7,0x60,0x46,0xc1,0x57,0xf8,0xd6,0x27,0xe,0xc4,0xa7,0xe,0x99,0x8e,0x2d,0x7d,0xd9,0x92,0x39,0x68,0xe0,0x20,0xf2,0xdb,0x23,0xfc,0x72,0x9f,0xc0,0xcc,0x60,0xe,0x37,0x9e,0x77,0x14,0x8e,0xdc,0xa7,0x3b,0x4a,0x96,0x95,0x32,0x73,0x5a,0xeb,0xd2,0x58,0x88,0xca,0x7b,0x63,0x4c,0x15,0x70,0xa2,0x2c,0xeb,0x3f,0xec,0xd5,0x85,0xff,0xfd,0xd2,0x11,0xc8,0xf,0x66,0xb6,0x9a,0x3b,0x19,0x35,0x89,0x46,0xdc,0x74,0xd9,0xdd,0xd8,0x0,0x60,0xb3,0x43,0x28,0x9a,0x6c,0xae,0x88,0xc9,0x9,0xc2,0x77,0x3f,0x73,0x8,0x98,0x1,0xc7,0xb6,0x2a,0x83,0x66,0x8c,0x41,0x10,0x4,0x88,0xc5,0x62,0x88,0xc5,0x62,0xf0,0x7c,0xf,0x9b,0x36,0x6d,0x82,0xe3,0x38,0x70,0x1c,0x7,0x42,0x8,0x84,0xa1,0x82,0x6b,0x9,0x68,0xad,0x71,0xce,0xf1,0xfb,0xe0,0xd4,0x83,0xa6,0xa1,0xa7,0x2f,0xb,0xab,0xa6,0x7b,0x27,0xf,0xff,0x7b,0xb3,0x80,0xc0,0x3c,0x2a,0xca,0x12
,0x31,0x1b,0xab,0xd7,0xf7,0xe1,0x3f,0xcf,0x78,0x37,0xf6,0x9e,0xda,0x6,0xcd,0xc,0x41,0x8c,0x20,0x8,0x60,0xdb,0x36,0x5c,0xd7,0x45,0x18,0x2a,0x6c,0xd8,0xb0,0x1e,0x96,0x6d,0x23,0x1e,0x8f,0x97,0x9e,0x2d,0xac,0x71,0x18,0x43,0xd,0xec,0x35,0x39,0x8d,0xff,0x3e,0xeb,0x30,0xc,0xe,0xe6,0x9a,0xe8,0x7f,0xd4,0x50,0x7,0x80,0x4a,0x5a,0xb1,0xfc,0xff,0x6d,0xf4,0x1,0x78,0x14,0x4d,0x40,0xe3,0x1a,0xb3,0x18,0x1,0xd7,0x7e,0xfa,0x60,0x14,0xfc,0x10,0xf1,0x98,0x33,0xc2,0x3c,0xf4,0xf6,0xf6,0xe2,0xda,0xef,0x5c,0x87,0xdb,0x6e,0xbb,0xd,0x5a,0x6b,0x24,0x12,0x9,0x64,0x32,0x59,0xcc,0x9e,0x3d,0x1b,0x9f,0xfa,0xd4,0xe9,0x38,0xe1,0xa4,0x13,0x87,0xcd,0x48,0x10,0xe2,0xdf,0x3e,0x3c,0x17,0xaf,0x6c,0x18,0xc2,0xca,0x41,0xf,0x89,0xaa,0xeb,0x8d,0xc,0x19,0xb7,0xee,0x28,0x96,0xb3,0x9b,0xbc,0xd5,0xc7,0x1f,0xdd,0xe2,0xa,0x22,0xbc,0xb1,0xa6,0xf,0xdf,0xf8,0xe8,0x2,0x2c,0xd8,0xbd,0x3,0x4a,0x97,0xb7,0x93,0x61,0x14,0xa,0x5,0x7c,0xff,0x7b,0xff,0xf,0x37,0xdf,0x7c,0x33,0x7c,0xdf,0x47,0x32,0x99,0xc2,0xc0,0x40,0x3f,0x76,0xdb,0x6d,0x6,0xce,0xf8,0xec,0xa7,0x71,0xec,0xb1,0xc7,0x42,0x4a,0x51,0xe9,0x25,0x24,0x4,0xc1,0x18,0xc2,0x6e,0x5d,0x49,0x5c,0x78,0xc2,0x3e,0xb8,0xe6,0xcf,0xaf,0xa2,0xad,0x2d,0x39,0xee,0x44,0x2a,0x37,0x39,0x75,0x45,0xd3,0x52,0x2c,0x13,0x40,0xe3,0xac,0xd5,0xb,0x82,0x10,0x27,0xcd,0x9f,0x8c,0xb6,0x84,0x83,0x98,0x6b,0x97,0xea,0xd4,0x22,0x15,0x68,0x59,0x16,0x7e,0xf8,0xc3,0xff,0xc1,0xc7,0x3e,0xfa,0x4f,0xb8,0xfb,0xae,0xbb,0xb1,0xd7,0x5e,0x7b,0x61,0xc1,0x82,0xfd,0xb1,0xcf,0x3e,0xfb,0xe0,0xdd,0xef,0x3e,0x18,0x96,0x25,0x71,0xd5,0x55,0xdf,0xc2,0x69,0xa7,0x9e,0x86,0xa5,0x4b,0x97,0x95,0x96,0x44,0x1,0x45,0x3f,0xc4,0x45,0x27,0xee,0x8b,0xc0,0xb,0xb7,0xe2,0x29,0x73,0x8d,0x62,0xa8,0xfe,0x67,0x9,0x81,0x2d,0x3,0x5,0xac,0xda,0x30,0x84,0x35,0x3d,0x19,0x64,0xf3,0x1,0x6c,0x29,0x23,0xc6,0x8d,0x9b,0x14,0x7e,0xa9,0x31,0x65,0x31,0x50,0xd8,0xab,0x3b,0x89,0xd3,0xe,0xd9,0xd,0x5,0x2f,0x44,0x18,0x4,0xb0,0x2c,0xb,0xbf,0xfd,0xcd,0x6f,0x71,0xec,0xfb,0x8f,0xc3,0xdd,0x77,0xdf,0x83,0x59,0xb3,0x66,0xe1,0x80,0x3,0xe,0xc0,0x3e,0xfb,0xec,0x8d,0x43,0xe,0
x39,0x4,0xa9,0x54,0x12,0x57,0x7d,0xf3,0x2a,0x9c,0x7e,0xfa,0x27,0xf1,0xf8,0xe3,0x4f,0xc0,0xb6,0xed,0x8a,0x49,0x10,0x42,0x40,0x8,0x81,0xf,0xec,0x3f,0x1d,0x93,0x92,0x56,0x69,0x19,0xd8,0x38,0x84,0x6f,0xaa,0xf2,0x8,0xbc,0x43,0x7c,0x80,0xba,0x72,0xe5,0x26,0xd1,0x18,0x73,0x2d,0x78,0x43,0x39,0x7c,0xee,0x1f,0xf7,0xae,0x61,0xff,0x8c,0x31,0x90,0x52,0xe2,0xc2,0xb,0x2f,0xc2,0x1f,0x7e,0xff,0x7b,0x74,0x76,0x76,0xa2,0x6b,0x42,0x17,0x98,0x19,0xb6,0x6d,0x21,0x16,0x8b,0x41,0x4a,0x9,0xcb,0xb2,0x30,0x6d,0xea,0x34,0x4,0x7e,0x80,0x2f,0x9c,0xf5,0x5,0x2c,0x59,0xb2,0x4,0x96,0x65,0xc1,0x96,0x84,0x7d,0xa6,0xb5,0x63,0xc1,0xb4,0x34,0x0,0x44,0x3,0xc7,0x3c,0x2a,0x5,0x3b,0x32,0x44,0x3,0xd6,0x6f,0xe8,0xc3,0x79,0xc7,0xee,0x89,0x5b,0xcf,0x3d,0x2,0xbf,0x3a,0xfb,0x50,0xbc,0xef,0x5d,0x9d,0x58,0xb5,0xbe,0x1f,0x31,0xd7,0x1e,0xf3,0xbb,0x15,0xc1,0x97,0x12,0x3b,0x44,0x80,0x57,0xf4,0xf1,0xf9,0xa3,0xf7,0x40,0x10,0x28,0x48,0x62,0xc4,0xe3,0x71,0x5c,0x71,0xc5,0xbf,0xe3,0x7,0x3f,0xf8,0x2f,0x4c,0x9a,0x34,0x9,0x6d,0xad,0xad,0x10,0x42,0xc0,0x71,0x1c,0xb8,0x6e,0xac,0x22,0xe0,0xdd,0x66,0xcc,0x40,0x18,0x84,0xb8,0xf4,0x92,0x4b,0x71,0xef,0xbd,0xf7,0x56,0x26,0x47,0x19,0x4,0x8e,0x94,0x38,0xe7,0xf8,0x7d,0x30,0x30,0x58,0x80,0x94,0x62,0x1c,0xdc,0xd0,0xf0,0xba,0x0,0xb3,0x23,0x0,0xc0,0x64,0x6a,0x7,0xa3,0x9,0x40,0x12,0x11,0xb2,0x59,0xf,0x87,0xec,0xdd,0x8d,0x74,0xcc,0xaa,0xfc,0x94,0x31,0x6,0xc6,0x18,0xdc,0x74,0xd3,0xcd,0x78,0xf4,0x91,0x47,0x90,0x4a,0xa5,0xa1,0xb5,0x86,0xef,0xfb,0x28,0x14,0xa,0x18,0x18,0x18,0x40,0x6f,0x6f,0x1f,0x6,0x6,0x7,0x51,0x2c,0x16,0xe1,0x7,0x1,0x8,0xc0,0x6e,0x33,0x76,0xc3,0x39,0xff,0xf2,0x25,0xf4,0xf6,0xf6,0x55,0xba,0x71,0x1e,0xb9,0x4f,0x37,0x3c,0x2f,0x88,0x54,0x68,0xc5,0x44,0xf3,0x98,0x8e,0xac,0xd2,0x1a,0x41,0xae,0x80,0x25,0xdf,0x3a,0x11,0x9f,0x38,0x7c,0x77,0xcc,0xe8,0x4a,0x62,0xce,0xb4,0x36,0x7c,0xe3,0xb4,0xf9,0xf8,0xc5,0xbf,0xfc,0x3,0xd6,0x6e,0x1c,0x40,0xb4,0x8b,0x0,0x6f,0x2d,0x36,0x8a,0xd4,0xbf,0x4,0x5a,0x5d,0xb,0x73,0xa6,0xb7,0x43,0x10,0xc1,0x75,0x5d,0xfc,0xf1,0x8f,0x37,0xe0,0xf6,0xdb,0x6e,0xc7,0xd4,0xa9,
0x53,0xa3,0xa5,0x72,0x61,0x88,0x5c,0x2e,0x8f,0xfe,0xbe,0x7e,0x6c,0xe9,0xdd,0x82,0xc1,0xd2,0xb3,0x85,0x61,0x8,0x21,0x8,0xa9,0x54,0x1a,0x57,0x2c,0xbe,0x2,0x99,0x4c,0x6,0xbe,0xef,0x57,0xae,0xee,0x29,0x8d,0xe3,0xe6,0x4f,0x43,0x3e,0x9b,0xaf,0x6c,0x89,0xd3,0x4c,0x10,0x6e,0x18,0x30,0xac,0x4a,0xaf,0xd5,0xf6,0x38,0x81,0x55,0xdc,0xf4,0xd6,0xa,0x16,0xea,0x2f,0x4c,0x84,0xa1,0xbc,0x8f,0xd3,0xe,0xdd,0x1d,0x79,0x7f,0xb8,0x74,0x49,0x29,0x5,0x66,0xc6,0x77,0xaf,0xbb,0xe,0xed,0xed,0x9d,0xc3,0x37,0x6d,0x18,0x9e,0xe7,0x21,0x93,0xc9,0x62,0x70,0x70,0x0,0xb9,0x4c,0x16,0xbe,0xef,0x47,0x7e,0x82,0x10,0xc8,0xe5,0x72,0x48,0xa5,0x52,0xf8,0xd1,0x8f,0x7e,0x4,0x63,0x18,0x61,0xa8,0x70,0xec,0xfc,0x29,0xc8,0xe,0x15,0x60,0x5b,0x54,0x97,0x8b,0x19,0xfd,0x46,0x7,0x32,0x1e,0xfe,0xfb,0xac,0x43,0x11,0x94,0x98,0x37,0x5b,0xa,0x58,0x52,0x20,0x54,0x1a,0xb,0x66,0xb4,0xe3,0xe3,0x87,0x4c,0x47,0xb6,0x10,0x34,0x6d,0x73,0x9,0x84,0x9,0x49,0x1b,0x53,0x3b,0x53,0x0,0x11,0x6,0xfa,0x7,0xf0,0xd3,0x9f,0x5c,0x8f,0xa9,0x53,0xa6,0xa2,0x58,0x28,0x2,0xcc,0xd0,0x5a,0xc1,0xf7,0x3d,0x64,0xb3,0x59,0xc,0xd,0xe,0x22,0x9f,0xcf,0x23,0x8,0x82,0x92,0xca,0x2f,0x6b,0x46,0x81,0xab,0xae,0xbc,0xa,0x1d,0x1d,0x1d,0x95,0x88,0x81,0x88,0xe0,0x87,0x1a,0x27,0x1e,0x3c,0x3,0x83,0x99,0xe2,0x38,0x9c,0x40,0x6e,0xba,0x8,0xab,0x29,0xd,0x40,0x4c,0x63,0x65,0x71,0x47,0x9,0xfd,0x8,0x7d,0x3,0x39,0x2c,0xd8,0xbd,0xa3,0x44,0xdc,0x44,0x4e,0x5f,0x2c,0x1e,0xc3,0x5d,0x7f,0xbe,0xb,0x4a,0xe9,0x1a,0x54,0x53,0xd,0x77,0xce,0x95,0x22,0x47,0x66,0x3,0x63,0xa2,0x81,0xb6,0x2c,0xb,0x4f,0x3c,0xf6,0x4,0xf2,0xf9,0x3c,0xc2,0x50,0xa3,0x23,0x9d,0x80,0x36,0x66,0x14,0x1b,0xc9,0xd,0x66,0x6,0x63,0x6a,0x8b,0x83,0x89,0x2d,0xb1,0xd2,0x76,0x33,0xc3,0xdf,0xb3,0x4a,0x2a,0x76,0xee,0x6e,0xed,0xb0,0xa4,0xa8,0x8a,0xbf,0x87,0x8b,0x36,0x99,0x79,0xc4,0x80,0x1a,0x66,0xcc,0x9c,0x90,0x4,0x40,0xd0,0x5a,0x63,0xc9,0xb3,0xcf,0x22,0x97,0xcd,0xc2,0xf3,0xbd,0x28,0x27,0x5f,0xdf,0xc,0xa2,0xea,0xff,0x54,0xd9,0x43,0x80,0xd1,0xd9,0xd9,0x89,0x7,0x1f,0x7c,0x8,0xbd,0xbd,0x7d,0x90,0xa5,0x35,0x81,0x42,0x8,0x28,0xcd,0xd8,0x6
7,0x5a,0x1b,0xfc,0xb0,0xf9,0xa,0x2c,0xae,0x4d,0x38,0xbe,0x9,0x3c,0x40,0x13,0x26,0xc0,0x30,0x23,0x1d,0x77,0x10,0xb3,0x64,0xd,0x1b,0x15,0x73,0x63,0xb8,0xf3,0xce,0x3b,0xd1,0xd9,0xd9,0x5,0x63,0xb8,0xc9,0x18,0xd7,0x44,0x2d,0xd8,0x85,0xc0,0x6b,0xaf,0xbf,0x86,0x4c,0x26,0x3,0xa5,0x23,0xd5,0x36,0xb9,0x3d,0x1e,0x2d,0x48,0x81,0x19,0x31,0x0,0xc3,0x42,0x2b,0x35,0x66,0xe2,0x68,0x51,0x85,0x14,0xa3,0x3f,0x40,0xdc,0x96,0x51,0xe1,0xb,0x50,0x11,0x78,0x2d,0xdf,0x30,0xdc,0xd1,0x43,0x12,0x90,0xcb,0x79,0x98,0xd6,0x95,0x0,0x4a,0xaa,0xf6,0xf9,0xe7,0x9f,0xdf,0x26,0x42,0x4a,0x29,0x85,0x96,0xd6,0x16,0x3c,0xf4,0xe0,0x83,0xb0,0x6c,0xbb,0x32,0x6,0xca,0x18,0x4c,0x6d,0x4f,0x20,0x8,0xf5,0x8,0x52,0x88,0xb7,0x1a,0xae,0x6e,0x1d,0x1,0x62,0x6b,0x2c,0xde,0xf0,0x25,0xc6,0xe9,0x89,0x32,0x90,0x88,0xc9,0xa8,0x61,0x72,0xdd,0x9d,0xaf,0x5c,0xb9,0xa,0xf1,0x78,0xac,0xe6,0xe6,0x9a,0xb5,0x2e,0xf1,0x78,0x1c,0xab,0x56,0xad,0x86,0x6d,0x45,0x11,0x6c,0xc2,0xb5,0x2a,0xc2,0x45,0xb9,0x94,0xba,0x61,0xbc,0x3e,0xfe,0xc5,0x26,0x34,0x66,0x36,0x32,0x7a,0xae,0x20,0x50,0x68,0x89,0xd9,0x51,0xc7,0x6b,0x66,0x6c,0xda,0xb4,0x9,0x96,0x35,0xfe,0x16,0xcc,0xc6,0x18,0x24,0xe2,0x9,0xac,0x59,0xbb,0xa6,0xe6,0xfb,0xc6,0x30,0xd2,0x31,0xb,0x41,0xa8,0x46,0xdc,0x11,0x35,0x77,0xe3,0x3b,0x48,0x3,0x54,0x6d,0x96,0xd8,0xb4,0x9,0x18,0xf2,0x20,0x88,0x60,0xea,0xaa,0x87,0xcb,0x6c,0xd8,0xb6,0xdc,0x73,0x39,0x56,0x2e,0x7f,0x7b,0xf3,0x40,0xb1,0xa1,0x87,0xfc,0xa6,0x71,0xc3,0x55,0x97,0xd4,0xda,0xa0,0xbd,0x2d,0x89,0xb5,0xfd,0x5,0x40,0x8e,0x55,0xcb,0xd0,0xfc,0xb3,0xd5,0x7f,0xdf,0x92,0x2,0x1b,0x87,0x3c,0x24,0xe2,0x4e,0xf3,0xc9,0xa1,0x1d,0x17,0x6,0x6e,0x7,0x9d,0xc6,0xc,0x37,0xe6,0x60,0xe3,0x60,0x11,0xb2,0x2e,0xb7,0x3f,0x63,0xf7,0xdd,0x10,0x4,0xe1,0x36,0x5d,0x36,0x9f,0xcf,0x63,0xda,0xb4,0xe9,0xd0,0x2a,0xda,0xff,0x29,0xeb,0x5,0x10,0x6f,0x53,0x56,0x90,0x4b,0xce,0xee,0xba,0xde,0x3c,0xa2,0xfd,0x5,0x80,0x49,0xdd,0x93,0x10,0x86,0xe3,0xdf,0x19,0x56,0x4a,0x89,0x5c,0x2e,0x83,0xe9,0xd3,0xa7,0x57,0x9c,0x40,0xe6,0xa8,0x56,0x62,0x7d,0x5f,0xbe,0xa1,0xd9,0xda,0x11,0x64,0xa7,0x68,0xfe
,0x61,0x69,0x5c,0x4b,0xdd,0xc,0x3,0xc9,0x98,0x83,0xfb,0x96,0x6e,0x40,0xcc,0x1e,0xae,0x49,0xf0,0x7d,0x1f,0xc7,0x1f,0x7f,0x3c,0x6,0x7,0xfa,0xab,0x16,0x8a,0x36,0x37,0xda,0x4a,0x29,0xcc,0x9e,0x3d,0x1b,0xad,0xad,0x2d,0x70,0x2c,0x89,0x25,0xcb,0x7b,0x90,0x4c,0xb8,0x55,0xf5,0x2,0x6f,0x65,0x22,0x28,0x7a,0x2d,0x8,0x58,0xd5,0x9b,0xaf,0x0,0x7a,0xee,0xdc,0x39,0x30,0x46,0x37,0xe5,0x81,0xd7,0xcc,0x74,0xcb,0x42,0x7f,0xff,0x20,0x8e,0x79,0xdf,0x31,0x95,0x50,0xd0,0x18,0x83,0x98,0x2d,0xf1,0xc0,0x8b,0x9b,0x90,0x8a,0x3b,0x4d,0xeb,0xb7,0xd2,0xd2,0xd0,0x11,0x41,0xeb,0x36,0x84,0x81,0xd5,0x2a,0xc5,0x8c,0xb,0x72,0xc6,0x18,0x4c,0x9a,0xd8,0x82,0x9b,0x9e,0x58,0x5,0xd7,0x95,0x15,0xda,0xb7,0x50,0x28,0xe0,0x84,0x13,0x4e,0x80,0x32,0x7a,0x7c,0xf9,0x6e,0x2,0xc2,0x30,0xc4,0xe1,0x87,0x1f,0x8e,0x64,0x32,0x9,0xd7,0xb5,0xf1,0x97,0x17,0x36,0x20,0x16,0x7b,0x8b,0x1,0x50,0x57,0xb5,0xae,0xd,0x23,0x13,0x18,0xbc,0xbe,0x21,0xda,0x7,0x62,0xfe,0x82,0xf9,0x68,0x6d,0x6d,0x85,0x94,0xb2,0xe9,0x39,0x4a,0x44,0xe8,0xef,0xeb,0xc3,0x71,0xc7,0x1f,0x87,0xd6,0x96,0xd6,0x1a,0x1,0xc,0x79,0x21,0x9e,0x5d,0xb1,0x5,0xa9,0x84,0x3b,0x3e,0x7f,0x7d,0xc7,0x44,0x1,0x65,0xe2,0x87,0x9b,0xca,0x88,0xd5,0x1f,0x5a,0x1b,0xc,0x78,0x6,0x4f,0xbe,0xda,0x13,0x6d,0x1,0x4b,0x54,0xa1,0x3c,0x2f,0x38,0xff,0x7c,0xc,0xd,0xd,0x80,0x48,0x6c,0x15,0x8,0xcc,0x88,0x12,0x45,0x9e,0x87,0xcf,0x9d,0xf9,0x59,0x18,0x66,0xc,0xe4,0x2,0x3c,0xfa,0x6a,0xf,0x5c,0xc7,0x1a,0x23,0x9a,0x78,0xf3,0xeb,0xc7,0xb4,0x61,0x4,0x4c,0xb8,0xef,0xc5,0x8d,0x30,0xcc,0x98,0x30,0x61,0x2,0xce,0x3a,0xeb,0x4c,0xac,0x5f,0xbf,0x1e,0xf1,0x78,0x62,0xab,0x5a,0xa0,0x3c,0x31,0x3c,0xdf,0xc3,0x85,0x17,0x2e,0xc4,0xd0,0xd0,0x50,0xc5,0xf,0x48,0xc7,0x6c,0x7c,0xef,0xf6,0x65,0x98,0x31,0x7d,0x42,0xc9,0x9,0xdc,0x96,0xa7,0xe3,0x1d,0x63,0x2,0x1a,0x5f,0x68,0x6c,0x5b,0x10,0x2a,0x3,0x27,0x11,0xc3,0xaf,0x1e,0x59,0x8e,0x50,0x69,0x28,0xa5,0x20,0x84,0x80,0x94,0x12,0xa7,0x9e,0x76,0xa,0xe,0x3a,0xe8,0x60,0x28,0x15,0x34,0xf4,0x9a,0xab,0x7f,0xcd,0xb6,0x6d,0xbc,0xf4,0xd2,0x4b,0xf8,0x9f,0x1f,0xfd,0x10,0x1d,0x9d,0x9d,0xd0,0xda,0xe0,
0xfe,0x17,0x37,0x61,0x43,0x4e,0x45,0xf9,0x9e,0x11,0xbd,0x88,0xde,0x42,0x85,0x40,0x40,0x3a,0xe9,0xe2,0x27,0xf,0xad,0x40,0xa8,0xc,0x86,0xb2,0x79,0x9c,0x72,0xea,0x29,0x38,0xfd,0xf4,0x4f,0x60,0xd5,0xaa,0x95,0xa5,0x44,0xcf,0xe8,0xbd,0x84,0x5d,0xd7,0xc5,0xe6,0xcd,0x9b,0xb0,0xf8,0x8a,0xc5,0x68,0x6b,0x6b,0x83,0xe3,0x38,0x20,0x8a,0x38,0x85,0x57,0x36,0xc,0xe2,0x91,0xd7,0x7a,0xe1,0x38,0x56,0x43,0x20,0x51,0x53,0x2a,0x60,0x7,0xf9,0x0,0xdb,0x52,0x10,0x42,0x14,0x79,0xb1,0x8f,0x2e,0xef,0xc7,0x23,0xaf,0xf4,0x44,0xbb,0x7a,0x6a,0x3,0x21,0x4,0x8a,0x45,0xf,0x3f,0xf8,0xaf,0xff,0xc4,0x91,0x47,0x1e,0x89,0xc1,0xc1,0x1,0x64,0x72,0xd9,0x12,0x35,0x2a,0x2a,0x7d,0xf0,0x7c,0xdf,0xc7,0xe0,0xc0,0x20,0x98,0xd,0x6e,0xb9,0xf5,0x66,0xcc,0x9e,0x3d,0x1b,0x64,0x34,0xfa,0xf3,0x1,0xbe,0x75,0xdb,0x32,0x4c,0x68,0x4f,0x34,0x78,0xce,0xb7,0xc1,0x23,0x64,0xc6,0x9b,0xa6,0xb3,0x95,0x0,0x0,0x1e,0xa3,0x49,0x44,0x41,0x54,0x84,0x8e,0x14,0xbe,0xfc,0xb3,0x27,0xd1,0x9a,0x74,0x50,0x28,0x14,0x71,0xde,0xf9,0xe7,0xe1,0x8b,0x5f,0x3c,0x3b,0x62,0xff,0x86,0x86,0x22,0xf0,0x53,0xf4,0x6c,0x44,0x54,0x62,0x3d,0x33,0x8,0x2,0x1f,0xd7,0x7c,0xe7,0x1a,0x1c,0x7b,0xec,0xb1,0x95,0x28,0x20,0x8,0x15,0xa4,0x20,0xfc,0xf0,0xee,0x57,0x10,0x48,0xb,0xa1,0x32,0xd,0x1f,0x8b,0xb7,0xc2,0xe0,0x36,0xe5,0x7b,0x34,0x27,0xf5,0xba,0x1d,0x71,0xc7,0x59,0x94,0xd9,0xdd,0xd5,0x82,0x4b,0x7e,0xff,0x2c,0xba,0x5a,0x62,0x38,0x68,0x56,0x27,0xbc,0xd0,0xc0,0x75,0x5d,0xc,0xe,0xe,0xe2,0x92,0x4b,0x2f,0xc1,0x49,0x1f,0x3e,0x9,0x7f,0xb9,0xe7,0x5e,0x3c,0xfc,0xc8,0xc3,0x58,0xbb,0x66,0x2d,0x8a,0xc5,0x2,0x3a,0xbb,0xba,0x30,0x6f,0xde,0x7c,0x1c,0x75,0xd4,0x91,0x38,0xee,0xb8,0x63,0x91,0x4e,0xa7,0x10,0x86,0xa,0xf9,0xc0,0xe0,0x8c,0xff,0x7a,0x14,0x9d,0x5d,0x2d,0x4d,0xd4,0x9e,0x8e,0x1f,0xc,0xbc,0xd,0x27,0x30,0x0,0xd7,0xb1,0xb0,0x72,0xd0,0xc7,0xa2,0x1b,0x9e,0xc7,0xa2,0x8f,0x2e,0x40,0x36,0x5f,0xc4,0xd9,0x5f,0x3c,0x1b,0x47,0x1e,0x75,0x24,0xee,0xbe,0xeb,0x6e,0x3c,0xf8,0xd0,0x43,0x58,0xb3,0x7a,0x2d,0xb2,0xb9,0xc,0x3a,0xda,0x3b,0xb0,0xef,0xbe,0xfb,0xe0,0xa8,0xa3,0x8f,0xc2,0x71,0xc7,0x1e,0x87,0xf
6,0xce,0x76,0x14,0x8b,0x45,0x58,0x96,0x5,0x2f,0x8,0xd1,0x9e,0x76,0x71,0xc5,0xef,0x97,0xe0,0x6f,0xab,0x87,0x86,0xab,0x9f,0x78,0xfc,0xa0,0x6c,0x6,0xc,0x56,0x53,0xd7,0xd8,0xce,0x71,0x55,0xca,0x60,0xf2,0x94,0x4e,0xfc,0xeb,0xf5,0x8f,0xe3,0x5b,0xa7,0x1f,0x84,0x43,0x66,0x4f,0x80,0xd6,0x1a,0x8e,0xe3,0xc0,0xf7,0x7d,0xcc,0x9e,0x3d,0x1b,0xb3,0x67,0xcf,0xc6,0x97,0xbe,0x7c,0x4e,0xb4,0xd,0x5b,0x3,0x32,0xca,0xf7,0x43,0x64,0x3c,0x85,0xcf,0xfc,0xd7,0xa3,0xa0,0x78,0xc,0xc6,0x94,0x8,0xa6,0x1d,0x5c,0x21,0x4c,0xcd,0x9c,0xc0,0x8d,0x7d,0x81,0x64,0xdc,0xc1,0xad,0x4b,0x36,0x40,0x6b,0xc6,0xa5,0xa7,0xcc,0x43,0xa1,0x58,0xc4,0xee,0xbb,0xef,0x8e,0x2f,0x9c,0xfd,0x5,0x9c,0xfd,0xc5,0xb3,0x47,0x38,0xc9,0x5a,0x6b,0x8,0x21,0xa0,0x95,0x86,0x94,0x12,0x61,0x18,0xc2,0x91,0x2,0x17,0xff,0xf2,0x29,0xfc,0x6d,0x75,0xa6,0xaa,0xf4,0x6d,0x7c,0x0,0x1e,0x87,0xf,0xd8,0xa4,0x9,0xa8,0xe4,0xc8,0xb7,0xb1,0x36,0x9f,0x0,0xa5,0x34,0xba,0xba,0x3b,0x70,0xc1,0xaf,0x9f,0xc5,0xcf,0x1e,0x7c,0x1d,0x7d,0x59,0xf,0x71,0x3b,0xda,0x6c,0x11,0x14,0x65,0xf3,0x42,0xa5,0xe0,0xfb,0x1,0x8a,0x9e,0x8f,0xa2,0xe7,0x43,0x29,0x85,0xb8,0x1b,0x61,0xf4,0x9e,0x17,0xd6,0xe3,0xd4,0x6b,0x1f,0x82,0xb6,0x9d,0xda,0x72,0x32,0x6a,0xfe,0x1e,0x88,0x80,0x21,0x3f,0x84,0x32,0x66,0x94,0x81,0x65,0xe4,0xbc,0x10,0x81,0x32,0xdb,0xec,0x49,0x4c,0x9b,0xdc,0x86,0xbb,0x5f,0xee,0xc1,0xe7,0x7e,0xf8,0x57,0xbc,0xb2,0x7e,0x8,0x31,0xd7,0x82,0x2d,0x5,0x82,0x20,0x84,0xe7,0xf9,0xf0,0x3c,0x1f,0x7e,0x10,0x20,0x54,0xaa,0xe4,0x0,0x47,0x2d,0xe0,0x62,0x36,0xe1,0x95,0xd,0x43,0x38,0xff,0x97,0x4f,0xe2,0xde,0xd7,0xfa,0xe0,0x94,0x6a,0x27,0xb6,0xd5,0x24,0xed,0x58,0x13,0x50,0xbd,0x11,0xf2,0x36,0x9a,0xd9,0xc8,0xb1,0x31,0xe8,0x9e,0xd8,0x82,0xdf,0x3c,0xb5,0x1e,0xb7,0x3d,0xb3,0x16,0xef,0xdb,0x6f,0x32,0x4e,0x3e,0x74,0x26,0xde,0x35,0xbd,0x1d,0xac,0x34,0xb4,0x36,0x20,0x21,0x22,0xd2,0x43,0x4a,0xf4,0x67,0xa,0xf8,0xf9,0x83,0xaf,0xe3,0x86,0xc7,0xd7,0xa0,0x37,0x30,0x48,0xb7,0x25,0x23,0x16,0xb0,0x3e,0x31,0x35,0xa2,0x21,0x43,0xe3,0xd4,0x29,0x9,0xc2,0xba,0x2d,0x45,0xbc,0xbe,0x29,0x8b,0xb9,0xd3,0xdb,0x61,0x98,0
x2b,0x9,0xa1,0x40,0x19,0x38,0x96,0xc0,0x63,0xcb,0xfb,0xb6,0x8b,0xcd,0xb,0x42,0x8d,0xce,0xb6,0x4,0xd6,0x64,0x3,0x9c,0xf3,0xbf,0x4f,0x61,0xee,0xd4,0x34,0x4e,0x7f,0xcf,0x2c,0x1c,0x35,0x67,0x72,0x54,0xa4,0xaf,0x39,0x2a,0x17,0x43,0x54,0x2,0x6,0x22,0x3c,0xb0,0x74,0x3,0x7e,0xfb,0xc8,0x1b,0x58,0xb6,0x21,0xb,0x2b,0xee,0xa2,0xbd,0x25,0x51,0xb5,0x9,0xe6,0x36,0x80,0x80,0x78,0xc7,0x98,0x0,0xf0,0x28,0x88,0xda,0xe,0xb5,0xcb,0xcc,0x48,0x27,0x5c,0xd8,0xb6,0xc4,0x9f,0x5f,0xee,0xc5,0xaf,0xfe,0xb6,0xa,0x9d,0x49,0x1b,0xfb,0xce,0xe8,0xc4,0xb4,0xb6,0x38,0x5c,0x4b,0x62,0x4d,0x7f,0x1,0x2b,0x7b,0xf2,0x58,0xde,0x93,0x41,0x6b,0x3a,0x81,0xae,0x8e,0x14,0x5a,0xec,0x66,0xc9,0x15,0x1a,0x73,0x62,0x4c,0xef,0x6e,0xc5,0xd9,0x3f,0x79,0x1c,0x7f,0xba,0xf0,0x7d,0xe8,0x6e,0x89,0xd,0xef,0x4a,0x4e,0x84,0x5b,0x9f,0x59,0x8b,0x9b,0x9f,0x5e,0x8b,0x19,0x53,0xda,0xc7,0x4c,0x54,0x6d,0x9d,0x3,0x61,0xc4,0x5d,0x1b,0xa9,0xb8,0x83,0x55,0x19,0x85,0xb,0x7e,0xf7,0x1c,0x54,0xf8,0x34,0xe6,0x4e,0x6f,0xc7,0xb4,0xf6,0x38,0x26,0xb6,0xba,0xd8,0x92,0xf5,0xb1,0xbe,0xbf,0x80,0xa7,0x5f,0xef,0x41,0x2a,0x9d,0x40,0x7b,0x6b,0x1c,0x2d,0xed,0xe9,0xa8,0x5e,0xa2,0xc9,0x19,0x3c,0x2a,0x3c,0x18,0xd8,0x41,0xeb,0x2,0xaa,0x92,0x35,0xd1,0xee,0x87,0x3b,0xc6,0xd1,0x26,0x20,0x8,0xd,0xe2,0x31,0x7,0xbb,0x4f,0xef,0x82,0x61,0xe0,0x95,0x5e,0xf,0xcf,0xae,0xcb,0x46,0x31,0xbf,0x6b,0x21,0x11,0x73,0xb0,0xc7,0x8c,0x9,0xd0,0x9a,0xe1,0x7,0x7a,0xec,0x1d,0xe2,0xc6,0x23,0x1c,0x66,0x4c,0xee,0x6e,0xc3,0x87,0xae,0x7e,0x0,0xc7,0xcf,0x9b,0x82,0x19,0x1d,0x71,0x78,0x4a,0xe3,0xb9,0x35,0x43,0x78,0x7e,0xed,0x20,0x66,0x4e,0x6d,0x2f,0x65,0x17,0xb7,0x93,0x5a,0xa0,0xa8,0xec,0x5b,0x5a,0x2,0xdd,0x5d,0x69,0x10,0x11,0x7a,0x7c,0x83,0xd7,0xdf,0xe8,0xaf,0x14,0x80,0x26,0x62,0x36,0x66,0xce,0xec,0x86,0xd1,0x26,0x22,0xb3,0xb6,0xc1,0xde,0x8f,0x8a,0xf4,0x26,0x9d,0x0,0xab,0xb9,0x18,0x0,0x86,0x44,0xa9,0x6d,0xa9,0xa0,0xed,0x52,0x91,0xd5,0x21,0xa2,0x61,0x46,0x50,0xca,0x73,0xdb,0x52,0xc0,0x49,0xb9,0xa5,0xfb,0x8f,0x12,0x48,0x81,0xc1,0x76,0x27,0x58,0x46,0x23,0x6f,0xba,0xbb,0xd2,0x78,0x62,0xcd,0x10,0x1
e,0x5f,0x3d,0x4,0x6,0xc3,0x12,0x2,0x93,0xba,0x52,0x63,0xb,0x1f,0xe3,0xd4,0xca,0xa5,0x46,0x5f,0x15,0xa6,0x92,0x81,0x96,0x94,0x1b,0x6d,0xf2,0x50,0xba,0x4c,0x10,0xa8,0x1d,0xf4,0x54,0x5c,0x43,0x5,0xef,0x18,0xd,0x50,0xba,0x51,0x5b,0x60,0xa8,0x67,0xf3,0x20,0x12,0x31,0x7,0xf1,0x54,0x1c,0x42,0x10,0x5b,0x8e,0xbd,0xc3,0xd7,0x66,0x31,0xef,0x10,0x2b,0xd3,0xb4,0xad,0xb2,0x2d,0x59,0x91,0x27,0xa1,0xc9,0xa5,0x58,0xbc,0xfd,0xb7,0xb4,0xa3,0xa8,0xaa,0x7a,0x2c,0xb2,0xd6,0xac,0x95,0x26,0x15,0x2a,0x1b,0x86,0x13,0x25,0x7b,0x34,0x3e,0x0,0xec,0xf3,0xb9,0xef,0x4b,0x96,0x31,0x1,0x66,0xfd,0xca,0xf,0x3f,0x57,0x6a,0x15,0xce,0x97,0xf4,0x2d,0xbb,0xff,0xf5,0x4c,0xdb,0xd4,0xa3,0xe2,0x5d,0xd3,0x8f,0x71,0xe2,0xc9,0xb6,0x64,0xd2,0xd5,0xa9,0x74,0x52,0x92,0xb4,0xc7,0x37,0x2a,0x4d,0x4a,0x96,0x76,0xf0,0xf5,0x1a,0x9d,0x5c,0x13,0x48,0xec,0xec,0xab,0x8e,0xc7,0xd2,0x3c,0xcc,0x8,0x7c,0x8f,0x33,0x43,0x79,0x52,0x81,0x46,0xbe,0x6f,0xe3,0x93,0x30,0xc1,0xcf,0xeb,0xa7,0xd5,0xde,0x9f,0xff,0x91,0x5,0xc0,0xbc,0xf2,0xe3,0x2f,0x8c,0xec,0x11,0xb4,0xf7,0x59,0x3f,0x22,0x80,0x93,0x20,0xab,0xe3,0x95,0x1f,0x9f,0xb9,0xe6,0x5d,0x5f,0xb8,0x9e,0x88,0x15,0x5e,0xf9,0xf1,0x17,0xca,0x17,0x38,0xc,0xc0,0x24,0x0,0x98,0x70,0xf0,0x29,0xf3,0x3a,0xe6,0x7f,0xe0,0xf2,0x9,0xdd,0xed,0xe4,0xc4,0x5c,0x90,0x90,0xb5,0xfd,0x1,0x4a,0xbf,0xcb,0xcc,0xb5,0xeb,0xfe,0x85,0x1c,0x7d,0x87,0x4d,0x1a,0xd9,0x1b,0x88,0x68,0x6b,0xe7,0x8d,0xdc,0x5a,0xbe,0xf6,0xf4,0xba,0x92,0xb3,0x7a,0x53,0x42,0x34,0x8a,0xe3,0x54,0xf5,0xc,0x8d,0xa6,0xfe,0x98,0xed,0x5f,0xab,0xfe,0xd6,0x60,0xe5,0xd2,0x88,0x86,0x58,0xa3,0xad,0xf9,0x2f,0x95,0xc3,0xd,0x7f,0x5f,0x34,0xa8,0x8,0x8a,0x2e,0xa8,0xc3,0x0,0xfd,0xbd,0x43,0xc8,0xf5,0xae,0xff,0xdb,0x8a,0xdf,0x5d,0x7c,0xd,0x9,0x49,0x6c,0xf4,0x93,0x0,0xd6,0x95,0xcf,0x7d,0xd7,0x59,0x3f,0xa2,0x57,0x7f,0xf2,0x85,0x11,0x10,0xaa,0xe4,0x69,0xbb,0xe,0x38,0x41,0x30,0x63,0xf,0x21,0xf8,0xf5,0x8e,0xf9,0x1f,0x3a,0x13,0x46,0x77,0xa,0x49,0xc1,0xe4,0x43,0x4e,0xed,0xef,0x79,0xea,0x96,0x0,0xc0,0x54,0x8,0xd9,0x22,0x63,0x69,0x3b,0xb7,0xfa,0xb9,0x8d,0xf1,0x49,0x7b,0xcb,0
xd6,0x49,0x53,0xf7,0xb3,0x6c,0x2b,0x4a,0xeb,0x56,0x1,0x80,0x8d,0x41,0x18,0x4,0xec,0x17,0x7d,0xa,0x83,0x0,0x61,0xa0,0x10,0x6,0x1,0x88,0xc,0x54,0x10,0x1a,0x29,0x65,0x49,0x20,0xd1,0xb2,0x95,0x61,0x46,0x8f,0x9a,0x6,0x0,0x81,0x9a,0x16,0xfe,0xf0,0xd7,0xc6,0x0,0xc0,0x56,0xd5,0xd0,0xd8,0x8d,0x2b,0xb7,0xc5,0x5c,0x31,0x73,0x69,0xd3,0xa7,0xd2,0x1b,0xd6,0x8,0x83,0x90,0x75,0x18,0x40,0x5,0x21,0xf9,0xbe,0x8f,0xd0,0xf,0xa3,0x54,0x33,0x6b,0x48,0x29,0x6a,0x52,0xe8,0x95,0x9e,0xab,0x5a,0xa1,0x90,0x2f,0x78,0xaf,0xfe,0xfc,0xbc,0x85,0x4e,0xcb,0xc4,0xb8,0x9,0x3d,0xd,0x36,0xeb,0x0,0x64,0x1,0x60,0xdf,0xb3,0x7f,0xba,0x17,0x1b,0x73,0x5c,0xd7,0x81,0x27,0x7d,0x7f,0xc2,0x81,0x27,0x66,0x7a,0x97,0xdc,0xfe,0x4a,0x3,0x13,0xc0,0x64,0x8c,0x99,0xe8,0xc6,0x5c,0xb4,0x75,0xa4,0xa7,0xfb,0xc5,0xf0,0xeb,0xf9,0x5c,0xf1,0xeb,0xbe,0xf6,0x56,0xec,0xfd,0xf9,0x1f,0xfd,0x6d,0xe0,0xe5,0x7,0x1f,0xde,0xfc,0xe8,0x6f,0x7a,0xb5,0x97,0x55,0x0,0xa0,0xbd,0xdc,0x50,0xa3,0xd6,0x66,0xc,0x86,0xd1,0x1a,0x3,0xfd,0x43,0x94,0xeb,0xd9,0xf4,0x54,0x7e,0xe3,0xcb,0xf,0x3b,0xad,0x13,0xa7,0xd9,0x2d,0x13,0xf7,0x4e,0xb6,0x76,0xbd,0x8b,0x89,0x62,0x96,0x95,0x3,0x91,0x40,0x2c,0xe1,0x68,0x41,0x64,0x52,0x2d,0x9,0x61,0x34,0xaa,0x5a,0x89,0x96,0x5f,0x5a,0x55,0x5c,0x4,0x6d,0xb3,0xe6,0xc4,0x36,0x3b,0x91,0x54,0x29,0x41,0xe7,0x9a,0xae,0x68,0x8c,0x11,0x2d,0x6f,0xcb,0xfd,0x79,0xb9,0x4c,0x9a,0xf3,0xc8,0xd9,0x6a,0xca,0x79,0x7a,0x3,0x66,0x66,0x21,0x60,0xc2,0x40,0xb1,0xef,0x7,0x1c,0x6,0xca,0xe,0x43,0x5,0xa3,0x34,0x31,0x3,0x61,0x31,0xb7,0xc6,0xcb,0xf4,0xbe,0xea,0xf7,0xaf,0x7f,0x4d,0xb8,0xc9,0x54,0x7a,0xfa,0x7e,0x9f,0xec,0xea,0x6e,0x93,0xb1,0x58,0x1c,0x24,0xc4,0x48,0xa2,0x5e,0x5,0xfd,0x56,0xb2,0xdd,0xd,0xf3,0x3,0x3e,0xeb,0xd0,0xb4,0xed,0x7b,0x54,0xcb,0xe4,0xf7,0x7c,0xe2,0x93,0x86,0x71,0x5a,0x18,0x86,0xb3,0xe3,0x71,0xa7,0x8d,0xd,0x23,0xc,0xc3,0x67,0x0,0xdc,0x5c,0x3,0x80,0x77,0x9d,0xf9,0x43,0x22,0x6,0xb3,0xe1,0xc9,0xb6,0x6d,0xc1,0x75,0x5d,0xb8,0xae,0x8b,0xd6,0xf6,0x34,0x7,0x7e,0x30,0x23,0x97,0x2d,0x4c,0x6b,0xdf,0xfb,0xa8,0x4f,0x6,0x3,0x9b,0xce,0x1f,0x78,0xf1,0xfe
,0x95,0x95,0xa1,0x6d,0xd4,0xb7,0x26,0xaa,0x1d,0xe0,0xe2,0xe0,0xd0,0xa6,0xf5,0xf7,0x7c,0xf7,0x7b,0x61,0x6e,0xc0,0x67,0xe5,0x57,0x4a,0x5a,0xad,0x64,0xbb,0xd3,0xb5,0xff,0x7,0xe6,0xc6,0x27,0xce,0xde,0xaf,0xd8,0xd2,0x3d,0x5f,0x58,0x6e,0x77,0x5f,0xcf,0xa0,0x65,0xbb,0x8e,0x1d,0x8b,0xbb,0x24,0xa5,0x44,0x22,0x11,0x8b,0x2e,0x4f,0x1,0xa4,0x6d,0x83,0x84,0xb5,0x9d,0x6,0xf4,0xad,0xeb,0x28,0xc1,0x35,0xd4,0x23,0xc0,0x46,0xc3,0xa8,0x10,0x5c,0x62,0x1f,0xb5,0xd2,0xc8,0xe7,0x7d,0x10,0x31,0x65,0x6,0xf3,0x2c,0x88,0x43,0x36,0xc6,0xd7,0x85,0x81,0x17,0xfd,0xfe,0x75,0x4b,0x33,0x2b,0x9e,0x5a,0x3a,0xf4,0xfa,0xe3,0x1b,0x2b,0x57,0x89,0xd4,0x39,0x4f,0xff,0xe0,0x5,0x8e,0x97,0xde,0xff,0xe3,0x31,0xd7,0xe5,0x72,0x1f,0x77,0xaa,0xb6,0x46,0x1c,0xed,0xe1,0x47,0x52,0x12,0x2b,0xa0,0xfb,0xb0,0x7f,0xba,0x53,0x90,0x98,0x91,0x48,0x38,0xba,0xb5,0x2d,0x2d,0x18,0xcc,0x85,0x6c,0x81,0x7,0x7,0xc2,0x3d,0x47,0x3a,0x81,0x44,0x10,0xca,0x63,0x40,0x76,0x5a,0x56,0x94,0xad,0x22,0xcb,0x6,0x1b,0x43,0x6e,0xcc,0x95,0xc9,0x54,0xcc,0xac,0x5b,0xbd,0x19,0xc4,0xd5,0x95,0x17,0xa3,0xe7,0xe0,0x9,0x80,0x31,0x6a,0x28,0xcc,0xf5,0x7b,0x6c,0x34,0x93,0x90,0x24,0xdc,0xa4,0x45,0x24,0xc8,0x84,0x9e,0xde,0xf4,0xe8,0x6f,0x9e,0x1,0xf0,0x4c,0xf9,0x1b,0x6d,0x7b,0x1d,0x36,0x39,0xd6,0xbd,0xe7,0x6e,0x76,0xeb,0xa4,0xe9,0x56,0xb2,0x6d,0xaa,0x70,0x92,0x53,0x9d,0x44,0x7a,0x8a,0x1b,0x8b,0x25,0x52,0xe9,0x38,0x27,0xd2,0x49,0x2,0x59,0xa3,0x84,0x84,0x75,0xaa,0xbf,0x54,0x1c,0xca,0xe3,0xea,0xd1,0x4a,0x5b,0xed,0x68,0xc6,0x5c,0xf5,0x4b,0x35,0xf9,0xa,0xaa,0x17,0x7b,0x15,0x7f,0x66,0x60,0x54,0x88,0x81,0xbe,0xc,0x54,0xa8,0x51,0xc8,0xe,0x6d,0x34,0x41,0x61,0xa3,0xf6,0x32,0xeb,0xc2,0x4c,0xef,0xba,0xb0,0x7f,0xcd,0x9a,0xcc,0xca,0xa7,0xd7,0xfa,0x3,0x9b,0x6a,0x8a,0xfe,0x65,0xbc,0xc5,0x8e,0x54,0x7b,0x68,0x4c,0xe8,0x19,0x0,0x8,0xb,0x83,0x5b,0xa2,0xee,0xaf,0x3c,0xb2,0x34,0x94,0x1a,0xae,0x5e,0xca,0xb5,0x75,0xa4,0x75,0x2c,0x1e,0xb3,0xc,0x8,0x14,0x55,0x4d,0x6b,0x30,0x77,0x8c,0x0,0x0,0x81,0xe9,0xc5,0x9f,0x7d,0xc5,0xcc,0xfe,0xcc,0x7f,0xda,0x95,0x25,0x4f,0xa5,0x42,0x5,0x90,0x5,0xad,0x5
5,0x14,0xba,0x52,0x5d,0x61,0x5a,0xf9,0xdc,0x11,0xc9,0x8,0x6,0x1b,0x26,0x18,0xc5,0x25,0x67,0x90,0x75,0x31,0x53,0x29,0x2,0x14,0x6e,0xd2,0x2,0x1b,0x66,0x63,0xc0,0x3a,0x30,0x83,0xaf,0x3d,0xb6,0x11,0xaf,0x3d,0xb6,0x11,0xc0,0x13,0x91,0x96,0x68,0x73,0x84,0xe5,0xca,0xae,0x3,0x3e,0x74,0x40,0xb0,0xf7,0x51,0xe7,0x93,0x10,0x3a,0x9e,0x4a,0x49,0x8c,0x48,0xfe,0xd4,0xdd,0x8e,0x61,0xe8,0xc0,0x43,0xb1,0xe8,0x6b,0x22,0x92,0xcc,0xd4,0x58,0x3,0xd4,0xcc,0x9f,0x6,0x3d,0x79,0xb9,0xba,0x91,0x53,0xad,0xaa,0xb7,0x2c,0x61,0x62,0x71,0x57,0x90,0xed,0x56,0x9d,0x47,0xb5,0xa6,0xaa,0x42,0xc3,0x6a,0xde,0xb2,0xa9,0x1f,0x5e,0xa6,0xef,0xa5,0xd5,0x7f,0xba,0xe6,0x3f,0x58,0x5,0x3a,0xcc,0xf7,0xfb,0xac,0xc2,0x9a,0xd8,0x8c,0xa4,0x2d,0x40,0x82,0x48,0x5a,0xc4,0x2a,0x30,0xd5,0x63,0x55,0x46,0x7c,0xa9,0xbc,0x9d,0xb8,0xe2,0x59,0xf3,0x48,0x87,0xbb,0x16,0xf3,0xb6,0x56,0x5a,0x80,0x4a,0x79,0x13,0x6d,0xca,0xbb,0x9,0xc8,0x11,0x0,0x60,0x6,0xbf,0xeb,0x73,0xff,0x29,0xb4,0xe6,0xd,0x41,0x10,0x20,0x6e,0x14,0x50,0x9,0xef,0xca,0xff,0xcc,0x88,0xf6,0xe5,0x5c,0xd3,0x36,0x66,0xf8,0x35,0x57,0xff,0x95,0x99,0x21,0x2c,0xda,0xf3,0x93,0x57,0x7f,0x2b,0xcc,0x67,0xd6,0x5,0x43,0x9b,0x5f,0xcb,0xaf,0x7f,0xe9,0xd5,0x30,0xdb,0x9b,0x53,0xd9,0x2d,0xf9,0x60,0x68,0xf3,0x88,0x25,0x2f,0xda,0xcb,0x2b,0x76,0x98,0x37,0x3c,0x70,0xfd,0x5f,0x53,0xbb,0x1f,0x78,0x32,0x23,0x3d,0xb,0x34,0xa,0x21,0x54,0x5,0x6,0x1d,0x16,0xd0,0xdf,0x97,0x45,0x6e,0xa0,0x77,0x55,0x30,0xb8,0xf9,0xd9,0xf2,0xa6,0x2a,0xa0,0x52,0xab,0x6b,0xaa,0xec,0x58,0x51,0x69,0x57,0x46,0x84,0xd2,0xa0,0x56,0x56,0x80,0xa0,0x62,0xa4,0xcb,0xbd,0xd9,0x4a,0x5d,0x1a,0x2d,0x37,0xd5,0xe9,0x74,0x4c,0x3a,0x62,0xc2,0xc4,0x76,0xe1,0x82,0x20,0x2c,0x7b,0xd4,0xbc,0x47,0x69,0xa6,0x1a,0x66,0x96,0x5b,0x9e,0xf8,0xc3,0xf,0xfd,0x81,0xd,0x79,0x21,0x2d,0xd1,0x88,0x63,0x16,0x76,0x4c,0x3a,0xad,0x13,0x13,0x32,0xd9,0x99,0x48,0x74,0xcf,0x9c,0x1c,0x9b,0x30,0x73,0x3f,0xa7,0x65,0xe2,0xbe,0x85,0x75,0x2f,0xfe,0x79,0xc3,0x83,0x3f,0x7d,0xb8,0x9a,0xde,0xc1,0x68,0xd,0x1f,0xa8,0x81,0xcf,0x51,0x69,0x15,0x53,0x71,0x64,0x24,0x3,0xeb,0x47,0x0,0
xe0,0xd5,0x9f,0x7e,0x91,0xdf,0xf5,0xd9,0xff,0x16,0x80,0xde,0xac,0x43,0x3,0xd6,0x26,0x6a,0xb,0x55,0xd7,0xd6,0xb7,0x7a,0xa9,0x39,0x1b,0xcd,0x82,0x8,0x30,0x1a,0x4c,0x2,0x24,0xe5,0xb0,0x2a,0xad,0xf,0x93,0x8c,0x62,0x92,0xee,0xc4,0xc9,0xb3,0xf7,0xde,0xd3,0xf3,0x67,0x1d,0xad,0xf7,0x3e,0x14,0x46,0xa9,0xa0,0x90,0xc9,0x6e,0x66,0xe3,0xf5,0x18,0x2f,0xb7,0x51,0xe5,0x7a,0x56,0x14,0x37,0x2f,0x5f,0xdd,0xbf,0xf4,0xde,0x95,0xac,0x43,0xa3,0xbd,0x4c,0x74,0x15,0xe5,0xf,0x46,0x79,0xa8,0xb1,0x3d,0x7e,0xd6,0x21,0x8c,0x6,0xb2,0x7d,0x7d,0x6b,0xde,0xf8,0xf9,0x97,0x16,0xbe,0x59,0x36,0x7e,0xea,0xfb,0xff,0xf5,0x95,0x44,0xe2,0x88,0x73,0xdc,0x98,0x3b,0xac,0x7e,0x79,0x94,0x40,0xbd,0x54,0x47,0x19,0x64,0xfb,0xf2,0x30,0x9a,0x8d,0x89,0xca,0x7d,0x93,0x53,0xf6,0x6e,0x4b,0xcd,0xdc,0x7f,0x96,0xd3,0x36,0x6d,0xa6,0x95,0x6c,0xdb,0xd,0x32,0x3e,0xc1,0x8e,0xc7,0x27,0xc5,0x13,0xc9,0x36,0x10,0x23,0x16,0x73,0xe0,0x7b,0x21,0xf2,0xeb,0xe8,0x9e,0x2a,0x1f,0x7d,0x78,0x6c,0x29,0x7a,0xcf,0x6c,0xc0,0x5a,0x95,0x9a,0x4b,0x94,0x62,0xc6,0x1a,0xcf,0xb0,0x36,0x4b,0x46,0x44,0x82,0x99,0xd7,0x8c,0x42,0x4,0x19,0xb0,0x31,0x5,0x1a,0x61,0xdf,0xa9,0x21,0x85,0x25,0x6d,0x4b,0x64,0x6,0x73,0x88,0xc5,0x43,0x4e,0xb7,0x26,0xa1,0x43,0x43,0xc2,0x76,0x4a,0x28,0x69,0xd0,0x27,0xcf,0x68,0x93,0x48,0xba,0x2a,0x16,0x77,0xad,0x12,0x3e,0x1d,0x9a,0xd4,0x31,0x3d,0x8,0xc2,0xe9,0x61,0xa0,0x91,0xcb,0xcd,0xe2,0xd6,0x3d,0xe,0xc1,0xc4,0x77,0x7f,0x74,0xe8,0x8d,0xdf,0x5e,0xf4,0x45,0x36,0x8a,0xb5,0x97,0xb,0x2b,0xb9,0xe2,0x31,0x66,0x7e,0x65,0x80,0xc0,0xd0,0x7e,0x6e,0x6d,0xe4,0x3c,0x59,0xc4,0x46,0x31,0x9,0x49,0x8d,0xbd,0xf7,0x66,0x1d,0xc7,0xf2,0x4e,0xdf,0xd1,0xcc,0xf5,0x6,0xd6,0x6f,0x1a,0x9e,0x6e,0xe5,0x98,0x74,0x24,0x8,0x22,0x2d,0x50,0xf2,0x89,0xaa,0x42,0xb7,0xdd,0x3e,0x74,0xc1,0xc9,0xc9,0x69,0xfb,0x7e,0x8a,0x99,0x39,0x91,0x88,0x91,0x10,0x84,0x74,0x6b,0x12,0x5c,0x69,0x7,0x43,0x20,0x82,0x31,0xda,0xd0,0x88,0x26,0x90,0xe5,0x56,0xb2,0x5a,0xc3,0xa8,0x0,0x30,0x6,0xa1,0x52,0x5c,0xcc,0x7b,0xe4,0x7b,0x81,0x35,0xb2,0x34,0x6e,0x18,0x30,0xc4,0x1a,0xd9,0x4c,0x1e,0x52,0xd2,0xfa,0
x86,0x0,0x60,0x66,0x76,0x1c,0x6b,0x63,0x2e,0x5b,0xe8,0x69,0xeb,0x6a,0x99,0x60,0x94,0x22,0x21,0xe5,0xb0,0x36,0xac,0x1a,0x4,0x0,0x18,0x7c,0xe9,0x81,0xc7,0xac,0x58,0xaa,0x23,0x9f,0xec,0x9a,0x33,0xd8,0x9f,0x98,0x1e,0x4b,0xc4,0x63,0xc9,0x74,0xc,0x44,0x82,0xdd,0xb8,0xc3,0xcc,0x5c,0x3,0x85,0xf2,0x1e,0x36,0xd2,0x8d,0x83,0x28,0x22,0x8e,0x8c,0x56,0x88,0xd9,0x36,0x62,0x9,0x46,0xba,0x3d,0xc5,0xc6,0x18,0xda,0xb2,0x61,0x4b,0xd,0xa9,0x32,0xbc,0x25,0x6a,0x59,0x20,0xd4,0xd8,0x7,0x20,0xd4,0x2c,0xaf,0x2e,0x1f,0xed,0x73,0xde,0x37,0x53,0x2b,0x66,0x22,0x22,0x46,0xa9,0xef,0xbd,0x31,0xa3,0x70,0xbf,0x14,0x39,0x78,0x95,0xfd,0x2f,0xa2,0xff,0xfc,0xbe,0x35,0x7d,0x85,0xd,0xaf,0xc,0x96,0x5,0x4b,0xe5,0xce,0x9d,0x95,0x7,0x1c,0x4d,0x13,0x70,0x1d,0x2f,0xf,0x28,0xa5,0xb8,0x73,0x62,0x2b,0x5c,0xc7,0x31,0x24,0x84,0x60,0x66,0x82,0xb0,0xa2,0xa2,0x59,0x61,0x45,0x49,0x24,0xbf,0x8,0xc3,0x6,0x54,0x4f,0x2e,0x11,0xb3,0x56,0xca,0x78,0x5,0x4f,0xfa,0x7e,0x0,0xaf,0x18,0xc0,0xcb,0x66,0x36,0x71,0x90,0x5f,0x5d,0x58,0xbb,0xec,0xde,0x30,0xd7,0xe7,0x57,0x9a,0x72,0x72,0x69,0x4f,0xd1,0xd2,0x10,0x69,0x6d,0xc,0xb3,0x11,0x4,0x79,0x77,0x43,0x0,0x90,0x20,0x7e,0xf9,0xa7,0xe7,0xac,0x9d,0xfd,0xa9,0xef,0xbf,0xe1,0x15,0xbc,0xae,0x64,0xda,0xa2,0x88,0x27,0x22,0x80,0x99,0x98,0x1,0x99,0x68,0x8b,0x16,0xe4,0xb,0x49,0x85,0xf5,0x2f,0xf7,0xaf,0x59,0xff,0xf2,0x2f,0x1,0x20,0xbd,0xc7,0xc1,0x53,0x92,0x53,0xe7,0xec,0x99,0xeb,0xde,0xf3,0x68,0xb7,0xa5,0x63,0x9e,0xed,0xd8,0x24,0x88,0x46,0xf6,0x58,0x28,0x2f,0xdb,0x12,0x91,0x30,0x85,0xb4,0xaa,0xda,0xcb,0x42,0xa8,0x7c,0x6,0xcc,0x64,0x8d,0x9c,0xa5,0xf5,0xc2,0x1f,0xc9,0xf0,0x8d,0x20,0xdd,0x58,0x43,0x48,0x8b,0xba,0xf,0x3f,0xfd,0xaa,0x44,0xdc,0x35,0x6,0x64,0x45,0xb3,0xb1,0x26,0x54,0xe7,0x3a,0xf,0x36,0xe2,0x97,0xb8,0xf4,0x92,0x98,0xc3,0xd0,0xc8,0xcc,0xaa,0x17,0x7e,0xb3,0xfa,0xb6,0x6f,0xdd,0x58,0x26,0xb9,0x2a,0x33,0xbb,0x5e,0xd7,0x8d,0x20,0xe7,0x47,0x2a,0x43,0x22,0x21,0xb5,0xd2,0x80,0x2b,0xa5,0xb0,0xdd,0xba,0x66,0x57,0x5c,0xd1,0x64,0x84,0xda,0x9a,0x7e,0x2,0xd8,0x2b,0x78,0xe4,0x17,0x7c,0xe9,0x7b,0x85,0x41,0xaf
,0x77,0xed,0x7d,0xf9,0x75,0x4b,0x9f,0x2a,0x6e,0x78,0x79,0x83,0xd7,0xbb,0x26,0x57,0x56,0x3b,0xc2,0x72,0xa5,0x9,0xa,0x8a,0x41,0x5d,0x60,0xd6,0x0,0x4,0x6b,0x8d,0x62,0x3e,0x2f,0xb4,0x32,0xea,0xb5,0x9f,0x9d,0xf3,0xe2,0x5e,0x67,0xfc,0xa7,0xf5,0xda,0xcf,0xbe,0xac,0x6a,0x0,0xf0,0xea,0xf5,0x5f,0x62,0x0,0x10,0x82,0x9e,0x2a,0x64,0x8b,0x87,0x25,0xd3,0xc9,0x61,0xef,0x81,0x48,0xba,0x8e,0x6d,0x5a,0x66,0x1d,0x72,0xda,0xc0,0xd2,0x7b,0x5f,0xd1,0x61,0x51,0x57,0x7b,0x3c,0xd9,0x95,0x4b,0x36,0x66,0x57,0x2e,0xd9,0x8,0xa3,0x1f,0x22,0xcb,0x11,0x93,0x8f,0x3c,0xe3,0x18,0xa7,0xa5,0x7b,0x4f,0x92,0xb6,0x60,0xe3,0xeb,0x91,0x29,0x32,0xd4,0x9,0x32,0xa2,0x3a,0xcb,0x2e,0xe4,0xf0,0x6c,0xaa,0xa3,0x4b,0x1b,0xa,0x1f,0xd,0xe2,0x90,0x2a,0x7,0x9a,0x21,0x3a,0x27,0xb4,0x41,0x73,0xed,0x2,0x22,0x6a,0x2a,0xd7,0xc3,0x9c,0xcf,0x16,0x68,0x90,0x9b,0xc9,0x8,0x95,0xa4,0x4f,0x18,0x69,0x6,0x1b,0x74,0xda,0x21,0x1a,0x85,0x12,0x1e,0x65,0x4b,0x25,0x36,0x61,0x90,0xdf,0xb2,0xe6,0xae,0xfe,0x17,0xee,0xb9,0x3d,0xbb,0xfc,0xc9,0x8d,0xe5,0xe8,0x81,0x8d,0xaa,0x51,0x97,0x26,0x28,0xa8,0x49,0x87,0x7f,0xe2,0x30,0x1,0x9a,0xe0,0x38,0xb6,0x1,0x8,0x6c,0x42,0x84,0xa1,0x16,0x6c,0xcc,0xef,0xeb,0xf9,0xe7,0x11,0xc,0x8b,0x65,0xdb,0xd7,0x7,0xbe,0xff,0x15,0x1d,0x86,0xa1,0x25,0x1d,0x1b,0x44,0x20,0x21,0xa9,0xa5,0x2d,0x41,0x5a,0x77,0xce,0x9b,0xf5,0xcf,0x57,0xfe,0xc2,0x1f,0xea,0x79,0xdc,0xef,0x5b,0xfb,0x6c,0x7e,0xf5,0xb3,0x2f,0xe7,0xd6,0x2e,0xeb,0x5,0x6b,0x90,0xb4,0x85,0x70,0xe3,0x16,0xb3,0xe1,0xd,0xf7,0xfd,0xe8,0x5e,0x0,0xf7,0x8e,0x70,0x88,0x1a,0xc5,0xe3,0x55,0x3b,0x88,0x52,0xc3,0x92,0xb3,0xf2,0xa,0x5d,0x1d,0x99,0x8e,0x32,0x4d,0x5c,0x53,0x11,0xca,0x95,0xdf,0xa8,0xd5,0xea,0xc3,0x6d,0x58,0xed,0x58,0x92,0xc6,0xb7,0x86,0x8c,0xa0,0xfd,0x62,0xd9,0xfe,0x37,0x4e,0x5b,0x46,0x21,0x44,0xf5,0xce,0xca,0xc3,0xee,0x81,0x89,0x1a,0x60,0x31,0xd7,0x55,0xf4,0x96,0x16,0xb0,0x72,0x23,0x8e,0x82,0x47,0xa4,0x74,0x2b,0xb,0xe,0x37,0x3f,0xfa,0xab,0xc7,0x0,0x3c,0x16,0xf1,0x4,0x69,0x1b,0x86,0xa1,0xfd,0x5c,0x58,0x26,0x8b,0x3a,0xe7,0x1d,0xb7,0x8f,0xdb,0x3d,0x7b,0xae,0xd3,0x36,0xe9,0
xdd,0xb1,0x54,0xeb,0xcc,0x64,0x2a,0xe,0xcb,0xb6,0x25,0x48,0x40,0x7b,0x1e,0x8a,0x79,0xf,0x0,0x7e,0xbc,0xe7,0xa7,0xaf,0x93,0x4,0x32,0xd,0x1,0xb0,0xe7,0xa7,0xaf,0x15,0x2f,0xfd,0xe4,0x8b,0x2f,0xec,0xf9,0xe9,0xeb,0x9e,0x1c,0x1a,0x18,0x7a,0x77,0xc7,0x44,0x7,0x64,0x45,0xc9,0x1e,0x27,0x96,0x44,0xc7,0x4,0x82,0xe,0x93,0x31,0xbf,0xbd,0xe5,0x28,0x6f,0xc2,0xb4,0xa3,0x52,0xd3,0xe7,0x65,0x27,0xb2,0xda,0xe2,0x6f,0x59,0xf1,0x97,0xf5,0xf7,0xfd,0xf8,0x6e,0xad,0x43,0x3,0x21,0x49,0x38,0x9,0x8b,0x75,0x68,0x58,0xf,0xc7,0xbb,0xa5,0x18,0x76,0xcc,0xfc,0x4f,0xa4,0xc,0xeb,0x6,0xdb,0x68,0x23,0x25,0x31,0x87,0x3e,0xeb,0x20,0x88,0x2a,0xbb,0x84,0x4,0x64,0x44,0x13,0x4b,0x27,0x56,0x65,0x61,0x18,0x80,0xae,0xbb,0x66,0x79,0xc0,0x4d,0xa5,0xf5,0xed,0x78,0x38,0xbd,0xda,0x8d,0x1b,0x50,0x9,0xad,0x98,0x23,0x7a,0x85,0x85,0x0,0x54,0x8,0xa3,0x75,0xe4,0xd7,0x84,0x1,0x88,0xc0,0xc6,0x18,0x58,0x92,0xc,0x8,0xb2,0xca,0x75,0xaa,0x7d,0xc6,0x1a,0x0,0x95,0x41,0x80,0x4a,0x24,0x5a,0x65,0x1a,0x18,0x44,0x44,0xd2,0x11,0x44,0x4,0x5d,0xcc,0x86,0x0,0xd0,0x31,0xf7,0x1f,0xf7,0x68,0xd9,0xeb,0xd0,0x13,0x65,0xac,0x75,0x8e,0xb0,0xec,0x96,0x78,0x3c,0x6e,0xa5,0x5a,0x92,0x0,0xc,0x9c,0x78,0x2,0xc2,0x8e,0x41,0x7,0x1e,0x82,0x30,0x44,0x10,0xf8,0xcb,0xa4,0x2b,0x1e,0x67,0x4d,0xf4,0xea,0xcf,0xbe,0xdc,0x58,0x3,0xbc,0xfe,0x8b,0xf3,0xcc,0x3e,0x9f,0xba,0x5a,0x86,0x24,0xbe,0xe8,0x79,0xfe,0x92,0xa0,0x90,0x63,0x27,0xed,0x44,0xc1,0xb2,0x65,0xc1,0xb1,0xdb,0x60,0x94,0x82,0xa3,0x2,0xa4,0xd2,0x49,0x56,0x61,0x6b,0xc2,0x2b,0x14,0x66,0xe4,0x13,0xa9,0xb3,0xdf,0xf5,0xd9,0xef,0x9f,0x1d,0xe4,0x32,0x2f,0xe6,0xd6,0xbc,0xf0,0xa7,0xdc,0x9a,0x67,0x5f,0xf3,0xb6,0xac,0xc9,0x94,0xaf,0x1b,0x6b,0x9f,0x9a,0x20,0xc0,0x2,0x78,0xf4,0x40,0x9e,0x69,0x78,0x79,0x77,0x4d,0x8e,0x5b,0xe5,0x7b,0xd6,0xf7,0x14,0xd,0xc3,0x6a,0x6d,0x6f,0x75,0x8c,0x31,0xb0,0x1d,0x1b,0x6e,0xa9,0x3b,0x98,0x28,0xe6,0x2b,0x3b,0x24,0x9,0x21,0x1b,0xf8,0x4d,0x63,0x6f,0x4d,0x35,0x66,0x1e,0xa0,0xc1,0x22,0x8b,0xca,0x36,0xed,0x5a,0xa3,0x38,0xd4,0xcb,0x20,0x22,0xd6,0x51,0x19,0x17,0x1,0xc8,0x65,0xb,0x10,0x82,0xa8,0x58,0
xf4,0x74,0x50,0xf4,0x7c,0x61,0x59,0xc6,0xa8,0xd0,0x54,0xc8,0x81,0xb2,0xdb,0x51,0xad,0xf2,0xeb,0x83,0x2e,0x36,0xc4,0x86,0x49,0xb8,0xc9,0xd6,0x6a,0xf5,0x6e,0xa7,0x3b,0x63,0xf1,0xae,0xdd,0x3b,0x5b,0xf6,0x3c,0xec,0x98,0xd8,0x84,0xdd,0x3e,0x20,0x85,0xb0,0x1d,0xc7,0x66,0x37,0xee,0x98,0x54,0x3a,0x25,0xc,0x1b,0x8,0x2b,0x56,0xe2,0x27,0x4a,0x48,0x32,0x21,0xf,0xd,0xc,0x91,0x10,0xe2,0xd7,0xaf,0xfd,0xf4,0xab,0xc5,0xad,0xd6,0x3,0x28,0x92,0xf4,0xc6,0xcf,0xbf,0xf2,0xec,0x1e,0xa7,0x5f,0xf3,0x48,0x2e,0x57,0x3c,0xa2,0xd5,0xca,0x42,0xa4,0x5a,0x2b,0x9d,0xc2,0x84,0x94,0x80,0x95,0x0,0xc0,0x24,0x1c,0x25,0xdd,0x44,0x12,0xa9,0x16,0x8f,0x3,0xcf,0x33,0x5e,0x32,0xbe,0x5f,0xaa,0xf3,0x98,0xfd,0xf2,0x7b,0x1d,0x1a,0x9a,0x62,0xee,0xe5,0x60,0x68,0xf3,0xb,0x41,0xa6,0x67,0x5d,0x72,0xea,0x3e,0x47,0x39,0xf1,0x58,0x8a,0x41,0x4c,0x42,0x56,0x86,0xb7,0xe6,0xa1,0xa9,0x8a,0x59,0x14,0x52,0x18,0x15,0xad,0x85,0x5a,0x71,0xe3,0xa2,0xef,0x25,0x27,0xef,0xd5,0x1e,0xeb,0x9c,0xd6,0x31,0x18,0xef,0x68,0xb7,0x13,0xad,0x1d,0x32,0xd1,0x36,0x41,0xc4,0x92,0x13,0xa5,0x1d,0xef,0x4e,0xb4,0xb6,0x4e,0x24,0x69,0xb5,0xb0,0x61,0x72,0x5c,0x7,0x96,0x65,0x39,0xb5,0x1a,0xa0,0xbc,0xa6,0x61,0x1b,0xf2,0x1,0xd,0x7a,0xe,0x9,0x29,0x44,0x18,0x84,0xd4,0xd7,0x3b,0x18,0x15,0xb1,0x82,0x95,0x9f,0xcf,0xf5,0x4,0xc5,0x7c,0x8f,0xf1,0xf3,0x3d,0xc6,0xcf,0x6d,0x56,0x85,0xc1,0x3e,0x95,0x1f,0xec,0x57,0xd9,0x9e,0x81,0xfc,0xa6,0x37,0xfa,0xc3,0xa1,0xcd,0x45,0x8,0x49,0x60,0x5d,0xcb,0x2b,0x73,0x35,0xd4,0xb8,0x2a,0x1,0x25,0xc9,0x8d,0xd9,0x70,0x27,0xec,0x7e,0x62,0xf7,0x91,0x9f,0xcd,0xb2,0x56,0x61,0xac,0x73,0xda,0x5c,0x11,0x6f,0xdd,0x37,0x96,0x4c,0x4d,0x8a,0xb9,0xe,0x1c,0xd7,0x66,0xdb,0xb1,0xb5,0x1b,0x4f,0x8,0x90,0x90,0x10,0x12,0x96,0x94,0x35,0xe5,0x60,0x46,0x5,0xc8,0x67,0x73,0xbe,0xa,0x95,0x69,0x4b,0xb6,0x7e,0xbf,0xa9,0x82,0x90,0xd7,0x7f,0x71,0x9e,0x2,0x0,0x29,0xad,0xb,0xb3,0x43,0xb9,0xfb,0x13,0x9,0xd7,0x15,0xbe,0x27,0xa4,0xe3,0xa2,0x9e,0x18,0x17,0xd2,0x2,0x40,0xb0,0xa4,0x4d,0x56,0xb2,0x45,0xc6,0xfc,0x2,0x54,0x10,0xa2,0xb5,0xb5,0xc5,0xf6,0x3c,0x6f,0x9e,0x9e,0x34,0x6
5,0x1e,0x97,0xd4,0x57,0x2a,0x95,0x80,0xb0,0x9c,0xe1,0x5d,0x8f,0x1b,0x51,0xb9,0xd1,0xe3,0x5b,0xcc,0x86,0x49,0x58,0x4,0x22,0x62,0x15,0x98,0xdc,0xda,0x65,0x7d,0xb9,0xb5,0xcb,0xfa,0x6a,0xe6,0xa7,0x90,0x44,0x96,0x23,0x48,0xd8,0x82,0x2c,0x4b,0xc6,0x27,0xce,0x6e,0x4f,0x4e,0xdb,0x67,0x16,0x98,0xd5,0xf0,0x6c,0x8b,0x5c,0x85,0x7a,0x67,0xbf,0x69,0xf9,0xd3,0xc8,0xb0,0xd2,0xef,0x5b,0xb3,0xb9,0xff,0xf5,0xa7,0xfe,0xa7,0xd8,0xb3,0x72,0x55,0x7e,0xcd,0xb,0xeb,0x95,0x97,0xd,0x60,0x34,0x9b,0xd0,0xd7,0xd5,0x26,0xaf,0x9e,0x1a,0x24,0x12,0x4,0x29,0x8,0x42,0x3a,0x5c,0x36,0xef,0x65,0xbb,0x4f,0xb5,0xb5,0x3,0x32,0x16,0x87,0xed,0x17,0xd0,0x39,0xa1,0xab,0xad,0xad,0xe3,0xf0,0x73,0x4a,0x34,0xa,0xa5,0xd2,0x49,0x80,0x18,0x96,0xe3,0x42,0x3a,0x2e,0x91,0xb4,0x64,0x43,0x67,0x19,0x80,0x31,0x1a,0x7e,0x3e,0x6b,0x32,0x83,0xb9,0x98,0x90,0xf2,0x94,0xa7,0xff,0xe7,0xac,0xc2,0xb8,0x4a,0xc2,0x5e,0xfb,0xc5,0x57,0x1f,0xdf,0xeb,0xd3,0xd7,0xfd,0xef,0x96,0xcd,0x7d,0xe7,0x4c,0x9e,0x2a,0xb5,0xb4,0x1d,0x9,0x1a,0x25,0xe7,0x5e,0x7a,0x10,0x19,0x4b,0xc1,0x8a,0xb,0xe8,0xc0,0x83,0x1d,0x77,0x23,0x22,0x34,0x62,0xa0,0x88,0x2c,0x27,0xb2,0xd7,0xe5,0x72,0xe7,0x6,0x12,0x11,0x42,0x30,0x49,0x8a,0x4d,0x3a,0xe2,0xd3,0x27,0xc,0x2d,0xbb,0xf7,0x1,0x3f,0xb3,0xb9,0x10,0x66,0xfb,0xbc,0x86,0x37,0x28,0x2c,0x11,0xe5,0x26,0xc,0x60,0x8c,0xc9,0xaf,0x5b,0xda,0x93,0x5d,0xf1,0xe4,0xc6,0x7a,0xe3,0x42,0x6c,0x40,0xd8,0x9e,0xfa,0xfa,0x5a,0x27,0x30,0xbb,0xe2,0x99,0xcd,0xd9,0x15,0xcf,0xdc,0xd,0x21,0x49,0xba,0xc9,0xe1,0x30,0xda,0xb2,0x45,0x94,0xdf,0x18,0x59,0x54,0x28,0x2c,0x57,0xb8,0x1d,0x53,0x53,0x4e,0xc7,0xb4,0x8e,0xc4,0xc4,0xdd,0x3f,0x28,0x28,0x6a,0x7c,0x5,0xe6,0xc6,0x31,0x24,0x33,0x9c,0x74,0x2b,0xa4,0xed,0x80,0xcb,0xe,0x4,0x33,0xc8,0x72,0x20,0x2c,0x7b,0xf8,0xbb,0x6c,0x1a,0x42,0x9b,0x99,0x61,0xbc,0x3c,0x32,0x99,0x9c,0xd0,0x2a,0xbc,0xe5,0x8d,0x5f,0x2f,0xbc,0x79,0xaf,0x4f,0x5d,0x6b,0xbd,0xf6,0xcb,0xf3,0x54,0xd3,0x0,0x98,0x7d,0xfa,0xd5,0xf2,0xb5,0x5f,0x9c,0xfb,0xa5,0x3d,0x3e,0x71,0xf5,0x87,0x32,0x43,0xd9,0x19,0x2d,0x42,0xc0,0x49,0xb4,0xd5,0xe5,0x50,0x78,0x4,0x
8,0x98,0x35,0x84,0xe5,0x2,0xc2,0x2d,0xbb,0x79,0x11,0x27,0xc0,0x63,0xd5,0x59,0x44,0xdf,0x75,0x92,0x2d,0x94,0x2c,0x14,0x58,0xcc,0x5e,0x70,0xba,0x33,0x61,0xe6,0x87,0x60,0x54,0x16,0xac,0xb3,0x8,0xc3,0xa1,0xb0,0x98,0xd9,0x60,0xbc,0xa1,0x8d,0x7e,0xff,0xba,0x75,0xc5,0x75,0x2f,0x6e,0x28,0x6c,0x59,0x9d,0x63,0xe5,0x8f,0xe0,0xd4,0xc9,0x72,0x4,0x1b,0xc5,0xac,0x35,0xb,0x37,0x21,0xca,0x89,0xa9,0x6d,0xe8,0xb9,0x3c,0x9c,0x68,0x29,0xe5,0xa3,0x85,0x1d,0x97,0x24,0xa5,0x0,0x9,0xe8,0x62,0x56,0xd5,0x24,0x6d,0x2a,0x99,0xcd,0x43,0xa7,0xc4,0x26,0xcc,0x9c,0x6a,0xa5,0x27,0x4c,0x16,0xb1,0xd6,0x69,0xd2,0x8d,0x4f,0x60,0xc8,0x14,0x49,0x99,0x74,0x93,0xa9,0x49,0x89,0x84,0xb,0x37,0x1e,0x83,0x70,0xdc,0x31,0xeb,0xf5,0x88,0x4,0xac,0x78,0x6a,0x98,0xca,0xa9,0x6e,0x7d,0x53,0x2e,0x2a,0x25,0x6a,0x50,0xc4,0xc4,0x50,0xc5,0xc,0xb2,0x3,0x43,0xca,0xcb,0x17,0xf3,0x42,0x5a,0x5f,0xdc,0xf3,0x93,0xdf,0x91,0x8d,0x84,0x3f,0x26,0x0,0xde,0xf8,0xf5,0x42,0x3d,0xfb,0xf4,0x6f,0xdb,0x82,0x70,0x48,0x76,0x28,0xbb,0xdc,0xb2,0xad,0x24,0x1b,0x86,0x9b,0x6a,0xc7,0x70,0x38,0x45,0xa3,0xc7,0xc3,0x3c,0x4a,0x8e,0x7c,0xac,0xf1,0x96,0x16,0x52,0x6d,0x1d,0x14,0x4b,0x16,0xd1,0xd9,0xd5,0xda,0xa,0xa0,0xd5,0x2b,0xfa,0x8,0x82,0x10,0x52,0xa,0x1a,0x1a,0xca,0xa3,0x65,0xd6,0x81,0xc0,0x41,0x27,0x45,0xfe,0x8d,0xa,0x7a,0x74,0x31,0xb7,0x32,0xc8,0xf5,0xaf,0xa,0x6,0x37,0xac,0x2a,0xac,0x7f,0x79,0x55,0x7e,0xdd,0x4b,0xbd,0xa5,0x1c,0x82,0x9e,0x78,0xf8,0xe7,0x8f,0x23,0x40,0x6e,0x47,0x1d,0x26,0x31,0x1b,0xe3,0xb6,0x4d,0x39,0x54,0xba,0xc9,0x1b,0x8d,0xa,0xc,0xb3,0x31,0x76,0xb2,0xcd,0x6d,0xdb,0xfb,0x88,0x3d,0xdc,0xae,0x19,0xbb,0x3b,0xa9,0xce,0x99,0x32,0xd5,0xb6,0xbb,0x74,0x12,0x33,0x89,0xc8,0x2a,0x13,0x8c,0x6e,0xcc,0x81,0x1b,0x73,0xd8,0x28,0x8d,0xd6,0xf6,0x34,0xb4,0xe6,0x88,0x61,0x22,0x1,0x3b,0xd9,0x5a,0xa5,0xfa,0x9,0xa3,0xaf,0xe6,0xe5,0xba,0xb5,0x19,0x3c,0x46,0x99,0x60,0xa9,0x31,0x64,0xe8,0x23,0x33,0x30,0xa0,0x73,0x99,0x82,0x45,0x82,0x3e,0xf6,0xda,0x2f,0xce,0xdd,0xbc,0xdd,0xb5,0x97,0x7b,0x9d,0x71,0xdd,0x11,0xca,0xf3,0x1f,0x6e,0xeb,0x68,0xf5,0x93,0xad,0x2d,0xae,0x93
,0x6a,0x6b,0x48,0xc5,0xd6,0xec,0x3d,0xde,0x68,0xf,0xa0,0x31,0xf6,0xa,0xa2,0xea,0xd7,0x24,0xa0,0xc2,0x68,0x83,0x2a,0x36,0x3a,0x4a,0x38,0x1,0xc,0xa3,0x18,0x20,0x16,0x44,0x3c,0x34,0x38,0xc8,0x42,0x4a,0x4b,0x85,0x9a,0xc2,0x20,0xda,0x26,0x2d,0xf0,0xc3,0x88,0x15,0xd4,0x3a,0xc3,0xac,0xa,0x24,0x9c,0x49,0x6d,0x1d,0x29,0x24,0x5b,0xdb,0x61,0xc5,0x53,0xe3,0xaf,0xbb,0x37,0xc,0x95,0xef,0xc7,0xc6,0xf5,0xbd,0x0,0x9b,0xa2,0xd1,0xc1,0xa0,0xb0,0x63,0x93,0x98,0x99,0x2c,0x29,0x2a,0xbd,0x89,0xe2,0x89,0x18,0x8c,0xd1,0xca,0x71,0x1d,0xb8,0xae,0x5b,0x6e,0xd4,0x2e,0x28,0xea,0x85,0x44,0xc2,0x72,0xc1,0x6c,0x22,0xe6,0x53,0x5a,0xd1,0x33,0xd5,0xf3,0xfb,0xc,0x8c,0x6c,0xee,0xd7,0x78,0x93,0xc9,0x11,0xf5,0x83,0xa5,0xa1,0x54,0xc5,0x3c,0xfc,0xdc,0x10,0xf7,0xf5,0xe,0x11,0x1b,0xf3,0xf9,0x37,0x7e,0xbd,0xf0,0x27,0xb3,0x3f,0x79,0xb5,0x7c,0xe3,0x57,0xb,0xf5,0x76,0x1,0x0,0x0,0xf6,0xf8,0xc4,0xb7,0x4f,0x63,0x63,0xfe,0xd8,0xd6,0xd1,0x12,0x26,0x52,0x29,0xdb,0x6d,0xed,0x40,0xcd,0xd2,0xc2,0x6d,0x6,0xc0,0x70,0x8d,0x5f,0x8d,0x6f,0x21,0x86,0x1d,0xa5,0xf2,0x5e,0xb8,0xa6,0x7a,0xbf,0x42,0xd6,0xc3,0x59,0x40,0x15,0x32,0x9,0x9,0x2f,0x3b,0x8,0x22,0xaa,0x9,0x5,0x9d,0x64,0xa,0x76,0xa2,0x65,0x1b,0xe7,0x3f,0x41,0xfb,0x1e,0xc2,0xfc,0x10,0xb4,0xa,0x87,0x93,0x3c,0x60,0xb6,0x9d,0x58,0xc9,0x46,0x33,0xa4,0x9b,0x8c,0x7c,0x91,0x52,0xe1,0x66,0xc4,0x77,0x88,0xe1,0x4e,0xd4,0x86,0x47,0x72,0xb,0x3b,0xa,0x0,0x86,0x1,0x21,0xa0,0xa,0x59,0x4,0x85,0xac,0xd9,0xb2,0xa9,0x4f,0x18,0xe6,0xc5,0x2b,0x7e,0x7b,0xd1,0xe5,0x7b,0x9c,0x7e,0xb5,0x5c,0xfe,0xeb,0x85,0x7a,0xbb,0x35,0xc0,0xac,0x8f,0x5f,0x2d,0x6c,0x77,0x3,0xab,0x60,0xf2,0x49,0x2a,0xc,0x6f,0xe9,0x9a,0xd0,0x8e,0x78,0x3a,0x9,0x37,0xdd,0x9,0x90,0x44,0x55,0xf6,0x4,0xf5,0x9b,0x41,0x8e,0x5f,0x3,0xa0,0x31,0xe7,0xdf,0x90,0xf2,0x2d,0xbf,0x17,0xa5,0x7b,0x10,0xa3,0x74,0x33,0xd9,0xce,0x9a,0xef,0xca,0xb6,0x76,0xf5,0x14,0x75,0xb9,0xd3,0xa7,0xa9,0xe2,0x33,0x78,0xe4,0x6e,0x88,0x5c,0x5f,0x5d,0xc4,0x3b,0x54,0x3,0x84,0xf9,0x21,0x78,0xf9,0x2c,0x7a,0x7b,0xfa,0x20,0x84,0xf8,0xf6,0x1b,0xbf,0xbe,0xe8,0xa2,0x66,0x1f,0xad,0x2
9,0x6a,0x6c,0xc5,0x6f,0x17,0x1a,0x15,0x4c,0xa1,0xe5,0xbf,0xb9,0xf0,0x56,0x37,0xe6,0x1c,0xd7,0xd7,0xdb,0x8f,0xcc,0xc0,0xa0,0xa,0x32,0xbd,0xac,0x82,0x22,0xca,0xb1,0xfd,0x36,0x14,0xce,0xd5,0x51,0x9f,0x63,0x8,0x6e,0xcc,0x35,0xea,0xe5,0x95,0x37,0x3a,0x72,0xac,0xb8,0xea,0xdf,0x8e,0x28,0xf8,0x67,0x1e,0xe5,0xda,0x8d,0xaa,0x8d,0x68,0xdc,0xcf,0x3e,0xfe,0x13,0x4b,0xf6,0x5e,0x6b,0x84,0xb9,0x7e,0x64,0xfa,0x7a,0x75,0x6f,0x4f,0x2f,0x0,0xfa,0xd7,0x37,0x7e,0x7d,0xd1,0x45,0x7b,0xfc,0xf3,0x55,0x4d,0x17,0x51,0x6e,0x53,0xa5,0xe4,0xac,0x8f,0x5f,0xb5,0xf,0x1b,0xbd,0xc4,0xb2,0xad,0xd8,0xc4,0x49,0x13,0x21,0xec,0x18,0x9c,0x74,0x7b,0x15,0xd9,0x32,0xca,0x9e,0xc0,0x34,0xcc,0xfa,0x8c,0xfc,0xac,0x4e,0x33,0xd4,0x6b,0x91,0xad,0xe,0x2e,0x6d,0xe7,0x53,0x8d,0x57,0x1e,0x63,0xb4,0x90,0xaf,0x3f,0x87,0x6b,0x5f,0x33,0x1a,0xed,0x15,0x58,0xaf,0x1,0xb8,0x4e,0x19,0x70,0x4d,0xd9,0x99,0xe,0x7d,0x14,0x7,0x36,0x23,0x97,0xcd,0xfb,0xf9,0x6c,0xc1,0x15,0x24,0x4e,0x5e,0xfe,0xbb,0xaf,0xdd,0x32,0xeb,0x9f,0xae,0xa4,0x15,0xbf,0xbf,0x98,0x77,0xa8,0x6,0x18,0x71,0xe8,0xe0,0x35,0x99,0x6a,0xed,0xd4,0x61,0xf8,0xf0,0xfa,0xd5,0xeb,0x50,0xcc,0xf4,0x73,0x30,0xb4,0xa5,0x94,0x23,0xdf,0x6,0xb6,0x6d,0xb4,0x85,0x14,0xdb,0x3e,0x7d,0xde,0xe4,0x83,0xb7,0xf9,0x54,0x1e,0x75,0xcd,0x5e,0x13,0xd7,0x2c,0x85,0xda,0x61,0xb6,0x1f,0xb9,0x2d,0xeb,0xcd,0x60,0xff,0x10,0x72,0x83,0xd9,0x75,0x64,0xc5,0x66,0x2e,0xff,0xdd,0xd7,0x6e,0x99,0xf5,0xf1,0xab,0xc6,0x25,0xfc,0x1d,0x32,0x57,0x66,0x7e,0xec,0xdf,0x17,0x82,0xcd,0xb7,0x93,0x2d,0x49,0x9d,0x6a,0x49,0xb,0x37,0xd9,0x4a,0x4e,0xaa,0x3,0xcc,0x6a,0xe4,0x6c,0xaf,0xf6,0x3,0xaa,0x1c,0xc0,0x51,0xb5,0x40,0x1d,0xe1,0xd4,0x38,0xfd,0xfb,0x16,0x69,0x2,0xde,0x11,0xb3,0xbf,0xee,0xef,0x35,0x3b,0x88,0x8e,0xa5,0x1,0x22,0xcd,0x1a,0x16,0x32,0x30,0x41,0x9e,0x33,0x7d,0x3,0x7e,0x36,0x9b,0x8b,0x69,0xc3,0x3f,0x9c,0x3d,0x67,0xfe,0x97,0xee,0xbb,0xfc,0xc3,0x66,0xd6,0xc7,0xff,0x43,0xac,0xf8,0xed,0x45,0xe3,0xee,0x97,0x27,0xb7,0x77,0x5c,0x6,0x5f,0xbc,0xff,0x6f,0xed,0x73,0x8f,0xf9,0x7d,0xe8,0xf9,0xa7,0x16,0x73,0x85,0x94,0x2d,0x41,0xaa,0x30,
0x4,0x3b,0x91,0xae,0xaa,0xbf,0x1c,0x9,0x80,0x51,0x81,0x51,0x9f,0x22,0xac,0x7,0xc1,0x78,0x0,0xb0,0xa3,0x80,0x30,0x6e,0xe1,0x6f,0xed,0x32,0xdc,0xa0,0xfb,0x3a,0x8f,0xe9,0x83,0x78,0xfd,0x1b,0x51,0xcc,0xc,0xa0,0x7f,0x73,0xf,0x79,0x5,0xcf,0x83,0x94,0xc7,0xae,0xfa,0xc3,0xa5,0x3f,0xa0,0x49,0x73,0x31,0xb0,0xec,0x7e,0xc,0x2c,0xbb,0x77,0x9b,0xd4,0xa3,0xdc,0x7e,0xd,0xf0,0x4d,0xc1,0x83,0x43,0x7d,0xab,0x6e,0xff,0xe6,0x77,0xda,0xf7,0x3b,0x9a,0x87,0x6,0x6,0xdf,0xd,0x22,0x47,0x17,0x32,0x5a,0x8,0x10,0x9,0x49,0xc2,0xb2,0x87,0x43,0x22,0xa2,0xda,0x82,0xd,0x8c,0xdc,0xdf,0x6e,0x2c,0x2d,0x30,0x12,0x24,0x4d,0x80,0xe0,0x4d,0x5f,0x13,0xc2,0x8d,0xf7,0x41,0x6f,0x68,0xde,0xb6,0xf2,0x6d,0x2e,0x85,0x93,0x42,0x40,0xfb,0x1e,0x54,0x31,0x83,0xcc,0xa6,0xd5,0x3a,0x9f,0xc9,0x89,0xc1,0xfe,0xa1,0x90,0x99,0x7f,0xb8,0xe2,0x86,0xcb,0x8f,0x18,0x58,0x76,0xff,0xea,0x99,0xff,0x7c,0xa5,0x5c,0xf9,0xbb,0xaf,0x6f,0x57,0x97,0xcc,0x1d,0x36,0x34,0x33,0x3f,0xfc,0x75,0x5a,0x79,0xeb,0x37,0x79,0xcf,0x8f,0x7f,0x73,0x4a,0xe0,0xfb,0x8b,0xa5,0x14,0x67,0x3a,0xae,0x63,0x5a,0xda,0xdb,0xd8,0x89,0x27,0xa5,0x93,0xee,0x80,0xe5,0x26,0x86,0x49,0x10,0xaa,0x25,0x7f,0x6a,0x1d,0xbe,0x46,0x21,0x61,0x1d,0x8,0xde,0xa,0xa7,0xb0,0x29,0xc7,0x8f,0x1b,0x2c,0xf9,0xe6,0x6,0xb,0x43,0xab,0xc4,0x3c,0x22,0xcc,0x1b,0x2e,0xe0,0x24,0x21,0xa1,0x82,0x2,0xc2,0x4c,0x1f,0x42,0xdf,0xd3,0x3,0xbd,0xbd,0x26,0xc,0xb4,0xad,0xb5,0xb9,0x47,0x5a,0xf2,0xab,0x2b,0xfe,0x70,0xd9,0xcb,0x3b,0x12,0xba,0x62,0x47,0x5d,0x68,0xe5,0xad,0xdf,0x64,0x0,0x50,0xa1,0xea,0x59,0x7d,0xd3,0xe2,0xb3,0xa4,0x1d,0x9b,0xe4,0x7b,0xc1,0x5f,0x37,0xaf,0xdb,0xa4,0xfa,0x36,0x6d,0x42,0x6e,0xd3,0x6a,0xc,0xad,0x7d,0x15,0x26,0xf0,0xaa,0xfa,0xd8,0x8e,0x62,0xf,0xeb,0x7,0x90,0x1b,0x2a,0xd1,0xa6,0x42,0xa5,0x1d,0xe7,0x3b,0x8e,0xbe,0xa9,0xf3,0x56,0x85,0x8f,0xad,0x34,0xc6,0x2f,0x71,0xfa,0xda,0xcf,0xa3,0xd0,0xbb,0x1e,0xb9,0x8d,0xab,0xd0,0xb7,0xb9,0x7,0x1b,0x56,0xaf,0x17,0xbe,0xa7,0x5e,0x86,0x14,0x7,0xae,0xbe,0xe9,0x1b,0xc7,0x31,0xf3,0xeb,0x3b,0x5a,0x77,0xbd,0x29,0xca,0x71,0xe6,0x69,0x8b,0x64,0x31,0xbb,0xd9,0x6c,0xb
a,0xfb,0xbf,0x79,0xd6,0xc7,0x16,0x1f,0xa6,0x42,0xff,0x4b,0x42,0x58,0xa7,0xdb,0x8e,0x85,0x64,0x22,0xae,0xdd,0x64,0x12,0xb1,0x74,0xbb,0x24,0x27,0x8e,0x58,0xb2,0x15,0x1c,0x95,0xa4,0x97,0xee,0x48,0x8c,0xa2,0xea,0x47,0x33,0x7,0xcd,0xf8,0x4,0xa3,0x67,0x20,0xb7,0x2e,0xef,0xb1,0x6d,0x73,0x53,0xc2,0x67,0x1e,0x41,0x1e,0x95,0x9f,0x2d,0xc,0x3c,0x68,0x2f,0xb,0x55,0xcc,0x9b,0xdc,0x60,0xbf,0x9,0x83,0xd0,0xca,0xe7,0xa,0x60,0xf0,0x1d,0xb6,0x65,0xff,0xf7,0xf2,0x3f,0x5c,0x7e,0x47,0x64,0x6a,0x17,0x5b,0x2b,0xff,0x70,0x99,0x7a,0x47,0x0,0xa0,0xfe,0x38,0xfc,0xb2,0x3f,0xd1,0x9a,0xe7,0x1f,0xef,0x20,0x41,0x67,0xa,0xe2,0x7f,0x27,0x41,0x76,0x2c,0x1e,0xb,0x5b,0xda,0xda,0x84,0xb0,0x6c,0x69,0xc7,0x53,0x88,0xb5,0x4d,0x2c,0x9,0xbf,0xaa,0x96,0x1d,0x62,0x64,0xea,0x70,0x9b,0xcc,0x41,0x3d,0xb7,0xb0,0xbd,0xc2,0x1f,0x45,0xed,0x37,0x70,0xe8,0xea,0x59,0x3f,0x22,0x1,0xa3,0x2,0x4,0x85,0xc,0x82,0x6c,0x1f,0x88,0x84,0xc9,0xc,0xf4,0x6b,0xaf,0xe8,0xd9,0x2a,0x54,0x60,0xd0,0x77,0x48,0x88,0xef,0xb2,0xed,0x6e,0x5c,0xfd,0xdb,0x8b,0xf5,0x9b,0x2d,0x1b,0xf9,0x56,0x0,0xc0,0x9a,0xb0,0x27,0x41,0x90,0x5a,0x73,0xd3,0x15,0xf,0xf,0xbe,0xfc,0xf0,0x15,0xed,0xfb,0x1d,0xbd,0x22,0xf4,0xfd,0x99,0x99,0xc1,0xa1,0xa4,0xd6,0x2a,0xa6,0xbc,0x2,0xfc,0x4c,0x2f,0x4c,0x50,0x64,0x68,0x4d,0x24,0x44,0x85,0x4b,0x17,0xa5,0x35,0x4,0x8d,0x67,0xf2,0xd6,0x2a,0x84,0xb7,0x3,0x4,0xe3,0x12,0x7e,0x3,0x6f,0x9e,0xa8,0xd2,0x8,0x9b,0xb5,0x6,0xeb,0x10,0xaa,0x98,0x43,0x71,0x60,0x33,0x67,0x7a,0x56,0xc3,0xcf,0x65,0x68,0xb0,0x7f,0x0,0x3,0x5b,0xfa,0xf2,0x81,0x1f,0xae,0x4,0xd3,0x77,0x56,0xdd,0x74,0xc5,0xfb,0x87,0x5e,0x7e,0xf8,0x9e,0xd6,0xbd,0xdf,0xeb,0x81,0x8d,0x19,0x7a,0xe9,0xa1,0x37,0x9d,0xf8,0x78,0x4b,0x1b,0xeb,0xce,0x38,0xf5,0x72,0xc1,0xca,0xa3,0x35,0xb7,0xfe,0x87,0x6,0x80,0x3d,0x3e,0x76,0xf9,0x5e,0xa1,0xe7,0x1f,0xc6,0xe0,0xd3,0x2c,0xc7,0x39,0xc1,0x92,0x2,0xb6,0x63,0xc3,0x76,0x6c,0xbf,0xa5,0xab,0x5b,0x82,0x2c,0x9,0x21,0xc8,0x89,0xb7,0x80,0x2c,0x7,0x76,0x3c,0x51,0xd9,0x1e,0xc6,0x18,0x55,0x95,0x63,0x10,0x15,0x70,0xd0,0x8e,0x0,0x1,0x8f,0xd,0x80,0x8a,0xe3,0x56,0x75,0x1
e,0x95,0x72,0x22,0x24,0x25,0x74,0x18,0x40,0x79,0x79,0x68,0xbf,0x0,0x1d,0xfa,0xcc,0x3a,0xd4,0xc5,0xec,0x90,0xe,0x3c,0xdf,0xf5,0x7d,0x1f,0x2a,0x54,0x50,0xa1,0x7a,0x54,0x8,0xfa,0x83,0xb4,0xac,0x87,0x56,0xdc,0x70,0xc5,0xb,0x0,0xb0,0xdb,0x49,0x17,0xa,0xe1,0x24,0xb0,0xea,0x86,0x45,0x6f,0x59,0xff,0xfb,0xb7,0x69,0xaf,0x8d,0xda,0x63,0xf7,0x8f,0x5e,0x2a,0x74,0x28,0x2d,0x21,0xd4,0xc7,0xc0,0xe6,0x33,0x60,0xfe,0x47,0x30,0x23,0x9e,0x4c,0x42,0xda,0x16,0xb7,0xb4,0xb6,0xc2,0x30,0x13,0x98,0x61,0xc7,0x53,0x90,0xb1,0x4,0xe2,0xed,0x93,0x4b,0xeb,0x1e,0x46,0x3e,0x46,0x53,0x11,0x2,0x6d,0x85,0xfb,0x6f,0x6a,0xf6,0xf,0x97,0x80,0xfb,0xd9,0x7e,0xb0,0x31,0x28,0xe,0x6c,0x8c,0x56,0xf8,0xb0,0xe1,0x62,0xa1,0x0,0xbf,0xe0,0xc1,0xf7,0x3c,0x8a,0x56,0xe9,0x88,0x27,0x89,0xf9,0x97,0xc6,0x8e,0xfd,0x52,0x72,0x98,0x5d,0x79,0xc3,0x15,0xe6,0xed,0x1e,0xfb,0x9d,0x2,0x0,0x33,0x4e,0xbd,0x54,0xc2,0x28,0x6b,0xf5,0xcd,0x57,0x55,0x4a,0x7c,0x66,0x7d,0x6c,0xd1,0x11,0xca,0xf7,0x4f,0x4,0xf8,0x60,0xa3,0xcd,0x74,0xcb,0xb1,0xa7,0x3b,0xae,0xeb,0x38,0x4e,0x54,0x12,0x65,0xdb,0x12,0xcc,0xac,0x9d,0x44,0x1a,0x6e,0x22,0x45,0x6c,0xc,0x39,0xe9,0xce,0xa8,0xc7,0x1c,0x31,0xa4,0x13,0xaf,0x3c,0x22,0x33,0xc3,0x72,0x62,0x18,0x5e,0xd7,0x4f,0x35,0x8b,0x8d,0x6a,0x34,0x7e,0x89,0xaa,0x50,0x7e,0x1e,0xc3,0xc5,0x38,0xc,0x56,0x7e,0xd4,0xe1,0x43,0x8,0x56,0x85,0x21,0xb0,0xd1,0xac,0x55,0xc8,0xc5,0x4c,0x3f,0x98,0x59,0x6a,0x1d,0x39,0xb2,0x5e,0xd1,0x87,0xd1,0x86,0x7d,0xdf,0x5b,0x4b,0x44,0xeb,0x88,0x68,0x89,0xb0,0xed,0x3f,0xaf,0xfa,0xe3,0xe2,0x3b,0x2b,0xcf,0xfb,0xe1,0x8b,0x1c,0x92,0x96,0x11,0x96,0xad,0x57,0xfc,0x61,0x11,0xff,0x9f,0x7,0x40,0x45,0x13,0x9c,0x72,0x9,0x31,0x83,0x8a,0x83,0x2b,0xb9,0xe7,0x81,0x5f,0x57,0x6,0x66,0xf6,0x47,0x2f,0xeb,0xc,0xc2,0xb0,0xdb,0x68,0x3d,0x85,0x48,0x1c,0xce,0xc6,0xbc,0xd7,0x71,0x9d,0x83,0x8d,0xe1,0xb4,0x10,0x14,0xed,0xcf,0x67,0x80,0x54,0x4b,0x82,0x1,0x84,0xcc,0xe0,0xd6,0xae,0x89,0x54,0x8a,0x2c,0x88,0xc1,0x44,0xd2,0xa6,0xd2,0x22,0x4d,0xe2,0x91,0xca,0xa2,0x7a,0xc9,0x42,0xa4,0xd8,0x75,0xc0,0xc3,0xe9,0x7b,0xe6,0x42,0x66,0x88,0x4b,
0x5b,0xc1,0xd8,0xc5,0x82,0x27,0x8c,0xd1,0x51,0x55,0x92,0x31,0x20,0xa2,0x50,0x85,0xea,0x19,0x30,0x3f,0xc,0xa2,0xbf,0x2,0x58,0x61,0x5b,0x72,0xcb,0x8a,0x9b,0xbe,0x59,0x53,0x8d,0x33,0xe3,0xe4,0xaf,0xb,0x62,0xf0,0xaa,0x5b,0xbe,0xb9,0xd3,0xf4,0x24,0xdb,0xa9,0x0,0xd0,0xd8,0x3c,0x2c,0x22,0x84,0x1,0x31,0x81,0xd8,0x18,0xac,0xb9,0xf5,0x5b,0x35,0x9e,0xf1,0xac,0x8f,0x5f,0xb1,0xb7,0xf1,0xfc,0x5,0x0,0x1f,0x68,0x8c,0x59,0xc0,0xcc,0x7b,0x11,0xd0,0xc6,0xc4,0x82,0x98,0x44,0x69,0x61,0x86,0x48,0x24,0x13,0x42,0x5a,0x56,0x84,0x16,0xf0,0x58,0x2b,0xc3,0x98,0x48,0x70,0x66,0x70,0x90,0x9,0xa4,0x41,0x6c,0xc0,0xd0,0x44,0x30,0x4c,0xc8,0x3,0xb4,0x5c,0x10,0x3d,0xb,0xa6,0xe7,0xa4,0xe3,0x3e,0xbb,0xfc,0xf,0x97,0x3d,0x5b,0x7d,0x81,0xdd,0x4e,0x58,0x28,0xc9,0x76,0x0,0x36,0x4c,0xd2,0xe6,0x55,0x37,0x2e,0xde,0xa9,0x1b,0xd0,0xed,0xf4,0x0,0x18,0x1,0x88,0x53,0x2f,0x11,0x46,0x29,0x41,0x44,0x82,0x8d,0xc1,0x9a,0xdb,0xfe,0x23,0x68,0x6c,0xc6,0x99,0xf6,0xfa,0xf4,0xd5,0x5d,0xc6,0xcb,0x77,0xb1,0x52,0x5d,0x4a,0xeb,0x34,0x33,0xbb,0x20,0x72,0x19,0x70,0x5,0x10,0x63,0x90,0x5b,0xea,0x55,0xee,0x33,0xe0,0x3,0xf0,0xc0,0xec,0x1b,0x63,0x7c,0x37,0x16,0x2b,0x80,0xd1,0x6b,0xa7,0x5a,0xb6,0xcc,0x3e,0xe2,0xa0,0xbe,0x3b,0x3e,0x7f,0x74,0xc3,0x6d,0xce,0xa6,0x9d,0x78,0xa1,0x2d,0xa5,0x24,0x66,0x18,0x21,0x85,0x11,0xb6,0xcb,0x2b,0x7e,0x7f,0x19,0xbf,0x53,0xc6,0xf3,0x1d,0x7,0x80,0x51,0xfd,0x88,0x8f,0x5c,0x5c,0x6e,0x83,0x81,0x35,0xb7,0x5c,0xf5,0xa6,0x8,0x60,0xb7,0x8f,0xfc,0x5b,0xb4,0x5e,0x9c,0x81,0xd5,0x6f,0xd2,0x6f,0xec,0x3a,0x76,0x1d,0xbb,0x8e,0x5d,0xc7,0xae,0x63,0xd7,0xb1,0xeb,0xd8,0x75,0xbc,0x15,0xc7,0xff,0xf,0xd8,0x38,0x69,0x89,0x29,0xb2,0xb8,0x39,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a,0x0,0x0,0x0,0x0d,0x49,0x48,0x44,0x52,0x0,0x0,0x01,0x0,0x0,0x0,0x01,0x0,0x08,0x06,0x0,0x0,0x0,0x5c,0x72,0xa8,0x66,0x0,0x0,0x0,0x06,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x09,0x70,0x48,0x59,0x73,0x0,0x0,0x0d,0xd7,0x0,0x0,0x0d,0xd7,0x01,0x42,0x28,0x9b,0x78,0x0,0x0,0x20,0x0,0x49,0x44,0x41,0x54,0x78,0xda,0xed,0x9d,0x79,0x9c,0x54,0xd5,0x99,0xf7,0x7f,0xe7,0xde,0x5b,0x7b,0x55,0x57,0xf5,0xbe,0x77,0x43,0x37,0x6b,0xb3,0x8b,0x20,0x8b,0x22,0x2e,0xd1,0x98,0x31,0xd1,0xc4,0x84,0x44,0x07,0xd0,0x06,0x9d,0x64,0x26,0xaf,0x8e,0x89,0x79,0x67,0x46,0xf3,0x26,0xa2,0x93,0x65,0x7c,0xe7,0xcd,0x32,0x8e,0x66,0x24,0x09,0x69,0x40,0x8d,0x19,0x26,0x89,0x21,0x2e,0xb8,0x6f,0x80,0x88,0x82,0xa0,0xec,0x74,0x43,0xd3,0xd0,0x6b,0xf5,0x56,0xfb,0x7a,0xef,0x3d,0xef,0x1f,0x8d,0xad,0xd0,0x5b,0x55,0x77,0xed,0xf5,0x7c,0x3f,0x1f,0x3e,0x68,0x51,0x55,0xe7,0xd6,0x59,0x7e,0xe7,0x39,0xcf,0x79,0xce,0x73,0x0,0x82,0x20,0x08,0x82,0x20,0x08,0x82,0x20,0x08,0x82,0x20,0xb2,0x01,0x96,0xce,0x0f,0xbf,0x6d,0xe3,0x46,0xab,0x47,0xa3,0xd1,0xea,0x0,0x0b,0x67,0xcc,0xa4,0xaa,0xaa,0x8e,0x33,0x66,0xa3,0x66,0x25,0xe2,0x3a,0x68,0x38,0x0f,0x88,0xa2,0xe8,0x57,0x65,0xd9,0xc9,0xb5,0xda,0x90,0x28,0x8a,0x6e,0x8d,0x46,0xe3,0x5d,0xb5,0x6a,0x55,0x88,0x04,0x20,0x0e,0x34,0x34,0x34,0xd8,0x24,0xc6,0x96,0x73,0x60,0x16,0x38,0x9f,0x2e,0x70,0x3e,0x93,0x33,0x36,0x03,0x40,0x2e,0x75,0x47,0x22,0x35,0x46,0x12,0x93,0xc1,0x79,0x33,0x07,0x8e,0x0a,0x9c,0x9f,0xe0,0xc0,0x71,0xae,0xaa,0x1f,0xac,0x59,0xbf,0xfe,0x08,0x63,0x8c,0x93,0x0,0x44,0xc1,0xc6,0x8d,0x1b,0x8d,0x26,0x8d,0xe6,0x6a,0x15,0x58,0xc9,0x18,0x5b,0x09,0x60,0x3e,0x0,0x91,0x7a,0x19,0x91,0x86,0x74,0x83,0xb1,0xb7,0x39,0xf0,0x16,0x53,0x94,0xd7,0xd6,0xac,0x5f,0x7f,0x82,0x04,0x60,0x18,0xde,0xdc,0xb0,0x41,0x3a,0x57,0x5d,0x7d,0x83,0x0,0xac,0xe1,0xc0,0x17,0x01,0xe8,0xa9,0xef,0x10,0x19,0x07,0xe7,0xc7,0xc0,0xd8,0x36,0x41,0x51,0x1a,0xfe,0xf6,0xce,0x3b,0x5b,0xb2,0x5e,0x0,0xb6,0x6d,0xdc,0x68,0x0d,0x6a,0xb5,0xdf,0x06,0x70,0x27,0x80,0xc9,0xd4,0x4
3,0x88,0x2c,0x21,0x04,0xce,0x5f,0xe0,0x9c,0xff,0x7c,0xed,0xfa,0xf5,0xbb,0xb2,0x4e,0x0,0x9e,0xd9,0xb4,0xa9,0x4c,0x61,0xec,0x9f,0x39,0x63,0xeb,0x01,0x98,0xa8,0x3f,0x10,0x59,0x6c,0x15,0x1c,0xe0,0x8c,0xfd,0x74,0xcd,0x1d,0x77,0xfc,0x31,0xd1,0xfe,0x82,0x84,0x0b,0xc0,0xd6,0xad,0x5b,0x4d,0x90,0xe5,0xfb,0x19,0x63,0xf7,0x0,0xb0,0x50,0xeb,0x13,0xc4,0x20,0xbb,0x19,0xf0,0xbd,0xd5,0xf5,0xf5,0xef,0x65,0x9c,0x0,0x70,0xce,0xd9,0x53,0x5b,0xb6,0xac,0x01,0xe7,0x8f,0x0,0x28,0xa1,0xb6,0x26,0x88,0x11,0xc6,0x0a,0x63,0xcf,0x6b,0x54,0xf5,0x1f,0x6e,0x5d,0xb7,0xee,0x5c,0x46,0x08,0xc0,0xe6,0xcd,0x9b,0x6b,0x45,0xce,0x1f,0x03,0xf0,0x79,0x6a,0x5e,0x82,0x88,0x40,0x04,0x80,0x3e,0xc6,0xf9,0x03,0xab,0xeb,0xeb,0x7f,0x1d,0xcf,0x65,0x41,0xdc,0x05,0xe0,0xc9,0xdf,0xfd,0xee,0x4e,0x30,0xf6,0x28,0x0,0x03,0x35,0x2b,0x41,0x44,0x6d,0x39,0xbf,0xa9,0x30,0x76,0x5b,0x7d,0x7d,0x7d,0x67,0x5a,0x09,0xc0,0xc6,0x8d,0x1b,0x8d,0x26,0xad,0xf6,0x09,0x0e,0xac,0xa1,0x66,0x24,0x88,0x09,0xd1,0xc6,0x55,0xf5,0x1b,0xf1,0xd8,0x2d,0x88,0x8b,0x0,0x3c,0xb3,0x69,0x53,0x99,0x2c,0x8a,0xdb,0xc1,0xf9,0xa5,0xd4,0x76,0x04,0x11,0x13,0x42,0xe0,0xfc,0x9e,0x35,0xeb,0xd6,0x6d,0x4c,0x69,0x01,0x78,0xba,0xa1,0x61,0xbe,0x0a,0xbc,0x08,0xa0,0x94,0xda,0x8c,0x20,0x62,0x3c,0x63,0x73,0xfe,0x68,0xd3,0xd9,0xb3,0xdf,0xd9,0xb0,0x61,0x83,0x9a,0x72,0x02,0xf0,0xf4,0xe6,0xcd,0x2b,0x54,0xce,0xb7,0x03,0xa0,0x03,0x39,0x04,0x11,0x3f,0xfe,0x24,0x03,0xab,0xeb,0xeb,0xeb,0x03,0x13,0xfd,0x22,0x21,0x56,0x4f,0xf4,0x64,0x43,0xc3,0xe7,0x54,0xce,0x77,0xd0,0xe0,0x27,0x88,0xb8,0x73,0x8b,0x04,0x6c,0xdb,0xb6,0x6d,0x9b,0x36,0x25,0x2c,0x80,0xad,0x9b,0x36,0x5d,0xce,0x04,0xe1,0x15,0x90,0xa7,0x9f,0x20,0x12,0x07,0xe7,0x7f,0xd5,0x99,0xcd,0x5f,0x59,0xb5,0x6a,0x95,0x92,0x34,0x0b,0xe0,0xc9,0x4d,0x9b,0xe6,0x31,0x41,0x78,0x9e,0x06,0x3f,0x41,0x24,0xda,0x21,0xc0,0xbe,0x14,0xf4,0x7a,0x1f,0x4f,0xda,0x12,0xe0,0x99,0x4d,0x9b,0xca,0x98,0x20,0xfc,0x05,0x80,0x95,0x5a,0x83,0x20,0x92,0xc2,0x37,0x9f,0x6c,0x68,0xf8,0xe7,0x84,0x2f,0x01,0xb6,0x6d,0xdb,0xa6,0x0d,0x7a,0xbd,0xbb,0x0,0x2c,0xa2,0x36,0x20,0x88,0xe4,
0x2e,0x06,0x0,0x7c,0x65,0x4d,0x7d,0xfd,0x5f,0x12,0x66,0x01,0x04,0x7d,0xbe,0x7f,0xa3,0xc1,0x4f,0x10,0xa9,0xb1,0x18,0x0,0xf0,0xbb,0x86,0x86,0x86,0x49,0x09,0x11,0x80,0xad,0x0d,0x0d,0x5f,0x01,0xe7,0xf7,0x52,0xbd,0x13,0x44,0xca,0x90,0x2b,0x02,0xff,0x1d,0xed,0xce,0x40,0xd4,0x02,0xf0,0xfb,0xdf,0xfc,0xa6,0x98,0x31,0xb6,0x11,0x69,0x9e,0x50,0x94,0x20,0x32,0xd0,0x0c,0x58,0x1c,0xf4,0x7a,0x1f,0x88,0xab,0x0,0x28,0x92,0xf4,0x4b,0x70,0x5e,0x40,0xd5,0x4d,0x10,0xa9,0xa8,0x02,0xec,0xfb,0x5b,0xb6,0x6c,0x99,0x1b,0x17,0x01,0x78,0xb2,0xa1,0xe1,0xf3,0x0,0xbe,0x41,0xb5,0x9c,0x1c,0x42,0xaa,0x80,0x10,0x17,0xd2,0xea,0x99,0xbd,0xaa,0x04,0x4e,0x4d,0x97,0x38,0x38,0x97,0x04,0x55,0x7d,0x8c,0x73,0x1e,0x91,0x85,0x1e,0xb1,0x19,0xff,0xe8,0xa3,0x8f,0xea,0x72,0x2d,0x96,0x43,0x0,0xa6,0x52,0x2d,0x27,0x06,0xa7,0xa2,0x81,0x3d,0xac,0x83,0x5d,0xd6,0xa3,0x27,0xac,0x87,0x5b,0x95,0x30,0x49,0xeb,0xc1,0xe5,0x96,0x9e,0xb4,0x19,0xfc,0x7f,0x75,0x94,0x83,0x73,0x20,0x57,0x0a,0xa3,0x44,0xe3,0x47,0xa1,0x14,0x44,0x91,0x26,0x0,0x2d,0x53,0xa9,0x81,0xe3,0x69,0x08,0x70,0xbe,0x66,0xf5,0xba,0x75,0x4f,0xc5,0x4c,0x0,0x9e,0x6a,0x68,0xb8,0x9b,0x03,0x8f,0x52,0xd5,0xc6,0x8f,0x30,0x67,0x68,0x0b,0x1b,0xd1,0x15,0xd6,0xc3,0x1e,0xd6,0xc3,0xa9,0x68,0x86,0x6d,0xb0,0xeb,0xad,0xed,0x28,0x90,0x52,0xff,0x0e,0x8a,0xdd,0xee,0x02,0x34,0x87,0xcc,0x43,0x5e,0x17,0x19,0x1f,0x14,0x82,0x72,0xad,0x1f,0x79,0x62,0x90,0x1c,0x4a,0xb1,0x5f,0x0a,0xb4,0xf6,0xbb,0x5c,0x53,0xee,0xb9,0xe7,0x9e,0xe0,0x68,0x6f,0x93,0x22,0xf9,0xae,0x6d,0xdb,0xb6,0x19,0xa2,0x75,0x2e,0x10,0x91,0xcf,0xf2,0xad,0x21,0x23,0xda,0xc2,0x46,0xd8,0xc3,0xda,0x31,0x35,0x99,0x03,0x78,0xdf,0x5b,0x80,0x2f,0x58,0xdb,0x53,0xfa,0x77,0x75,0x85,0x75,0xc3,0x0e,0x7e,0x0,0x50,0x38,0x43,0x67,0x58,0x8f,0xce,0xb0,0x1e,0x1f,0xfb,0x6c,0xd0,0x09,0x2a,0xca,0x34,0x3e,0x54,0x6a,0xfd,0x28,0xd3,0xf8,0x21,0x91,0x75,0x10,0x8b,0xa5,0x40,0x45,0x9e,0xc5,0xb2,0x1e,0xc0,0xaf,0x26,0x6c,0x01,0x6c,0x6d,0x68,0xf8,0x16,0x03,0xfe,0x8b,0x6a,0x35,0x76,0x83,0xfe,0x6c,0xc8,0x88,0xd6,0x90,0x11,0x7d,0xb2,0x6e,0x5c,0x6b,0xe4,0xe5,0xe6,0x6
e,0x4c,0xd6,0x79,0x53,0xb3,0xef,0x01,0x78,0xd9,0x59,0x8a,0x1e,0x59,0x17,0xf5,0x67,0xb5,0x4c,0x45,0x99,0xc6,0x8f,0x0a,0xad,0x0f,0xe5,0x5a,0x1f,0x34,0x8c,0x3c,0x08,0x13,0x68,0x87,0x73,0x7a,0x93,0x69,0xca,0x68,0x57,0x96,0x8d,0x69,0x01,0xbc,0xb9,0x61,0x83,0xd4,0x0a,0xfc,0x13,0x55,0xe7,0xc4,0x08,0x72,0x11,0x67,0x02,0x46,0x9c,0x0e,0x59,0xd0,0x2b,0x4f,0xf8,0x10,0x17,0x0e,0xfa,0x72,0x51,0xa5,0xf5,0x41,0x4c,0xc1,0x01,0x72,0x26,0x64,0x1e,0xd7,0xe0,0x07,0x80,0x10,0x17,0x70,0x26,0x64,0xc2,0x99,0x90,0x09,0x22,0xe3,0xa8,0xd2,0x78,0x51,0xa3,0xf7,0xa2,0x54,0xe3,0xa7,0x4e,0x14,0xed,0x2a,0x0,0xa8,0x0c,0x79,0x3c,0xb7,0x02,0xd8,0x32,0x6e,0x01,0x68,0xab,0xaa,0xba,0x19,0x74,0x61,0xc7,0xb8,0x90,0x39,0x43,0x73,0xd0,0x84,0xe6,0x90,0x25,0x22,0xf3,0x3e,0x1a,0xbc,0xaa,0x84,0xa3,0x81,0x1c,0xcc,0x31,0x38,0x53,0xea,0x37,0x2b,0x9c,0xe1,0x80,0x37,0x37,0x66,0xdf,0xd5,0x1c,0x32,0xa3,0x39,0x64,0x86,0x56,0x50,0x51,0xad,0xf5,0xa1,0x46,0xe7,0x41,0xa1,0x14,0xa0,0xce,0x15,0xa9,0x15,0xc0,0xd8,0x77,0x46,0x13,0x80,0x31,0xf7,0x94,0x38,0xb0,0x8e,0xaa,0x31,0x3a,0x5c,0x8a,0x06,0xfb,0xbc,0x79,0x78,0xd6,0x51,0x89,0xbd,0xde,0x02,0xd8,0xc3,0x3a,0xc4,0x23,0x6e,0xea,0xb8,0xdf,0x9a,0x72,0xdb,0x82,0x8d,0x41,0x0b,0x7c,0x6a,0xec,0xaf,0x71,0x0c,0xa9,0x02,0x1a,0x03,0x66,0xbc,0xec,0x2c,0xc1,0x4b,0xce,0x52,0x34,0x06,0x2c,0x50,0x38,0xb9,0x0e,0x23,0x60,0xde,0xd6,0x4d,0x9b,0x2e,0x1d,0x97,0x05,0xd0,0xd0,0xd0,0x50,0x02,0xe0,0x3a,0xaa,0xc3,0xc8,0x68,0x0d,0x19,0x71,0x34,0x60,0x3d,0x3f,0xe0,0x13,0xb1,0xac,0x10,0x70,0xc4,0x6f,0xc5,0x02,0x63,0x7f,0x4a,0xfc,0xfe,0x30,0x67,0x38,0xe4,0x8b,0xff,0xc1,0xd0,0x1e,0x59,0x87,0x1e,0x59,0x87,0x0f,0x7d,0x79,0xa8,0xd1,0x7b,0x30,0x5b,0xef,0x80,0x41,0x50,0xa8,0x03,0x8e,0x34,0xcb,0x33,0xb6,0x06,0xc0,0xbe,0xa8,0x2d,0x0,0x91,0xf3,0x5b,0x41,0xb7,0xf2,0x8e,0x8a,0x0a,0x86,0xd3,0x41,0x13,0x5e,0x74,0x96,0xe1,0x2d,0x77,0x51,0xc2,0x06,0xff,0xa0,0x15,0x10,0xc8,0x81,0x5f,0x4d,0x8d,0x26,0x3a,0x16,0xb0,0x22,0xc8,0x13,0xf7,0x2c,0x61,0xce,0x70,0xc2,0x6f,0xc1,0xb3,0x8e,0x4a,0xec,0xf6,0x14,0x0c,0xbb,0x6d,0x4a,0x0,0x9c,0
xb1,0x5b,0x37,0x6e,0xdc,0xa8,0x89,0xda,0x02,0x60,0x8c,0xdd,0x4a,0xd5,0x37,0x32,0xe7,0x42,0x46,0x1c,0xf4,0xe5,0x26,0xb5,0xe3,0x29,0x9c,0xe1,0x88,0xdf,0x8a,0x4b,0x4d,0x7d,0x49,0xad,0x8b,0x90,0x2a,0xe0,0xb8,0x3f,0x27,0x39,0x22,0xcc,0x81,0xe6,0xa0,0x19,0x2d,0x41,0x13,0xaa,0xb4,0x3e,0x5c,0x62,0xea,0x83,0x91,0x2c,0x82,0xcf,0x52,0x68,0xd4,0x6a,0xaf,0x01,0xf0,0x52,0xc4,0x16,0xc0,0x79,0xf3,0x9f,0xd2,0x7a,0x8f,0x42,0x73,0xd0,0x94,0x12,0xb3,0xce,0xc9,0x80,0x05,0x3e,0x55,0x4a,0xea,0x33,0x1c,0x0b,0x24,0xdf,0x1f,0xa1,0x82,0xe1,0x4c,0xc8,0x04,0xb7,0x22,0x51,0xe7,0x1c,0x3a,0x9b,0xff,0x4d,0x54,0x4b,0x0,0x89,0xf3,0xab,0x40,0x27,0xfe,0x46,0x45,0x9b,0x22,0x5b,0x70,0x2a,0x18,0x0e,0xf9,0x93,0x97,0x94,0x29,0xc8,0x45,0x1c,0x0b,0xe4,0x50,0xbb,0xa4,0xf4,0x5a,0x55,0xbd,0x26,0x3a,0x1f,0x80,0x20,0x5c,0x4d,0xb5,0x36,0x3a,0xa2,0x90,0x3a,0x1d,0xed,0x54,0xc0,0x0c,0x6f,0x92,0xac,0x80,0xa3,0x7e,0x0b,0xe4,0x14,0xf2,0xc8,0x8b,0x24,0x0,0xc3,0x59,0x0,0x33,0xb7,0x6c,0xd9,0x52,0x1e,0xb9,0x0,0x70,0x4e,0x02,0x30,0xd6,0x4c,0x83,0xd4,0x09,0x59,0x55,0x31,0xe0,0x0b,0x48,0xc6,0xec,0x7f,0xd2,0x9f,0x93,0x5a,0xed,0x42,0xa1,0xc4,0xc3,0xcf,0xe9,0x9c,0xaf,0x8c,0x48,0x0,0xb6,0x6e,0xdd,0x5a,0x04,0xa0,0x86,0xaa,0x6c,0x74,0xa4,0x14,0x9b,0x69,0x9a,0x02,0xe6,0x84,0xfb,0x02,0x8e,0xfb,0x73,0x10,0x86,0x90,0x62,0xed,0x42,0x02,0x30,0x02,0x4b,0x22,0xb3,0x0,0x64,0x79,0x26,0xd5,0x55,0x24,0xa6,0x66,0x6a,0x75,0x34,0x15,0x0c,0xc7,0x12,0x38,0x1b,0x87,0x39,0xc3,0x89,0x80,0x25,0xc5,0x5a,0x85,0xd3,0x12,0x60,0x64,0xab,0x7e,0x66,0x44,0x02,0xc0,0x04,0x61,0x3a,0xd5,0x56,0x7a,0x9a,0x9a,0x27,0x83,0x96,0x84,0xed,0xc5,0x9f,0x0c,0xe4,0xa4,0x5c,0x24,0xa2,0xc4,0x38,0x79,0xae,0x47,0xf6,0x03,0x4c,0x8f,0xcc,0x02,0xe0,0x9c,0x04,0x20,0x22,0x0b,0x20,0xf5,0x66,0x1a,0x85,0x33,0x9c,0xf0,0x9b,0xe3,0x6f,0x6d,0x70,0xe0,0x44,0x20,0x27,0xe5,0x7e,0x3f,0xed,0x0,0x8c,0x6a,0x01,0x94,0x6f,0x7b,0xfc,0x71,0xf3,0xd8,0x02,0x40,0x59,0x7f,0x22,0x42,0x93,0xa2,0x6b,0xcd,0x13,0x81,0x9c,0xb8,0xc7,0xc9,0x9f,0x0e,0x99,0xe3,0x12,0xf3,0x9f,0x89,0xa2,0x9c,0x4a,0x36,0x40,0xd8,0x68,0x9
c,0x72,0x81,0xc5,0x34,0xc2,0x1b,0x4b,0xa8,0xae,0x22,0x11,0x80,0xd4,0xec,0x6c,0x41,0x2e,0xe2,0x74,0xd0,0x84,0xa9,0x7a,0xcf,0x88,0xef,0x09,0xa9,0x02,0x14,0x30,0x84,0xb9,0x80,0x30,0x67,0x0,0x18,0x04,0xc6,0x21,0x31,0x15,0x5a,0xc6,0x21,0x41,0x1d,0x71,0x30,0x71,0x20,0x69,0x51,0x7f,0xe9,0xb8,0x2c,0x4b,0x25,0xd4,0x8b,0xc6,0xb6,0x34,0x42,0x03,0x5b,0x68,0x1d,0x15,0xc9,0x7a,0x33,0x75,0x3b,0xdb,0xb1,0x80,0x15,0x12,0x1b,0x38,0x36,0xec,0x55,0x45,0x78,0x14,0x09,0x3e,0x55,0x82,0x47,0x95,0xa2,0xb2,0x0e,0xf4,0x82,0x02,0xb3,0xa0,0xc0,0x24,0x84,0x61,0x14,0x14,0x98,0x44,0x19,0x32,0x67,0x70,0x28,0xda,0x94,0xfc,0xdd,0x22,0x48,0x0,0x46,0x5d,0x05,0x0,0x96,0x31,0x05,0x80,0x5d,0xf4,0x26,0x22,0xfd,0x04,0xc0,0xa5,0x68,0xb0,0xdb,0x33,0xf1,0xec,0xed,0x01,0x55,0x44,0x40,0x15,0xd1,0x03,0x6d,0x5a,0xb4,0x09,0x59,0x0,0xa3,0x23,0x5c,0x34,0xb6,0x47,0xf2,0x01,0x90,0x0,0x44,0x80,0x81,0x3a,0x5b,0xca,0xa1,0xa3,0x43,0x40,0x63,0x91,0x13,0x89,0x0,0x98,0xa8,0x9e,0x22,0xb3,0x0,0xc8,0xe9,0x94,0x62,0xa2,0x2c,0x90,0x28,0x8f,0xea,0x03,0xe0,0x3c,0xa2,0x5d,0x0,0xca,0x01,0x10,0xe9,0x1a,0x99,0xd1,0x8c,0x93,0x5a,0x16,0x0,0x09,0xc0,0x68,0x30,0xc6,0xa4,0x48,0x04,0x80,0xa0,0x19,0x27,0x4d,0x05,0x59,0xa6,0x4a,0x88,0xce,0x27,0x40,0x50,0x87,0x23,0x41,0x26,0x01,0x20,0xc6,0x67,0x72,0x8a,0xd4,0xe1,0x48,0x90,0x49,0x0,0xb2,0x16,0x93,0x40,0x1d,0x2e,0x95,0xa0,0x54,0x60,0x24,0x0,0x09,0xc5,0x4c,0x02,0x90,0x32,0x48,0x4c,0x85,0x96,0x96,0x0,0x24,0x0,0x89,0x9d,0x71,0x48,0x0,0x52,0xc7,0x1a,0xa3,0xd9,0x9f,0x04,0x80,0x2c,0x80,0xec,0x6d,0x0b,0x91,0xda,0x82,0x04,0x20,0xd1,0x16,0x80,0xa8,0x0,0xa0,0x60,0xa0,0x94,0x68,0x0b,0x72,0x0,0x92,0x0,0x24,0x1a,0xb7,0x22,0x41,0xa4,0x93,0x53,0x29,0x41,0x90,0x8b,0x50,0x29,0x1d,0x48,0x54,0x50,0x02,0xf5,0xf1,0x76,0x36,0x55,0xc0,0x3e,0x5f,0x1e,0x9a,0x83,0x26,0x50,0xf6,0xf4,0xd4,0xe0,0x6c,0xc8,0x88,0x67,0xfb,0x2b,0xb0,0xc0,0xd8,0x87,0x9a,0x14,0xbd,0x3a,0x9d,0x04,0x20,0xcd,0xe1,0x18,0x48,0xbe,0x79,0xd0,0x9f,0x87,0xa0,0x4a,0x06,0x54,0xaa,0xe1,0x57,0x45,0xbc,0xeb,0x29,0xc4,0xe9,0xa0,0x05,0x8b,0x4c,0xbd,0xb0,0x8a,0x61,
0xaa,0x14,0x12,0x80,0xd8,0x99,0xfb,0xef,0x7b,0xf3,0xd1,0x11,0x36,0x50,0x65,0xa4,0x38,0x9d,0x61,0x3d,0x76,0x38,0xcb,0x30,0xc7,0xe0,0xc4,0x4c,0x83,0x13,0x02,0xf9,0x69,0xd2,0xd3,0x07,0x10,0x50,0x45,0x7c,0xe4,0xb3,0x25,0xf4,0xd2,0xc9,0x8b,0x19,0xc8,0xb9,0x6f,0xc3,0xf3,0xce,0x72,0x1a,0xfc,0x69,0x84,0xcc,0x19,0x0e,0xf8,0x6c,0x78,0xd1,0x59,0x86,0x5e,0x59,0x97,0xd4,0x67,0xe9,0x0a,0xeb,0x71,0x3c,0x05,0x73,0x28,0xa6,0xb4,0x05,0x10,0xe6,0x0c,0xaf,0xbb,0x8b,0xd1,0x2f,0x6b,0x71,0x2c,0x90,0x83,0x3a,0xbd,0x0b,0x33,0x0d,0xce,0x84,0xa6,0xe2,0x72,0x28,0x5a,0xec,0xf6,0x14,0xa0,0x5f,0xd6,0xd2,0x88,0x4a,0x53,0x1c,0xb2,0x06,0x2f,0x39,0x4b,0x31,0xd3,0xe0,0xc2,0x7c,0x43,0x1f,0x84,0x04,0xba,0x6c,0x7a,0x65,0x1d,0x3e,0xf2,0xe7,0xa2,0x3d,0xa4,0x07,0x0,0x30,0xce,0x31,0xdd,0xe0,0x26,0x01,0x18,0x73,0xd6,0xe5,0xc0,0xdb,0xee,0xa2,0xc1,0x81,0x27,0x73,0x01,0x1f,0xfb,0x6d,0x38,0x11,0xcc,0xc1,0x2c,0x83,0x13,0xd3,0x75,0xae,0xb8,0x9f,0xc5,0x3f,0x19,0xcc,0xc1,0x7e,0x6f,0x6e,0xdc,0x13,0x6c,0x12,0xf1,0x87,0x03,0x38,0xea,0xcf,0x41,0x57,0x58,0x8f,0xcb,0xcd,0x76,0x58,0xe2,0x1c,0x33,0xe0,0x54,0x34,0x38,0xe8,0xcf,0x45,0x6b,0xd0,0x78,0xc1,0xe2,0xe3,0x03,0x5f,0x1e,0x0c,0xa2,0x8a,0x2a,0x6d,0x6a,0x38,0x29,0x85,0x54,0x6d,0xac,0x3d,0xde,0x42,0x74,0x0e,0x63,0x6e,0x07,0x55,0x01,0x1f,0x7a,0x73,0xf1,0x57,0x67,0x05,0xce,0x84,0x4c,0x71,0x59,0xd9,0x85,0xb8,0x80,0xb7,0xdd,0x45,0x78,0xdf,0x93,0x47,0x83,0x3f,0xc3,0xe8,0x95,0xb5,0x78,0xd1,0x51,0x76,0x7e,0xf7,0x26,0xf6,0x04,0xb9,0x88,0xbd,0xde,0x7c,0x3c,0xef,0x28,0xc7,0xb9,0x8b,0x06,0xff,0x0,0x0c,0xbb,0xdc,0x05,0xb0,0xcb,0x7a,0x12,0x80,0x91,0x78,0xdf,0x9b,0x3f,0x66,0x03,0x79,0x15,0x11,0xbb,0xdc,0x85,0xd8,0xe1,0x2c,0x83,0x3d,0x1c,0xbb,0xca,0xec,0x91,0x75,0x78,0xc1,0x51,0x86,0x73,0x21,0x23,0x8d,0x96,0x0c,0x25,0x0c,0x01,0xbb,0x3d,0x85,0xd8,0xed,0x29,0x88,0x99,0xc0,0x2b,0x9c,0xe1,0x63,0x9f,0x0d,0x7f,0xee,0xaf,0x40,0x63,0xc0,0x32,0xea,0xc4,0xa4,0x82,0xe1,0x0d,0x57,0x11,0xfa,0x52,0x60,0x59,0x99,0x72,0x02,0x70,0x2c,0x60,0x45,0x63,0x14,0xd7,0x4d,0xf5,0xc9,0x5a,0xbc,0xe2,0x2a,0xc6,0xbb,0x
9e,0x02,0x04,0x26,0x98,0xa7,0xbe,0x39,0x68,0xc6,0xab,0xae,0x92,0xa4,0xdd,0xb2,0x4b,0x24,0x96,0x58,0xb5,0x77,0x6b,0xd8,0x88,0xe7,0x9d,0xe5,0xf8,0xd8,0x6f,0x8b,0x58,0x50,0x64,0x2e,0xe0,0x4d,0x77,0x71,0xd2,0xfb,0x5a,0x4a,0x09,0xc0,0xb9,0x90,0x11,0xfb,0xbd,0xb6,0x71,0x7c,0x92,0xe1,0x74,0xd0,0x8c,0xed,0x8e,0x0a,0x1c,0xf3,0xe7,0x44,0x1d,0x0d,0xa6,0x82,0x61,0xaf,0x37,0x3f,0xa6,0x33,0x02,0x91,0x1e,0xf4,0xc8,0x3a,0xbc,0xe8,0x28,0x1d,0x76,0xb9,0x39,0x16,0x6e,0x55,0xc2,0x5b,0xae,0x22,0xbc,0xe5,0x2a,0x82,0x5b,0x89,0x7e,0x20,0xfb,0x55,0x11,0x6f,0xba,0x8a,0x20,0x27,0xf1,0x7a,0xb5,0x94,0x11,0x0,0x87,0xac,0xc1,0xbb,0xee,0x02,0x4c,0x24,0xaa,0x2e,0xcc,0x19,0xf6,0xfb,0xf2,0xf0,0xa2,0xa3,0x14,0x5d,0x61,0x5d,0xc4,0x8d,0xf0,0x8a,0xb3,0x24,0x2a,0xab,0x83,0xc8,0x2c,0x82,0x5c,0xc4,0xeb,0xae,0x62,0x1c,0x8d,0xf0,0xb2,0x93,0x4f,0xcc,0xfd,0xe7,0x1d,0xe5,0x68,0x0d,0x4f,0x6c,0xa9,0xe8,0x50,0xb4,0xd8,0x15,0x83,0xf4,0xed,0x69,0x2d,0x0,0x41,0x2e,0xe2,0x2d,0x4f,0x71,0xcc,0xae,0x99,0x76,0x28,0x5a,0xbc,0xe6,0x2a,0xc5,0x6e,0x77,0x01,0x02,0xa3,0x44,0xeb,0xf5,0xc9,0x5a,0xec,0x70,0x96,0xa2,0x27,0xc9,0x7b,0xc4,0x44,0xf2,0xe1,0x0,0x3e,0xf4,0xe5,0xe1,0x5d,0x4f,0xc1,0xa8,0x16,0x64,0x5b,0xc8,0x80,0xe7,0xa2,0x34,0xf7,0xc7,0x5c,0x42,0x84,0x8c,0xf8,0xc8,0x67,0x4b,0xca,0xef,0x4e,0xfa,0x62,0x57,0x05,0xc3,0x4e,0x77,0x21,0x3c,0x8a,0x14,0xf3,0x06,0x6d,0x0e,0x99,0xd1,0x2e,0x1b,0xb1,0xd0,0xd8,0x3b,0x24,0x36,0xbc,0x35,0x6c,0xc4,0x2e,0x77,0x21,0x64,0x32,0xf9,0x89,0xcf,0x70,0x3a,0x68,0x86,0x47,0x95,0xb0,0xd2,0x62,0xbf,0xe0,0x92,0x91,0x20,0x17,0xf1,0x81,0x27,0x17,0x67,0x42,0xf1,0xb9,0x78,0xf5,0xb0,0xdf,0x06,0x9b,0x14,0x46,0x75,0x82,0xb7,0x07,0x93,0x6e,0x01,0xec,0xf5,0xe4,0xa3,0x33,0x1c,0xbf,0x2d,0x91,0xa0,0x2a,0xe0,0x5d,0x4f,0x21,0x5e,0x71,0x95,0xc2,0xa9,0x68,0x0,0x0,0x8d,0x01,0x0b,0xde,0x76,0x15,0xd1,0xe0,0x27,0x86,0xc5,0x1e,0xd6,0xe3,0x65,0x67,0xe9,0xa0,0x83,0xee,0x78,0x20,0x07,0xdb,0xfb,0xcb,0xe3,0x36,0xf8,0x3f,0x99,0xb0,0xde,0xf5,0x14,0xa0,0x27,0xc1,0x3b,0x03,0x49,0xb5,0x0,0x9a,0x82,0x16,0x9c,0x0a,0x9a,0x13,0xd4,0xa8,0x3
a,0xec,0x70,0x96,0xa1,0x5a,0xeb,0xc5,0xa9,0x20,0xdd,0x7b,0x42,0x8c,0x8e,0x53,0xd1,0xe0,0x35,0x67,0x31,0xcc,0xa2,0x9c,0xb0,0xf0,0x6f,0x85,0x33,0xec,0x74,0x17,0xe1,0x0b,0xb6,0x0e,0xe8,0x12,0x74,0xdf,0x44,0xd2,0x2c,0x80,0x1e,0x59,0x87,0xf7,0xbd,0xf9,0x09,0x2d,0x53,0xe6,0xec,0xbc,0xe0,0xd0,0xcc,0x4f,0x8c,0x8d,0x5b,0xd5,0x24,0xfc,0xec,0x87,0x57,0x95,0xf0,0x8e,0xbb,0x30,0x61,0x47,0x97,0x92,0x22,0x0,0x41,0x55,0xc0,0x3b,0xee,0x42,0xa8,0x74,0x40,0x8b,0x20,0x86,0xd0,0x15,0xd6,0xe3,0x63,0x5f,0x6e,0xe6,0x0a,0xc0,0x7b,0xbe,0x02,0xf8,0x28,0xd8,0x86,0x20,0x46,0xe4,0x90,0x3f,0x27,0xae,0xbe,0xb1,0xa4,0x09,0x40,0x63,0xd0,0x82,0x73,0x41,0x0a,0xb3,0x25,0x88,0xd1,0x61,0xd8,0xed,0x29,0x8c,0xfb,0x31,0xf8,0x84,0x0a,0x80,0x43,0xd6,0x60,0x9f,0x37,0x8f,0xda,0x96,0x20,0x22,0xc0,0xaf,0x8a,0xd8,0xed,0x2e,0x88,0xab,0x3f,0x20,0x61,0x02,0xa0,0x70,0x86,0x9d,0x9e,0x22,0x0a,0xb5,0x25,0x88,0x28,0x68,0x0f,0x1b,0xe2,0x1a,0xa5,0x9a,0x30,0x01,0x38,0xe8,0xb3,0x0d,0xee,0xc3,0x13,0x04,0x11,0x39,0xfb,0x7d,0x79,0x71,0x1b,0x3b,0x09,0x11,0x80,0xce,0xb0,0x01,0xc7,0x03,0x56,0x6a,0x49,0x82,0x18,0xa7,0xf5,0xbc,0xcb,0x5d,0x18,0x97,0x94,0xe7,0x42,0x22,0x1e,0xfe,0x7d,0x6f,0x1e,0xa5,0x64,0x24,0x88,0x09,0xd0,0xaf,0x68,0xe3,0x92,0x53,0x30,0xee,0x02,0x70,0xd8,0x6f,0x83,0x8b,0x4c,0x7f,0x82,0x98,0x30,0x1f,0xf9,0x6c,0x70,0xc7,0x78,0xfb,0x3c,0xae,0x02,0xd0,0xaf,0x68,0x71,0xc4,0x9f,0x43,0x2d,0x47,0x10,0xb1,0xb2,0xa6,0x3d,0xf9,0xe9,0x23,0x0,0x7b,0x3d,0xf9,0x74,0x55,0x13,0x41,0xc4,0x90,0x8e,0xb0,0x21,0xa6,0xf9,0x0c,0xe3,0x26,0x0,0x27,0x03,0x16,0x3a,0x67,0x4f,0x10,0x71,0x60,0x9f,0x37,0x2f,0x66,0x01,0x42,0x71,0x11,0x80,0xa0,0x2a,0xe0,0xa0,0x3f,0x97,0x5a,0x8a,0x20,0xe2,0x31,0xbe,0xb8,0x18,0xb3,0x04,0x22,0x71,0x11,0x0,0x9d,0xa0,0xc2,0x2a,0xd0,0x9d,0x6c,0x04,0x11,0x2f,0x0a,0xa5,0x60,0x6a,0x2f,0x01,0x2e,0x33,0xf7,0x26,0xf4,0x06,0x16,0x82,0xc8,0x16,0x4a,0x35,0x7e,0x4c,0xd6,0x79,0x52,0x5b,0x0,0x6c,0x62,0x08,0xb3,0xf4,0x4e,0x6a,0x2d,0x82,0x88,0x21,0x1a,0xa8,0x58,0x62,0xee,0x8d,0xd9,0xf7,0xc5,0x75,0x17,0x60,0x8e,0xd1,0x41,0xd7,0x33,0x13,0x4
4,0x8c,0xc7,0x94,0x49,0x90,0xd3,0x43,0x0,0x04,0x70,0x5c,0x6a,0xea,0xa3,0x8d,0x40,0x82,0x88,0x01,0xb9,0x62,0x08,0x33,0x62,0x7c,0xb1,0x68,0xdc,0x23,0x01,0x4b,0x35,0x7e,0x4c,0xd5,0xbb,0xa9,0xf5,0x08,0x62,0x02,0x88,0x8c,0xe3,0x72,0x4b,0x37,0x84,0x18,0x07,0xd5,0x27,0xe4,0x30,0xd0,0x25,0xc6,0x3e,0x58,0x68,0x57,0x80,0x20,0xc6,0xcd,0x5c,0x83,0x33,0x2e,0xcb,0xe9,0x84,0x08,0x80,0xc4,0x38,0x96,0x9a,0x7b,0x0,0x3a,0x12,0x44,0x10,0x51,0x53,0x20,0x05,0x51,0x67,0x70,0xc4,0x69,0x99,0x9e,0x20,0x8a,0x34,0x41,0xcc,0xa0,0xa5,0x0,0x41,0x44,0x6d,0xfa,0x2f,0x35,0xf7,0xc4,0xcd,0x8f,0x96,0xd0,0x94,0x60,0xf3,0x4c,0x0e,0x58,0x62,0xe8,0xc1,0x24,0x88,0x4c,0x67,0x8e,0x21,0xbe,0x3b,0x69,0x09,0x15,0x0,0x0d,0x54,0x5c,0x61,0xb1,0x53,0x80,0x10,0x41,0x44,0x40,0x89,0x26,0x80,0xd9,0x86,0xf8,0xc6,0xd2,0x24,0x3c,0x2b,0x70,0x9e,0x14,0xc2,0xbc,0x38,0xad,0x67,0x08,0x22,0x53,0xd0,0x09,0x2a,0x96,0x9b,0xbb,0xe3,0x5e,0x4e,0x52,0xee,0x05,0x98,0x65,0x70,0xa0,0x44,0x13,0xa0,0x56,0x26,0x88,0x11,0x58,0x62,0xec,0x81,0x41,0x50,0x32,0x53,0x0,0x0,0x60,0xb9,0xb9,0x1b,0xba,0xcf,0xdc,0xbe,0x4a,0x10,0xc4,0x0,0x53,0x74,0x6e,0x54,0xea,0x7c,0x09,0x29,0x2b,0x69,0x02,0x60,0x10,0x14,0x2c,0x32,0xf5,0x52,0x6b,0x13,0xc4,0x67,0xc8,0x11,0xc3,0x58,0x68,0xea,0x4b,0x58,0x79,0x49,0xbd,0x1e,0x7c,0x92,0xce,0x8b,0x99,0x06,0x17,0xb5,0x3a,0x41,0x0,0x90,0x98,0x8a,0x95,0x16,0x3b,0x34,0x8c,0x67,0x87,0x0,0x0,0xc0,0x02,0x63,0x3f,0x8a,0x34,0x41,0x6a,0x7d,0x82,0xd6,0xfd,0xe6,0x3e,0xe4,0x24,0xf8,0xf0,0x5c,0xd2,0x05,0x40,0x0,0x3f,0xef,0x0f,0x50,0xa8,0x07,0x10,0x59,0x4b,0xad,0xde,0x83,0x49,0x5a,0x4f,0x12,0xc6,0x5f,0x0a,0x60,0x12,0x64,0x2c,0xb5,0xf4,0x82,0x42,0x85,0x89,0x6c,0xc4,0x26,0x86,0xb0,0xc8,0x98,0x1c,0x7f,0x98,0x90,0x2a,0x95,0x50,0xa1,0xf1,0xa5,0xce,0xc3,0x10,0x44,0x02,0x29,0xd3,0x06,0x20,0x31,0x9e,0xdd,0x02,0x40,0x10,0x04,0x09,0x0,0x41,0x10,0x09,0x44,0xa2,0x2a,0x88,0x1e,0x93,0x4e,0x42,0x65,0x9e,0x11,0x15,0xb9,0x46,0x68,0xa5,0x4f,0x35,0x54,0x51,0x39,0x3a,0x9d,0x7e,0x9c,0xed,0xf3,0xa1,0xdf,0x1b,0xa2,0x8a,0x4a,0x10,0x16,0xbd,0x06,0x55,0xf9,0x46,0x94,
0xd9,0x0c,0xd0,0x88,0x9f,0xb6,0x87,0xac,0xaa,0xe8,0x70,0x04,0xd0,0xd2,0xeb,0x85,0xcb,0x4f,0xf9,0x28,0x48,0x0,0x26,0xc0,0xd4,0x62,0x0b,0x56,0xce,0x28,0xc2,0x82,0xea,0x5c,0x4c,0x2e,0x34,0x8f,0x79,0x3c,0xb3,0xcb,0x15,0xc0,0xc1,0x96,0x7e,0xec,0x6a,0xec,0xc6,0xfe,0x33,0xfd,0x50,0x39,0x39,0x38,0x63,0x05,0x03,0x50,0x57,0x6e,0xc5,0x8a,0xe9,0x45,0x58,0x50,0x65,0x43,0x55,0xfe,0xd8,0x37,0xe5,0xb4,0xf5,0xfb,0x70,0xe0,0xac,0x03,0xef,0x9c,0xb0,0xe3,0xd0,0x39,0x07,0xb9,0x9b,0x49,0x0,0x22,0xa8,0x1c,0x81,0x61,0xe5,0x8c,0x62,0x7c,0xe5,0xd2,0x0a,0xd4,0x14,0x9a,0xa3,0xfa,0x6c,0x71,0x8e,0x1e,0xd7,0xcf,0x29,0xc5,0xf5,0x73,0x4a,0xd1,0xeb,0x09,0xe2,0xb9,0x83,0xed,0x78,0xfe,0x60,0x1b,0x3c,0x41,0x3a,0x0e,0x3d,0x5e,0xb4,0x92,0x80,0xeb,0x66,0x95,0xe0,0xcb,0x0b,0x2b,0x50,0x9e,0x6b,0x8c,0xea,0xb3,0xe5,0xb9,0x46,0x94,0xe7,0x1a,0x71,0xe3,0xbc,0x32,0x74,0x3a,0x03,0xf8,0xcb,0x87,0xad,0x78,0xe9,0x50,0x07,0x02,0x61,0x85,0x04,0x80,0x18,0xca,0xe2,0x9a,0x7c,0xfc,0xfd,0xd5,0x53,0x50,0x6a,0x35,0x4c,0xf8,0xbb,0xf2,0xcd,0x3a,0xdc,0x71,0xf9,0x64,0x7c,0x6d,0x51,0x25,0x9e,0x7c,0xf7,0x0c,0x9e,0x3b,0xd8,0x06,0x45,0xa5,0x39,0x28,0x1a,0x56,0xce,0x28,0xc2,0xdf,0xad,0x9c,0x82,0x3c,0x93,0x76,0xc2,0xdf,0x55,0x62,0xd5,0xe3,0x5b,0x57,0x4d,0xc1,0x37,0x2e,0xab,0xc2,0xa6,0x77,0x4e,0xe3,0xb5,0x23,0x9d,0x59,0x6b,0x11,0x90,0x0,0x5c,0x84,0x51,0x2b,0xe1,0xee,0x6b,0xa7,0xe2,0xaa,0x99,0xc5,0x71,0xf1,0x1d,0x7c,0xeb,0xaa,0x29,0xb8,0xba,0xae,0x18,0x8f,0xbc,0x70,0x14,0x6d,0xfd,0x7e,0xaa,0xf0,0x31,0xb0,0x19,0xb5,0xb8,0xef,0xf3,0xd3,0xb1,0x68,0x72,0x7e,0x9c,0xbe,0x7b,0x06,0xae,0xa9,0x2b,0xc6,0xbf,0xef,0x38,0x8e,0x5e,0x4f,0xf6,0x45,0xa4,0xd2,0x2e,0xc0,0x45,0x66,0xe2,0xe3,0x6b,0x16,0xc6,0x65,0xf0,0x7f,0x96,0x69,0xc5,0x16,0x3c,0xb6,0xfa,0x52,0x2c,0xa9,0xcd,0xa7,0x4a,0x1f,0xa3,0x9e,0x7e,0xb5,0xf6,0xd2,0xb8,0x0c,0xfe,0xcf,0x32,0xbf,0x2a,0x17,0xbf,0x5a,0x7b,0x29,0x66,0x95,0x5b,0x49,0x0,0xb2,0x95,0xda,0x22,0x33,0x7e,0xf6,0x8d,0xf9,0x28,0xb5,0x19,0x12,0x52,0x9e,0x41,0x2b,0xe2,0x07,0x5f,0x9a,0x8d,0x6b,0xea,0x8a,0xa9,0xf2,0x87,0x61,0x4e,0x85,0x0d
,0x8f,0xac,0x9a,0x1f,0x13,0x93,0x3f,0x12,0xac,0x06,0x0d,0x7e,0x72,0xcb,0x5c,0x2c,0x9a,0x9c,0x47,0x02,0x90,0x8d,0x83,0xff,0xff,0xae,0x9a,0x0f,0x9b,0x51,0x9b,0xd0,0x72,0x45,0x81,0xe1,0x7b,0x37,0xcc,0xc4,0xf5,0xb3,0x4b,0xa9,0x11,0x2e,0x1a,0xfc,0x3f,0xbe,0x65,0x2e,0x0c,0x5a,0x31,0xa1,0xe5,0xea,0x34,0x22,0x1e,0xbc,0x79,0x4e,0xdc,0x2d,0x0e,0x12,0x80,0x14,0x22,0xc7,0xa0,0xc1,0x83,0x37,0xcd,0x86,0x49,0x37,0x71,0x77,0x88,0xc7,0xe3,0x81,0xa2,0x44,0xe7,0x55,0x66,0x0,0xee,0xbe,0x76,0x2a,0x66,0x94,0xe6,0xd0,0xc8,0x07,0x50,0x94,0xa3,0xc7,0x0f,0x6f,0x9a,0x75,0x41,0x7c,0x45,0x24,0x70,0xce,0xe1,0x76,0xbb,0xc1,0x27,0xb8,0xdd,0x2a,0x09,0x0c,0x0f,0xdc,0x58,0x87,0x8a,0x3c,0x63,0x56,0xd4,0x77,0x56,0x3b,0x01,0x19,0x80,0xef,0x5e,0x3f,0x1d,0x45,0x39,0xfa,0xa8,0x3f,0xeb,0x74,0x3a,0xf1,0xde,0x7b,0x7b,0xb1,0xe7,0xdd,0x3d,0x68,0x6e,0x6e,0x86,0xdd,0x6e,0x47,0x30,0x18,0x84,0x28,0x8a,0xc8,0xcf,0xcf,0x43,0x79,0x79,0x05,0x16,0x2d,0x5e,0x84,0xa5,0x4b,0x97,0xa2,0xba,0xba,0x6a,0xf4,0x46,0x10,0x05,0xdc,0x7f,0x63,0x1d,0xbe,0xbd,0x75,0x5f,0x56,0x6f,0x13,0x8a,0x02,0xc3,0xbf,0xfc,0xcd,0x4c,0x58,0xf4,0x9a,0x31,0xdf,0x6b,0xb7,0xdb,0xb1,0x67,0xcf,0x1e,0xec,0xd9,0xf3,0x1e,0xce,0x9d,0x3d,0x87,0xee,0xee,0x6e,0xc8,0xb2,0x0c,0x49,0x92,0x50,0x54,0x54,0x84,0xca,0xca,0x4a,0x2c,0x5d,0xba,0x04,0x4b,0x96,0x2e,0x45,0x51,0x51,0x61,0xd4,0xcb,0xb3,0xfb,0xff,0xa6,0x0e,0xf7,0xfe,0xfe,0x43,0x84,0x15,0x95,0x04,0x20,0x53,0xf9,0xdc,0xec,0x12,0x2c,0xa9,0x2d,0x88,0xea,0x33,0x76,0xbb,0x1d,0x5b,0x36,0x6f,0xc5,0xcb,0x2f,0xbf,0x0c,0x55,0x1d,0xda,0x39,0x14,0x45,0x81,0xdd,0xde,0x0d,0xbb,0xbd,0x1b,0x07,0x0e,0x1c,0xc0,0xaf,0x37,0xfe,0x1a,0x75,0xb3,0xea,0x70,0xd7,0x5d,0x77,0x62,0xde,0xbc,0x79,0x23,0x7e,0x6f,0x71,0x8e,0x1e,0x77,0xad,0xac,0xc5,0x2f,0x5e,0x3e,0x91,0xb5,0xed,0xf1,0xd5,0x45,0x95,0xa8,0x2b,0x1b,0xdd,0x11,0x77,0xfa,0x74,0x33,0x36,0x6d,0xda,0x84,0x3d,0xef,0xee,0x19,0xf6,0xdf,0x65,0x59,0x46,0x7b,0x7b,0x3b,0xda,0xdb,0xdb,0xb1,0x77,0xef,0x5e,0xe0,0x97,0xff,0x81,0xe5,0xcb,0x97,0x63,0xdd,0xfa,0x7a,0x4c,0x9e,0x3c,0x39,0xaa,0x65,0xe1,0x6d,
0x4b,0xab,0xb1,0x65,0x57,0x33,0x2d,0x01,0x32,0x11,0xa3,0x56,0x42,0xfd,0x15,0x35,0x51,0x99,0x98,0xcf,0x3c,0xf3,0x07,0xac,0x59,0xbd,0x16,0x3b,0x76,0xec,0x18,0x76,0xf0,0x8f,0xc4,0xd1,0x23,0x47,0xf1,0x9d,0x7b,0xbf,0x8b,0x1f,0xfe,0xe0,0x87,0xf0,0x7a,0xbd,0x23,0xbe,0xef,0xba,0x59,0x25,0x98,0x56,0x62,0xc9,0xca,0xf6,0xc8,0x37,0xeb,0xf0,0x8d,0xcb,0xaa,0x47,0xfc,0x77,0x59,0x96,0xf1,0x1f,0xbf,0x7c,0x14,0x77,0xdd,0x79,0xd7,0x88,0x83,0x7f,0x24,0x76,0xef,0xde,0x8d,0x3b,0xd7,0xdf,0x85,0xff,0xfc,0xcf,0xc7,0x20,0xcb,0x91,0x5b,0x58,0xb7,0x2c,0xac,0x8c,0x49,0x1c,0x08,0x09,0x40,0x0a,0x72,0xd3,0x25,0xe5,0xc8,0x8d,0xd0,0xe9,0x17,0x08,0x04,0xf0,0xd0,0x86,0x87,0xf1,0x9b,0x5f,0xff,0x06,0xe1,0xf0,0xf8,0x63,0xca,0x77,0xed,0xda,0x8d,0x6f,0xfe,0xdd,0xb7,0xd0,0xd2,0xd2,0x32,0xfc,0x92,0x84,0x31,0xdc,0xbe,0x7c,0x72,0x56,0xb6,0xc7,0xd7,0x17,0x57,0xc1,0xa0,0x19,0xde,0xe9,0xd7,0xd7,0xd7,0x87,0x7b,0xff,0xf1,0x3b,0xd8,0xbe,0x7d,0xfb,0xb8,0xd7,0xf8,0x9c,0x73,0x3c,0xfb,0xe7,0x67,0x71,0xef,0xbd,0xdf,0x41,0x5f,0x5f,0x64,0x39,0xf7,0xb4,0x92,0x80,0xdb,0x96,0x56,0x93,0x0,0x64,0xdc,0xba,0x47,0x14,0xf0,0xa5,0xf9,0xe5,0x11,0x77,0x9c,0x9f,0xfc,0xf8,0x27,0x78,0xe7,0x9d,0x77,0x62,0x52,0x76,0x7b,0x7b,0x3b,0xbe,0x77,0xdf,0xff,0x46,0x6f,0xef,0xf0,0x09,0x20,0x2e,0x99,0x94,0x87,0x49,0x05,0xa6,0x84,0xd4,0x03,0x63,0xc0,0xd5,0x33,0x8b,0xf1,0xe3,0x5b,0xe6,0xe2,0x7f,0xbe,0xbd,0x1c,0x2f,0xdd,0xb7,0x12,0xcf,0xde,0x7d,0x05,0xfe,0xfd,0xeb,0xf3,0xf1,0xc5,0xf9,0xe5,0x17,0x1c,0xac,0x89,0x27,0x66,0xbd,0x84,0xeb,0x66,0x97,0x0c,0xfb,0x6f,0xc1,0x60,0x10,0xf7,0xff,0xcb,0xfd,0x38,0x7a,0xf4,0x68,0x4c,0xca,0x3a,0x7a,0xe4,0x28,0x1e,0xb8,0xff,0xfb,0x08,0x06,0x23,0x0b,0xfa,0xb9,0x6a,0x46,0x11,0xf2,0xcd,0x3a,0x12,0x80,0x4c,0x62,0x49,0x4d,0x3e,0x72,0x23,0xdc,0x5f,0xfe,0xed,0x6f,0x37,0x61,0xd7,0xae,0xdd,0x31,0x2d,0xbf,0xb7,0xb7,0x17,0x0f,0xdc,0xff,0xc0,0xb0,0x9d,0x90,0x01,0xb8,0x61,0x4e,0xfc,0xb7,0x05,0x0b,0x2c,0x3a,0xfc,0xec,0x1b,0x0b,0xf0,0x4f,0x5f,0x98,0x89,0x85,0x93,0xf2,0x06,0x1d,0x6f,0x06,0xad,0x88,0x39,0x15,0x36,0x7c,0xfb,0x9a,0xa9,0xf8,0
xd5,0xda,0x4b,0x31,0x39,0xca,0x33,0x10,0xe3,0xe1,0xaa,0x19,0xc5,0xd0,0x0f,0x33,0xfb,0x73,0x95,0xe3,0x47,0xff,0xfa,0x63,0x34,0x36,0x36,0xc5,0xb4,0xbc,0x93,0x27,0x4f,0xe2,0x27,0x3f,0xfe,0x69,0x44,0xd6,0x84,0x24,0x0a,0xb8,0x76,0x56,0x31,0x09,0x40,0x26,0x71,0xe5,0x8c,0xa2,0x88,0x67,0x8b,0x3f,0x3c,0xf3,0x87,0xb8,0x3c,0x43,0x63,0x63,0x13,0x9e,0x7e,0xfa,0xf7,0xc3,0xfe,0xdb,0x15,0xd3,0x8b,0x10,0xcf,0xdb,0xd3,0xcc,0x3a,0x09,0x3f,0xfd,0xea,0xbc,0x31,0x1d,0x6e,0x95,0x79,0x46,0xfc,0xdb,0xd7,0xe6,0xa1,0xc4,0xaa,0x8f,0x6f,0x7b,0x4c,0x1f,0xbe,0x3d,0x5e,0x79,0xf5,0x55,0xec,0xde,0xbd,0x3b,0x2e,0x65,0xee,0xdc,0xb9,0x13,0x6f,0xbc,0xf1,0x46,0x44,0xef,0x5d,0x31,0xad,0x88,0x04,0x20,0x53,0x60,0x0c,0x98,0x5f,0x65,0x8b,0xc8,0xf4,0x7f,0xec,0xb1,0xc7,0x27,0xbc,0xaf,0x3c,0x1a,0xdb,0xfe,0x7b,0x1b,0xec,0x76,0xfb,0x90,0xd7,0xf3,0x4c,0x5a,0x54,0xc7,0x71,0x19,0xb0,0x7a,0xd9,0x24,0x54,0x46,0xb8,0xcf,0x6d,0x35,0x68,0xf0,0xed,0x6b,0xa6,0xc6,0xed,0x59,0x0c,0x5a,0x11,0x33,0xcb,0x72,0x86,0xf5,0xbb,0xfc,0xf6,0x37,0xbf,0x8d,0x6b,0x5f,0xd8,0xf8,0xc4,0xaf,0x23,0x5a,0x0a,0xd4,0x14,0x99,0x23,0xf6,0x17,0x91,0x0,0xa4,0x38,0x55,0x79,0xa6,0x88,0xf6,0x99,0x3f,0xf8,0x60,0x1f,0x8e,0x1f,0x3f,0x1e,0xd7,0x67,0x09,0x85,0x42,0x23,0x5a,0x18,0xb3,0xe3,0x14,0x97,0xae,0x95,0x84,0xa8,0x23,0x0f,0x2f,0x9d,0x9c,0x1f,0x37,0x6f,0xf8,0xf4,0x92,0x1c,0x88,0xc3,0xdc,0x16,0xfb,0xc2,0x0b,0x2f,0x8e,0xe8,0x27,0x89,0x15,0x3d,0x3d,0x3d,0xd8,0xb1,0xe3,0xa5,0xb1,0x27,0x0d,0x60,0x58,0x91,0x22,0x01,0x48,0x43,0x26,0x15,0x46,0x36,0xb3,0xbe,0xf6,0xea,0xab,0x09,0x79,0x9e,0xd7,0x5f,0x7f,0x63,0xd8,0xe8,0xc1,0x78,0x39,0x02,0x2b,0xf3,0x8c,0x51,0x87,0xd8,0x32,0x0,0xd3,0x4a,0xe3,0xb3,0x3d,0x39,0x79,0x84,0xf6,0x78,0xf5,0x95,0xc4,0xd4,0x7f,0xa4,0xe5,0x24,0xca,0x31,0x4b,0x02,0x10,0x67,0xca,0x22,0x38,0xec,0x23,0xcb,0x32,0xf6,0xec,0x79,0x2f,0x21,0xcf,0xe3,0x76,0xbb,0xf1,0xf1,0xc7,0x1f,0x0f,0x7d,0xce,0xdc,0xf8,0x84,0xa2,0x9a,0xc7,0x19,0xf2,0x1c,0x89,0xd5,0x14,0xab,0xf6,0xb0,0xdb,0xed,0x68,0x6c,0x6c,0x4c,0x48,0xfd,0x1f,0x3f,0x7e,0x1c,0xdd,0xdd,0x63,0xdf,0xc2,0x5
b,0x96,0x6b,0x20,0x01,0xc8,0x04,0x22,0x19,0x0,0x67,0xce,0x9c,0x19,0x35,0x60,0x27,0xd6,0x1c,0x3e,0x74,0x78,0x98,0x01,0x17,0x9f,0x20,0x4d,0xc6,0xc6,0xe7,0x5e,0x8c,0x97,0x53,0x72,0xb8,0x33,0x18,0x87,0x0f,0x1f,0x89,0xab,0xef,0xe5,0xb3,0x70,0xce,0x71,0xf4,0xe8,0xb1,0xa4,0x09,0x20,0x09,0x40,0x82,0x89,0xc4,0xfc,0xed,0xe9,0xe9,0x49,0xe8,0x33,0x0d,0x57,0xde,0x48,0x41,0x31,0x19,0xd7,0x1e,0xc3,0xfc,0xce,0x78,0xaf,0xfd,0x87,0xd4,0x7f,0x04,0x16,0x40,0xa6,0xb6,0x47,0xd6,0x09,0x40,0x58,0x19,0x7b,0x66,0xe9,0xed,0xed,0x4b,0x6c,0x07,0x1c,0xa6,0xc3,0xcb,0x59,0x92,0x32,0x6c,0xb8,0xdf,0x99,0x68,0x01,0x88,0xa4,0xbc,0x4c,0x3d,0x14,0x94,0x75,0x02,0x10,0x51,0x12,0xc8,0x44,0x67,0xf0,0x1d,0xa6,0x3c,0x5f,0x48,0xce,0xde,0xf6,0x48,0x70,0xfd,0x47,0x52,0x9a,0x3f,0x43,0x93,0x87,0x66,0x9d,0x0,0xf4,0xb8,0xc7,0xde,0xf7,0x2d,0x28,0x2c,0x48,0xe8,0x33,0x15,0x14,0x0e,0x3d,0xae,0xda,0xed,0xce,0x8e,0xfc,0x74,0xc3,0xfd,0xce,0xfc,0x82,0x04,0xd7,0x7f,0x04,0xe5,0x75,0xbb,0x82,0x24,0x0,0x99,0xc0,0xb9,0x3e,0xdf,0x98,0xef,0xc9,0xcf,0x4f,0x6c,0x46,0x98,0x82,0x61,0xca,0x8b,0xe4,0x39,0x33,0xb5,0x3d,0x12,0x5e,0xff,0x11,0x08,0x40,0x6b,0xbf,0x8f,0x04,0x20,0x13,0x38,0x65,0xf7,0x8c,0xe9,0x61,0xae,0xa9,0xa9,0x81,0xd5,0x9a,0xb8,0x04,0x91,0xf3,0xe6,0x0f,0xcd,0x13,0xd0,0xd4,0xe5,0xc9,0x8a,0xf6,0x68,0xea,0x72,0x0f,0xad,0x8f,0x79,0x73,0xc7,0xbd,0x5b,0x11,0x2d,0x8c,0x31,0xcc,0x9b,0x37,0x77,0xcc,0xf7,0x35,0x0e,0xf3,0x9c,0x24,0x0,0x69,0x88,0x3b,0x10,0xc6,0x29,0xfb,0xe8,0x83,0x4b,0x10,0x04,0x2c,0xbe,0x6c,0x71,0x42,0x9e,0xc7,0x6a,0xb5,0x62,0xf6,0xec,0xd9,0x17,0xbc,0xa6,0x72,0x8e,0x43,0xad,0x8e,0xac,0xb1,0x0,0x2e,0x4e,0xc7,0x9d,0x9f,0x9f,0x8f,0x69,0xd3,0xa7,0x25,0xa4,0xfc,0x99,0x75,0x33,0x61,0xb3,0x8d,0x1e,0x1a,0xee,0x09,0xc8,0x19,0x2b,0xc8,0x59,0x79,0x18,0xe8,0x83,0xe6,0xb1,0xbd,0xfc,0x2b,0x56,0x5c,0x91,0x90,0x67,0xb9,0xfc,0xf2,0xe5,0x10,0x84,0x0b,0x9b,0xe1,0x48,0x9b,0x13,0xde,0x2c,0x4a,0x0d,0x36,0x5c,0x7b,0xac,0x58,0xb1,0x22,0x21,0x65,0x47,0x52,0xce,0xbe,0x33,0x7d,0x19,0x7b,0xb5,0x5b,0x56,0x0a,0xc0,0x4b,0x87,0x3a,0xc6,0xf4,0xfc,0x2
e,0x5f,0xbe,0x7c,0xc8,0xcc,0x1c,0x6b,0x74,0x3a,0x1d,0x6e,0xbf,0xe3,0xf6,0x21,0xaf,0xbf,0x72,0xb8,0x33,0xab,0xda,0xe3,0xe5,0xc3,0x1d,0x43,0x5e,0xfb,0xca,0x57,0xbe,0x1c,0x75,0x2e,0xbf,0x68,0x29,0x2a,0x2a,0xc2,0xcd,0x37,0xdf,0x34,0xe6,0xfb,0x5e,0x3d,0x92,0xb9,0xed,0x91,0x95,0x02,0xd0,0xe5,0x0a,0xe0,0xc3,0x33,0x63,0x5b,0x01,0xeb,0xd6,0xd5,0xc7,0xf5,0x39,0x6e,0xba,0xe9,0x4b,0x43,0x1c,0x50,0x2e,0x7f,0x18,0x3b,0x4f,0x76,0x67,0x55,0x7b,0x1c,0x6f,0x77,0x0d,0x59,0x96,0xe9,0x74,0x3a,0xdc,0x76,0xdb,0x6d,0x71,0x2d,0x77,0xed,0xed,0x6b,0xa1,0xd5,0x8e,0x7e,0xca,0xaf,0xad,0xdf,0x8f,0x03,0x2d,0xfd,0x24,0x0,0x99,0xc6,0x1f,0xf6,0x9e,0x1d,0xf3,0x3d,0xf3,0x17,0xcc,0xc7,0x9a,0xb5,0x6b,0xe2,0x52,0x7e,0xdd,0xac,0x3a,0xac,0x5b,0xbf,0x6e,0xc8,0xeb,0xcf,0xee,0x6f,0xcd,0xba,0x0b,0x2b,0x39,0x80,0x6d,0xef,0x0f,0x6d,0x8f,0x2f,0x7e,0xe9,0x8b,0xb8,0xfa,0x9a,0xab,0xe3,0x52,0xe6,0xb5,0xd7,0x5e,0x83,0x1b,0x6e,0xf8,0xfc,0x98,0xef,0xdb,0xf6,0xfe,0xd9,0x8c,0xbe,0xd9,0x39,0x6b,0x05,0xe0,0x50,0xab,0x23,0xa2,0x99,0xf6,0x8e,0xdb,0x6f,0xc7,0xb2,0x65,0xcb,0x62,0x5a,0x76,0x7e,0x7e,0x3e,0x36,0x6c,0x78,0x70,0xc8,0xec,0xd3,0xe1,0xf4,0xe3,0x4f,0xfb,0xcf,0x65,0x65,0x7b,0xbc,0x73,0xc2,0x8e,0x8f,0xce,0x5d,0xe8,0xf8,0x64,0x8c,0xe1,0xbe,0xfb,0xbe,0x8b,0xda,0xda,0xda,0x98,0x96,0x35,0x75,0xea,0x54,0xdc,0xf7,0xbd,0xfb,0xc6,0xdc,0x69,0x38,0xde,0xe1,0xc2,0x2b,0x47,0x3a,0x32,0xba,0xde,0xb3,0xfa,0x62,0x90,0xdf,0xbc,0x7d,0x6a,0xcc,0x88,0x3b,0x26,0x30,0x3c,0xf4,0xf0,0x86,0x88,0xd6,0x8a,0x91,0x30,0x6d,0xda,0x34,0xfc,0xd7,0x13,0xbf,0x1a,0x76,0xef,0x79,0xe3,0x9b,0x4d,0x08,0xc9,0x6a,0x56,0xb6,0x05,0x07,0xf0,0xc4,0x9b,0x4d,0x90,0x2f,0x0a,0xb9,0x35,0x18,0x0c,0xf8,0x8f,0x47,0x7f,0x89,0xa5,0x4b,0x97,0xc4,0xa4,0x9c,0xa5,0xcb,0x96,0xe2,0x17,0xbf,0xfc,0x39,0x74,0xba,0xd1,0xf3,0xfc,0xa9,0x9c,0xe3,0x89,0x37,0x9b,0xc0,0x33,0x3c,0x22,0x3b,0xab,0x05,0xc0,0xee,0x0a,0xe0,0xe7,0x2f,0x8d,0x9d,0x87,0x5f,0x14,0x45,0xdc,0x7d,0xcf,0xdd,0xb8,0xeb,0xae,0x3b,0x21,0x49,0xe3,0x3f,0xa5,0xb7,0x7c,0xf9,0x72,0xfc,0xec,0xe7,0xff,0x6f,0xd8,0xc1,0xbf,0xfd,0xc3,0x56
,0xbc,0x77,0xaa,0x37,0x9b,0x9b,0x03,0xcd,0xdd,0x1e,0xfc,0xfa,0xed,0x53,0x43,0x5e,0x37,0x1a,0x8d,0x78,0xe8,0xe1,0x87,0x70,0xd3,0x4d,0x37,0x8d,0xff,0x34,0x23,0x63,0xb8,0xf9,0xcb,0x37,0xe3,0xa1,0x87,0x36,0xc0,0x68,0x1c,0xfb,0xa8,0xf5,0x96,0x5d,0xcd,0x38,0xde,0xe1,0xca,0xf8,0x3a,0xcf,0xfa,0xab,0xc1,0x76,0x35,0x76,0x63,0xfb,0x87,0xad,0x11,0x75,0xa0,0x5b,0x6f,0xbb,0x15,0xbf,0x7f,0xe6,0x69,0xdc,0x78,0xe3,0x8d,0x43,0xb6,0xee,0x46,0x63,0xd6,0xac,0x59,0x78,0xec,0xf1,0xff,0xc4,0xbf,0xfe,0xe8,0x61,0x98,0x4c,0x43,0x13,0x4b,0x1c,0x6b,0x77,0x61,0xd3,0xce,0xd3,0x20,0x80,0xe7,0x0e,0xb4,0xe1,0xed,0x13,0x43,0xd3,0xa4,0x49,0x92,0x84,0x7f,0xbc,0xf7,0x1e,0x34,0x6c,0xfe,0x1d,0xae,0x5c,0x79,0x65,0x54,0xdf,0x79,0xe5,0xca,0x2b,0xb1,0x79,0x4b,0x03,0xee,0xb9,0xe7,0xee,0x88,0x04,0x7c,0xef,0xe9,0x5e,0xfc,0xcf,0x07,0xd9,0xb1,0x14,0x93,0xa8,0xcb,0x01,0x4f,0xbc,0xd5,0x04,0x93,0x5e,0xc2,0xb5,0x75,0x25,0x63,0xbe,0xb7,0xa0,0xa0,0x0,0xdf,0xbd,0xef,0x3b,0xb8,0xf5,0xb6,0x6f,0xe0,0xdd,0xdd,0xef,0xe2,0xdd,0x77,0xf7,0xe0,0xec,0xd9,0xb3,0x70,0x38,0x1c,0x83,0x99,0x7d,0x72,0x72,0x72,0x50,0x5c,0x5c,0x84,0x4b,0x17,0x2d,0xc2,0xb2,0x65,0x4b,0x51,0x37,0xb3,0x0e,0x4c,0x18,0x7e,0xe6,0x3a,0xdd,0xed,0xc1,0x0f,0x9e,0xfd,0x38,0x6b,0x4d,0xff,0xe1,0x96,0x02,0xff,0xbe,0xe3,0x38,0x8c,0x5a,0x71,0xd8,0x4b,0x3a,0xab,0xaa,0xaa,0xf0,0xe0,0x83,0x3f,0xc4,0xa9,0xd5,0xa7,0xb0,0x67,0xcf,0x7b,0x78,0x6f,0xcf,0x7b,0xe8,0xe8,0xe8,0x80,0xc3,0xe1,0x0,0xe7,0x1c,0x8c,0x31,0xd8,0x6c,0x36,0x94,0x96,0x95,0x62,0xe9,0xd2,0xa5,0x58,0xba,0x74,0x09,0x6a,0x6a,0x22,0xbf,0x0,0xe6,0xe3,0x73,0x0e,0xfc,0xf8,0xb9,0x23,0x19,0xed,0xf8,0x23,0x01,0xb8,0xb8,0xd3,0x71,0xe0,0x67,0x3b,0x8e,0xc3,0xe9,0x0b,0xe3,0x96,0x4b,0x2b,0x23,0xfa,0x4c,0x69,0x69,0x29,0x6e,0xf9,0xea,0x2d,0xb8,0xe5,0xab,0xb7,0x9c,0xff,0x0e,0x0e,0x8f,0xc7,0x03,0x83,0xc1,0x10,0xf1,0x32,0xe1,0x40,0x4b,0x3f,0x1e,0xde,0x7e,0x38,0x63,0x4f,0x9a,0x8d,0x17,0x59,0x51,0xf1,0xe0,0xb3,0x87,0x71,0xef,0x75,0xd3,0x47,0xbc,0x2f,0xa0,0xb6,0xb6,0x16,0xb5,0xb5,0xb5,0x58,0xbd,0xfa,0x6f,0x01,0x0c,0x5c,0xc9,0xe6,0xf3,0x
f9,0x60,0x32,0x99,0xa2,0xb2,0xce,0x3e,0xcb,0xce,0x93,0xdd,0x78,0xe4,0xc5,0x63,0x43,0xfc,0x10,0x24,0x0,0x59,0x32,0xf3,0xfc,0xf6,0xed,0x53,0xe8,0xf3,0x86,0x50,0x7f,0xf9,0x64,0x48,0x51,0x5e,0x8a,0xc1,0x18,0x83,0xc5,0x12,0x79,0xde,0xbc,0x57,0x8f,0x74,0xe2,0xf1,0xd7,0x1b,0x13,0xbe,0xe5,0x37,0x91,0x9b,0x75,0x12,0x89,0xca,0x39,0x7e,0xf9,0xca,0x09,0xf4,0x79,0x83,0xf8,0xfa,0xe2,0xaa,0x31,0xd7,0xfe,0xa2,0x28,0x46,0x55,0xff,0x17,0xb7,0xfd,0xb3,0xfb,0x5b,0xf1,0xbb,0x77,0x4e,0x65,0x4d,0x1e,0x06,0x12,0x80,0x11,0x3a,0xc2,0x9f,0xf6,0x9d,0xc3,0xa1,0x73,0x0e,0xfc,0xe3,0x75,0xd3,0x51,0x5b,0x14,0xfb,0x4b,0x31,0xfa,0xbd,0x21,0x3c,0xf1,0x56,0x13,0xde,0x3e,0x6e,0x4f,0xca,0x6f,0x74,0xfa,0xc7,0x77,0xb5,0x99,0xc3,0x17,0x4e,0xf8,0xb3,0xaa,0x9c,0x63,0xf3,0xae,0x66,0x1c,0x68,0xe9,0xc7,0x3d,0x9f,0x9b,0x8e,0xf2,0x38,0xe4,0xe5,0xeb,0x72,0x05,0xf0,0xd8,0x6b,0x27,0x23,0x0a,0x0f,0x27,0x01,0xc8,0x12,0x4e,0x76,0xb9,0x71,0xcf,0xd3,0xfb,0xf1,0x85,0xb9,0x65,0xf8,0xda,0xa2,0xca,0x71,0x5d,0x1f,0x7e,0x31,0xfe,0x90,0x82,0x17,0x3e,0x6a,0xc7,0x1f,0xf6,0xb6,0x24,0xf5,0x0a,0xf0,0xb3,0xbd,0x3e,0x38,0xfd,0x61,0x58,0x0d,0x9a,0xa8,0x06,0xe2,0x91,0x36,0x67,0xd2,0x9e,0xf9,0xa3,0x73,0x0e,0x7c,0x6b,0xcb,0x07,0xf8,0xf2,0xc2,0x0a,0x7c,0x65,0x61,0x05,0x6c,0x31,0xc8,0xd1,0xef,0xf2,0x87,0xb1,0xfd,0x40,0x1b,0xfe,0xb8,0xef,0x1c,0x82,0x59,0xbc,0x04,0x23,0x01,0x18,0x01,0x45,0xe5,0x78,0xee,0x60,0x1b,0x76,0x7c,0xdc,0x8e,0xcb,0xa7,0x15,0xe2,0xfa,0xd9,0xa5,0x98,0x5f,0x65,0x8b,0x7a,0x1b,0xaa,0xb9,0xdb,0x83,0x57,0x8f,0x74,0xe2,0x95,0xc3,0x9d,0x49,0x1d,0xf8,0x9f,0x1d,0xcc,0x2f,0x7e,0xd4,0x8e,0x5b,0x97,0x44,0x7e,0xe9,0xe5,0xce,0x93,0xdd,0xe8,0xf7,0x85,0x92,0xfa,0xdc,0x61,0x45,0xc5,0xb6,0xf7,0xcf,0xe2,0x2f,0x1f,0xb6,0xe2,0xaa,0x99,0xc5,0xb8,0x6e,0x56,0x09,0xea,0xca,0xad,0x51,0x27,0x2b,0x3d,0xde,0xe1,0xc2,0xab,0x47,0x3a,0xf1,0xfa,0xd1,0xae,0xac,0x8b,0xb8,0x24,0x01,0x18,0x07,0xb2,0xca,0xf1,0xd6,0x71,0x3b,0xde,0x3a,0x6e,0x47,0xae,0x51,0x8b,0x79,0x55,0x36,0xcc,0xae,0xb0,0xa1,0x2a,0xcf,0x88,0x8a,0x3c,0x23,0xcc,0x3a,0x09,0x5a,0x49,0x80,0xac,0x7
2,0x78,0x83,0x32,0x3a,0x9d,0x7e,0x9c,0xed,0xf5,0xe1,0x78,0x87,0x0b,0x07,0xcf,0xf6,0xa3,0xad,0xdf,0x9f,0x72,0xbf,0xe9,0x99,0xbd,0x2d,0x98,0x5b,0x69,0xc3,0xac,0x08,0x2e,0x1f,0x69,0x77,0xf8,0xf1,0xab,0xd7,0x1b,0x53,0xe6,0xd9,0x43,0xb2,0x8a,0x97,0x0f,0x75,0xe0,0xe5,0x43,0x1d,0x28,0xb0,0xe8,0x30,0xbf,0x32,0x17,0xb3,0xca,0xad,0xa8,0xca,0x37,0xa2,0x2c,0xd7,0x0,0x93,0x76,0xa0,0x3d,0xc2,0x8a,0x0a,0x6f,0x50,0x46,0x87,0x23,0x80,0xb3,0xbd,0x5e,0x1c,0x6d,0x77,0xe1,0xc0,0xd9,0x7e,0xd8,0x5d,0x01,0xea,0xd4,0x24,0x0,0xe3,0x5c,0xbf,0xfb,0x42,0x83,0x62,0x90,0xce,0x84,0x64,0x15,0x3f,0x7c,0xf6,0x10,0xfe,0xf7,0x0d,0x33,0xb0,0xa4,0xb6,0x60,0xd4,0xd9,0xf2,0x47,0xcf,0x1d,0x19,0xb7,0xdf,0x20,0xde,0xf4,0xb8,0x83,0x78,0xed,0x68,0x27,0x5e,0x3b,0xda,0x49,0x9d,0x93,0x04,0x80,0x88,0x06,0x6f,0x50,0xc6,0x86,0xbf,0x1c,0xc6,0x9c,0x0a,0x1b,0xae,0x9d,0x55,0x8c,0xb9,0x15,0x36,0x58,0xf4,0x1a,0xf8,0x42,0x32,0x8e,0x77,0xb8,0xf0,0xc6,0x31,0x3b,0xf6,0x9e,0xee,0x01,0xe7,0x54,0x57,0x24,0x0,0x44,0xc6,0x72,0xa8,0xd5,0x91,0x35,0xd9,0x87,0x88,0xa1,0x08,0x54,0x05,0x04,0x41,0x02,0x90,0x12,0x24,0x2a,0x11,0x24,0x41,0x10,0x29,0x26,0x0,0x4c,0x10,0x20,0x8a,0x64,0x90,0x10,0xd9,0x87,0x28,0x26,0xef,0xda,0xb1,0x94,0xf1,0x01,0x4c,0x9b,0x36,0x0d,0x4f,0xcd,0x5b,0x88,0x3d,0x4d,0x3d,0x78,0xe7,0x64,0x37,0x3e,0x3a,0xdb,0x9f,0x75,0x61,0x99,0x44,0xf6,0x61,0xd0,0x8a,0x98,0x39,0xa9,0x04,0xe8,0xe8,0xce,0x5e,0x01,0xd0,0xe9,0x74,0x98,0x77,0xc9,0x25,0xd0,0x6a,0x35,0xb8,0x7e,0x4e,0x29,0xae,0x9f,0x53,0x8a,0x60,0x58,0xc1,0x81,0xb3,0x0e,0xec,0x3c,0x69,0xc7,0xee,0xc6,0x1e,0x0a,0xda,0x20,0x32,0x06,0x9b,0x51,0x8b,0x2b,0x67,0x14,0x61,0xc5,0xb4,0x42,0xcc,0x2c,0xcb,0x01,0x38,0xc7,0x73,0xcf,0x9e,0x81,0xdb,0xed,0xce,0x4c,0x01,0x50,0xc1,0x20,0x8c,0x92,0x87,0x77,0xee,0xfc,0xf9,0x43,0xd2,0x63,0xe9,0x34,0x22,0x96,0xd4,0xe6,0x63,0x49,0x6d,0x3e,0xbe,0x75,0x55,0x18,0x2f,0x1f,0xea,0xc4,0xe6,0xdd,0xcd,0x59,0x75,0x52,0x8b,0xc8,0xbc,0x81,0x7f,0xd7,0x95,0xb5,0xb8,0x62,0x5a,0x21,0xb4,0xd2,0x67,0x96,0xbb,0x8c,0x61,0xee,0x82,0x05,0xd8,0xfd,0xce,0x3b,0x99,0xe9,0x03,0x
68,0x09,0x1a,0xf1,0x57,0x47,0x39,0x0e,0xfa,0x6c,0x70,0x28,0x17,0x0e,0x74,0x5b,0x6e,0x2e,0xa6,0xcd,0x98,0x31,0xea,0xe7,0x2d,0x7a,0x0d,0xbe,0xba,0xa8,0x12,0x77,0xad,0xa8,0xa1,0x5e,0x44,0xa4,0x2d,0xff,0xe7,0x8b,0xb3,0x70,0x4d,0x5d,0xf1,0x85,0x83,0xff,0x3c,0x93,0x6b,0x6a,0x90,0x37,0xca,0x95,0x68,0xbd,0x8a,0x0e,0x3c,0x5d,0x05,0xc0,0xa1,0x68,0xe1,0x52,0x34,0x38,0xec,0xb7,0xe1,0x79,0x47,0x19,0x9e,0x75,0x54,0xe2,0x03,0x5f,0x3e,0xba,0x65,0x3d,0x16,0x2e,0x5e,0x1c,0xb1,0xf7,0xff,0x73,0xb3,0x4b,0x40,0xfb,0x04,0x44,0x3a,0x52,0x66,0x33,0x60,0x76,0xc5,0xe8,0xa1,0xd7,0x0b,0x16,0x2e,0xbc,0xc0,0x6a,0x6e,0x0d,0x19,0xb1,0xdb,0x53,0x80,0xff,0xe9,0xab,0xc4,0x0e,0x47,0x29,0x7c,0x6a,0xec,0x0d,0x76,0x29,0x51,0x02,0xf0,0x59,0xbc,0x8a,0x88,0x13,0x7e,0x0b,0x4e,0xf8,0x2d,0x38,0xf0,0x7c,0x0b,0x96,0x4e,0xf1,0x62,0xc5,0xb4,0xc2,0x31,0x63,0xd3,0x8d,0x5a,0x09,0x1a,0x49,0xa0,0xec,0x39,0x44,0xda,0x61,0x35,0x8e,0x7d,0xfa,0xb2,0xa8,0xa4,0x14,0x5e,0x73,0x05,0x0e,0x76,0x06,0xd0,0x1e,0x32,0x20,0xc8,0x2f,0xdc,0x1d,0x70,0x2a,0x1a,0x98,0x04,0x39,0x0d,0x05,0x40,0x1e,0xf9,0xc7,0xdb,0x5d,0x01,0x6c,0xff,0xb0,0x15,0xdb,0x3f,0x6c,0x45,0x4d,0xa1,0x19,0x2b,0xa6,0x17,0xe2,0x8a,0x69,0x45,0x71,0x39,0xfb,0xbd,0x70,0x52,0x1e,0xee,0x5c,0x51,0x83,0x7c,0xb3,0x0e,0xed,0x0e,0x3f,0x4e,0xd9,0x3d,0x83,0x7f,0x9a,0x7b,0x3c,0x24,0x2c,0x04,0xcc,0x3a,0x09,0x35,0x45,0x66,0xd4,0x16,0x99,0x51,0x53,0x38,0xf0,0x77,0x91,0x45,0x8f,0x26,0xbb,0x1b,0xff,0xf5,0x46,0x13,0x5a,0x7a,0xbd,0x31,0x2f,0xf3,0x64,0x97,0x1b,0x3b,0x4f,0x74,0x63,0xe7,0x49,0x3b,0x3a,0x9d,0x12,0x80,0xe1,0xf3,0x50,0x38,0x15,0x0d,0xca,0x34,0xfe,0xf4,0x12,0x0,0x0e,0xc0,0xab,0x46,0xb6,0xcf,0x79,0xba,0xdb,0x83,0xd3,0xdd,0x1e,0x6c,0xde,0xd5,0x8c,0x47,0xff,0x76,0x21,0xa6,0x95,0x58,0x62,0xf6,0x1c,0xd5,0xf9,0x26,0x3c,0x78,0xd3,0xec,0xc1,0xf5,0x57,0x8e,0x41,0x83,0x19,0xa5,0x39,0x9f,0x9a,0x5c,0x9c,0xa3,0xb5,0xcf,0x37,0x20,0x08,0xdd,0x1e,0x9c,0xb6,0x7b,0xd0,0x64,0xf7,0xc0,0x95,0xa2,0x07,0x61,0x88,0x89,0x53,0x94,0xa3,0x1f,0x1c,0xe4,0xb5,0x85,0x66,0xd4,0x14,0x99,0x51,0x62,0x1d,0x3e,0xf7,0xc
3,0xfc,0xaa,0x5c,0xfc,0xe8,0x96,0xb9,0xf8,0xe6,0xe6,0x0f,0xc6,0x4c,0x25,0x1f,0x0d,0x2f,0x1d,0xea,0xc0,0x2f,0x5f,0x39,0x11,0xd1,0x7b,0xdd,0x8a,0x26,0xe6,0x75,0x10,0x77,0x01,0x08,0x73,0x01,0x18,0xc7,0xca,0x5d,0x89,0x71,0x0c,0xc0,0xb2,0xa9,0x05,0xc3,0x3a,0x5f,0x06,0x9d,0x21,0x8c,0xa1,0x2a,0xdf,0x84,0xaa,0x7c,0x13,0xae,0x9a,0x59,0x3c,0xf8,0x7a,0x8f,0x3b,0x38,0x28,0x08,0x9f,0x88,0x43,0xa7,0xc3,0x0f,0x8a,0x50,0x48,0x1f,0x44,0x81,0xa1,0x32,0xcf,0xf8,0xe9,0x60,0x3f,0xff,0xc7,0xa2,0x8f,0x6e,0x40,0x15,0x5a,0x74,0x98,0x53,0x61,0xc5,0xde,0xd3,0xb1,0x4b,0xdf,0x1e,0x4d,0x3f,0x1f,0x18,0x4b,0xe9,0x26,0x0,0x6a,0x6a,0xb8,0xed,0xf2,0x4d,0xe3,0xcb,0x22,0x53,0x60,0xd1,0xa1,0xc0,0xa2,0xc3,0x65,0x35,0x9f,0x7a,0x68,0x65,0x95,0xa3,0xad,0xdf,0x87,0xc6,0x2e,0x37,0x1a,0xbb,0x3c,0x68,0xea,0x72,0xe3,0xb4,0xdd,0x43,0xc9,0x3d,0x53,0x80,0x5c,0xa3,0x16,0xd3,0x4a,0x2c,0x98,0x5a,0x6c,0xc1,0x94,0x62,0x0b,0xaa,0x0b,0x8c,0x28,0xce,0xd1,0x43,0x88,0x51,0x98,0x79,0xbe,0x59,0x97,0xb4,0xdf,0x16,0xe6,0x2c,0x0d,0x05,0x0,0x62,0xc6,0x75,0x32,0x49,0x60,0xa8,0xce,0x37,0xa1,0x3a,0xdf,0x84,0x6b,0xeb,0x3e,0x15,0x85,0xb3,0xbd,0x5e,0x9c,0xb2,0x0f,0x2c,0x63,0x4e,0x76,0xba,0x71,0xb4,0xcd,0x49,0x96,0x42,0x3c,0xdb,0x41,0x14,0x30,0xaf,0xd2,0x86,0x29,0x9f,0x99,0xd5,0xcb,0x6c,0x33,0x6b,0x97,0xff,0x0,0x0,0x10,0xb2,0x49,0x44,0x41,0x54,0x86,0x8c,0x3d,0x53,0x12,0x52,0xd3,0xd0,0x02,0xc8,0x96,0x03,0xe5,0x92,0xc0,0x50,0x53,0x38,0xe0,0x38,0xfa,0xac,0x73,0xe7,0xe1,0xed,0x87,0xd1,0xe3,0x0e,0xd2,0x68,0x8d,0x31,0xd3,0x4b,0x72,0xf0,0x83,0x2f,0xcd,0x42,0x81,0x45,0x47,0x95,0x31,0x01,0xe2,0x1e,0x07,0x90,0xea,0xe7,0x7b,0x5e,0x39,0xdc,0x89,0x5d,0x8d,0xdd,0xe8,0x70,0xc6,0x3e,0x75,0xd7,0xb4,0x62,0x0b,0xbe,0xb9,0x72,0x0a,0xf5,0xb2,0x38,0xf0,0x4f,0x5f,0x98,0x19,0xf3,0xc1,0xaf,0xa8,0x1c,0x2d,0xbd,0x5e,0xbc,0x71,0xac,0x2b,0xa6,0xeb,0xfc,0x98,0x4d,0x32,0x2c,0xf6,0x93,0x69,0xdc,0x2d,0x0,0x0d,0x52,0x7b,0x6b,0xed,0xa5,0x43,0x1d,0x38,0xda,0xee,0x1c,0x9c,0xc5,0xcb,0x72,0x8d,0x98,0x5a,0x6c,0xc6,0xd4,0xe2,0x81,0x75,0x64,0x6d,0x91,0x19,0x7a,0xcd,0xf8,0x97,0x3
1,0x63,0x05,0x7f,0x4c,0x84,0xda,0x22,0x33,0xd6,0xaf,0xa8,0x45,0xa9,0x4d,0x9f,0x52,0x75,0xaa,0xaa,0xc0,0x29,0xbb,0x1b,0x1b,0xdf,0x3a,0x85,0x5e,0x4f,0xec,0xad,0x9f,0x3c,0x93,0x76,0xc2,0xdb,0xc4,0x4e,0x7f,0x18,0xc7,0xda,0x5d,0x68,0xea,0x72,0xa3,0xa5,0xd7,0x8b,0x96,0x5e,0x1f,0xda,0xfa,0x7d,0x83,0x4e,0xb9,0x1b,0xe7,0x97,0x5f,0xe0,0xf7,0x49,0x05,0x34,0x42,0x1a,0x0a,0x80,0x56,0x50,0x31,0xb0,0x19,0x98,0xfa,0xeb,0xb2,0x4f,0xd6,0xf1,0x67,0x7b,0xbd,0x78,0xfd,0x68,0xd7,0x40,0xa5,0x8b,0x02,0x26,0x15,0x98,0x2e,0xf0,0x1e,0x4f,0x2e,0x34,0xc3,0x10,0xa1,0x28,0xe8,0xa5,0xf8,0xf8,0x40,0x0a,0x2c,0x3a,0x3c,0xb2,0x6a,0x3e,0xcc,0xba,0xd4,0x4c,0xea,0x54,0x9e,0x6b,0x40,0x75,0x81,0x09,0xff,0xeb,0xc9,0xfd,0x08,0xc7,0xf8,0xfc,0xc6,0x68,0xbb,0x39,0xc3,0xe1,0xf2,0x87,0x07,0x77,0x70,0x3e,0x89,0xfb,0x68,0xed,0xf3,0xa5,0xdd,0xf5,0x5f,0x3a,0xa6,0xa4,0x9f,0x0,0x08,0xe0,0x30,0x89,0x2a,0xbc,0x4a,0x7a,0x3a,0x03,0xc3,0x8a,0x7a,0xde,0xdb,0xef,0x1e,0x32,0x0b,0x4d,0x2d,0xb6,0xa0,0xba,0xc0,0x84,0xaa,0x7c,0x23,0xa6,0x16,0x5b,0x50,0x95,0x6f,0x4a,0x98,0xcc,0xad,0x98,0x56,0x94,0xb2,0x83,0xff,0x13,0xaa,0xf3,0x4d,0x98,0x55,0x6e,0xc5,0xc1,0xb3,0xfd,0x89,0xb1,0x3c,0x38,0xc7,0xb9,0xbe,0x0b,0x77,0x67,0x5a,0x7a,0xbc,0x29,0x91,0x8e,0x3d,0x16,0x98,0x85,0x70,0xfa,0x09,0x0,0x0,0x98,0x58,0x18,0xde,0x0c,0xdb,0x0d,0xe8,0xf3,0x86,0xb0,0xf7,0x74,0xef,0x05,0x6b,0xc5,0x86,0x3b,0x2f,0x43,0xa9,0xd5,0x90,0x90,0xf2,0xf3,0x4c,0xda,0xb4,0xa8,0xa7,0x44,0x3e,0xe7,0x7b,0x4d,0xbd,0x78,0xf8,0xaf,0x87,0x33,0xd6,0xef,0x61,0x11,0x63,0x2f,0x64,0x09,0x71,0xd1,0x59,0xa5,0xd8,0x29,0x57,0x2c,0x6e,0x85,0x21,0x88,0x44,0x63,0x35,0x4c,0xbc,0xdf,0xe6,0x88,0x69,0x6a,0x01,0x14,0x4a,0x01,0x34,0x22,0x36,0x61,0xbd,0x5b,0xef,0x5a,0x82,0x96,0x5e,0x2f,0x76,0x9e,0xe8,0xc6,0xdb,0x27,0xec,0x38,0xd7,0xe7,0xa3,0xde,0xf5,0x19,0xfc,0x61,0x05,0x8a,0x12,0xfd,0xda,0x56,0xaf,0x11,0x20,0x89,0x03,0x07,0xad,0xc6,0x73,0x26,0x42,0x2b,0x09,0x51,0xaf,0xcd,0x33,0x19,0x06,0xa0,0xae,0xdc,0x8a,0x2b,0xa6,0x15,0x62,0xe9,0x94,0x02,0x14,0x4f,0xf0,0x7a,0x39,0x1d,0x53,0xd3,0x57,0x0,0x0a,0x
a4,0xd8,0x5e,0x2b,0x55,0x9d,0x6f,0x42,0xf5,0x32,0x13,0x56,0x2f,0x9b,0x34,0x28,0x06,0x7b,0x4f,0xf7,0x0e,0x59,0xa7,0x67,0x23,0x3f,0xfa,0xeb,0x11,0xec,0x3f,0x93,0xf8,0x8b,0x2e,0xbf,0x7a,0x69,0x25,0xee,0xbc,0xb2,0x36,0xab,0xeb,0x5e,0x14,0x18,0xe6,0x56,0xda,0x70,0x59,0x4d,0x3e,0x96,0x4d,0x29,0x88,0xc9,0x9d,0x92,0x9f,0x90,0x2f,0x05,0xe3,0xe2,0x5f,0x4a,0x88,0x0,0x58,0xc4,0x30,0x8c,0x82,0x1c,0xd5,0x79,0xe6,0x7e,0x6f,0x28,0x6a,0x31,0xe8,0x74,0x06,0xf0,0xde,0xa9,0x1e,0xec,0x3c,0xd9,0x4d,0x51,0x78,0x44,0x42,0x90,0x04,0x86,0x85,0x93,0xf2,0x70,0xc5,0xf4,0x42,0x2c,0x9e,0x9c,0x8f,0x1c,0x43,0x74,0xe7,0x0b,0x22,0xdd,0x26,0x2d,0xd1,0xc6,0xe7,0x4a,0xb3,0x84,0x08,0x0,0x03,0x50,0xa9,0xf5,0xe1,0x44,0x20,0x27,0xe2,0xcf,0x3c,0xf7,0x51,0x1b,0x96,0x4c,0xc9,0x8f,0x2a,0x86,0xbb,0xc4,0xaa,0xc7,0xcd,0x97,0x54,0xe0,0xe6,0x4b,0x2a,0x60,0x77,0x05,0xf0,0x6e,0xd3,0x79,0x31,0x68,0x77,0x52,0x4f,0x25,0x62,0x37,0x68,0x44,0x01,0x0b,0xab,0x73,0x71,0xc5,0xf4,0x42,0x5c,0x56,0x93,0x1f,0xf5,0xa1,0xa2,0x4f,0xf0,0x06,0xe5,0xc1,0xed,0xe6,0xb1,0xa8,0xd4,0xfa,0xd2,0x57,0x0,0x30,0x0e,0x01,0x38,0xd0,0xd2,0x8f,0xef,0xff,0xe9,0x63,0xdc,0xbe,0x7c,0x32,0xa6,0x97,0xe6,0x44,0x6d,0xfe,0x14,0xe5,0x7c,0x2a,0x06,0x1d,0x0e,0x7f,0xcc,0x4f,0x17,0x12,0xd9,0xc9,0x0d,0x73,0x4b,0xb1,0x76,0xf9,0xa4,0x09,0x39,0xa3,0x65,0x95,0x63,0xff,0x99,0x3e,0x6c,0x7a,0xe7,0x34,0xba,0x22,0xb8,0xac,0xd4,0x26,0x86,0x60,0x11,0xc2,0xe9,0x2d,0x0,0x45,0x9a,0x20,0x0c,0x82,0x02,0xbf,0x2a,0x46,0x25,0x02,0x07,0x5a,0xfa,0x51,0x60,0xd1,0x61,0xf9,0x94,0x02,0x2c,0x9b,0x5a,0x88,0x39,0x15,0xd6,0xa8,0x4f,0x76,0x95,0xda,0x0c,0xd4,0x73,0x89,0x98,0x30,0xb5,0x78,0x7c,0xce,0xec,0x90,0xac,0x62,0x5f,0x73,0x1f,0x76,0x37,0x0d,0xf8,0xab,0x3c,0x81,0xc8,0xb7,0xf4,0x26,0xeb,0xe2,0xe7,0xe8,0x4e,0x98,0x0,0x08,0xe0,0x98,0xaa,0x77,0xe3,0x63,0x9f,0x2d,0xea,0xcf,0xf6,0xb8,0x83,0xd8,0x7e,0xa0,0x0d,0xdb,0x0f,0xb4,0x41,0xa7,0x11,0x31,0xbf,0xd2,0x86,0x2b,0xa6,0x17,0x62,0xf9,0x94,0x42,0x18,0xb4,0x22,0xf5,0x4a,0x22,0x25,0xe9,0xf7,0x86,0xf0,0xf6,0x09,0x3b,0x76,0x9e,0xec,0xc6,0xb1,0x76,0xd7,0x
b8,0x22,0x0f,0x05,0x70,0x4c,0xd1,0xbb,0xd3,0x5f,0x0,0x0,0x60,0x8a,0xce,0x83,0xc3,0x3e,0x2b,0xd4,0x09,0xf8,0x33,0x83,0x61,0x65,0x30,0x0,0xe7,0x51,0xe9,0x24,0x16,0x54,0xe5,0xe2,0xb2,0xda,0x01,0xaf,0x2b,0xc5,0x08,0x10,0xc9,0xa6,0xc7,0x1d,0xc4,0xae,0xc6,0xee,0x41,0xdf,0xd3,0x44,0xa3,0x8d,0x2b,0x75,0xbe,0xb8,0x84,0x0,0x27,0x45,0x0,0x8c,0x82,0x8c,0x2a,0xad,0x0f,0x67,0x42,0xa6,0x98,0x7c,0x5f,0x48,0x56,0x07,0xc5,0xe0,0x89,0x37,0x9b,0xb0,0x70,0x52,0x1e,0x96,0x4d,0x29,0xc0,0x92,0xda,0xc8,0x1d,0x33,0x74,0xcf,0x0,0x31,0x1c,0x8a,0x1a,0x79,0xbf,0xb0,0xbb,0x02,0xd8,0xd3,0xd4,0x83,0x5d,0x8d,0x3d,0x38,0xd2,0xe6,0x8c,0xe9,0x19,0x83,0x99,0x7a,0x57,0x5c,0x7f,0x67,0xc2,0x83,0xc9,0xe7,0x18,0x1d,0x68,0x09,0x99,0x62,0xbe,0x45,0x17,0x92,0x55,0xec,0x69,0xea,0xc1,0x9e,0xa6,0x1e,0x48,0xe7,0xf7,0x63,0x97,0x4f,0x1d,0x08,0xc2,0x18,0x29,0x1c,0xd5,0x17,0x92,0x71,0x36,0xce,0x81,0x44,0xa2,0xc0,0x30,0xb3,0x2c,0x07,0xed,0xfd,0x7e,0x38,0x29,0xbf,0xe0,0x84,0x60,0x0,0xf2,0x2d,0xba,0x71,0xaf,0xc3,0xa3,0xe1,0x64,0xa7,0x7b,0xd4,0x23,0x6c,0x6d,0xfd,0x3e,0xec,0x6a,0xec,0xc1,0xee,0xc6,0x6e,0x34,0x9e,0x7f,0x6f,0xac,0x29,0xd7,0xfa,0x51,0x20,0x05,0x33,0x4b,0x0,0xac,0x62,0x18,0x93,0x74,0x5e,0x34,0x07,0x4d,0x71,0x2b,0x43,0x56,0x39,0x3e,0x6c,0xe9,0xc7,0x87,0x2d,0xfd,0x78,0xec,0xf5,0x93,0xa8,0x2b,0xb3,0x62,0xd9,0x94,0x02,0x2c,0x9f,0x5a,0x38,0x98,0xf4,0x31,0x10,0x56,0xf0,0xb3,0x97,0x4e,0xc4,0xfd,0xca,0x31,0xad,0x24,0xe0,0x17,0xb7,0x5e,0x32,0x68,0x6d,0x74,0x7b,0x82,0xe8,0x74,0x04,0xd0,0xe1,0xf4,0xa3,0xd3,0x19,0x40,0x87,0x63,0xe0,0xef,0x96,0x5e,0x2f,0x65,0x25,0x06,0x60,0xd0,0x88,0xa8,0xc8,0x33,0xa2,0xc4,0xaa,0x47,0xa9,0xcd,0x30,0xf0,0xb7,0xd5,0x80,0x12,0x9b,0x1e,0x45,0x16,0x3d,0x44,0x21,0x31,0xc7,0xad,0x4e,0xd9,0x3d,0xd8,0xb2,0xab,0x19,0x6b,0x97,0x4f,0x1a,0x74,0x3a,0x9f,0xb2,0x7b,0xb0,0xbb,0xb1,0x1b,0xbb,0x1b,0x7b,0xe2,0x92,0x1d,0xf8,0x62,0xb1,0x9b,0x6b,0x70,0xc4,0xfd,0x77,0x26,0xe5,0x38,0xd9,0x7c,0x63,0x3f,0xce,0x86,0x8c,0x50,0x78,0xfc,0x1b,0x93,0x73,0xe0,0x48,0x9b,0x13,0x47,0xda,0x9c,0xf8,0xcd,0xdb,0xa7,0x30,0xa9,0xc0,
0x84,0x5c,0x93,0x16,0xa7,0x92,0x90,0xf1,0x57,0x12,0x05,0x94,0x5a,0x0d,0x28,0xb5,0x1a,0xb0,0x0,0xb9,0x17,0x99,0x9c,0x1c,0x76,0x77,0x60,0x58,0x71,0x38,0xd7,0xe7,0xcb,0xa8,0xbb,0x11,0x4d,0x3a,0x09,0x65,0xe7,0x07,0x77,0x75,0xbe,0x09,0xd5,0x05,0xa6,0xc1,0x81,0x6e,0xd6,0xa7,0xce,0x09,0xc7,0x3f,0xec,0x6d,0xc1,0x5b,0xc7,0xbb,0x50,0x6a,0x33,0xa0,0xad,0xdf,0x0f,0xbb,0x2b,0x90,0xb0,0xb2,0x27,0xe9,0xbc,0xc8,0x97,0x82,0x99,0x29,0x0,0x26,0x41,0xc6,0x6c,0x83,0x13,0x1f,0x8d,0x63,0x47,0x60,0xa2,0x9c,0xe9,0xf1,0xe2,0x4c,0x4f,0x7c,0xd4,0x3b,0x18,0x1e,0xff,0x0c,0x2e,0x0a,0x6c,0x44,0x71,0x90,0x15,0x15,0x5d,0xae,0x0,0xda,0x1d,0x01,0xb4,0x3b,0xfc,0x68,0x77,0xf8,0x51,0x91,0x97,0x1e,0x5b,0x9b,0x75,0x65,0x39,0xc8,0x33,0x6b,0x51,0x66,0x33,0x0c,0xfe,0x29,0xb4,0xe8,0xe2,0x92,0xb7,0x2f,0x1e,0x49,0x59,0x3b,0x9d,0x01,0x74,0x3a,0x03,0x09,0xad,0x33,0x89,0xa9,0x58,0x60,0x4c,0x4c,0x38,0x77,0xd2,0xe4,0xb6,0x4e,0xef,0xc4,0xa9,0xa0,0x19,0x1e,0x45,0x42,0xa6,0xb0,0xaf,0xb9,0x0f,0x93,0x0a,0x62,0xbf,0xb4,0x91,0x44,0x01,0xe5,0xb9,0x46,0x94,0xe7,0x1a,0xd3,0xae,0x4e,0x6e,0x9c,0x5f,0x9e,0xb0,0xb2,0x3e,0x68,0xee,0xcd,0x88,0x7e,0x34,0xd7,0xe8,0x84,0x51,0x48,0x8c,0xc5,0x97,0xb4,0xe3,0x5b,0x22,0xe3,0x58,0x6e,0xee,0xc9,0xa8,0xbb,0xfe,0x9e,0xda,0x73,0x06,0xef,0x36,0xf5,0xd0,0x19,0x84,0x04,0x23,0xab,0x1c,0x7f,0xdc,0x77,0x0e,0x6f,0x1f,0xb7,0xa7,0xfd,0x6f,0x29,0x94,0x82,0x98,0xa9,0x4f,0x5c,0xe8,0xba,0x94,0xdc,0x1f,0x1b,0xc0,0x2c,0x83,0x13,0x87,0xfd,0xd6,0x8c,0xe8,0x88,0x81,0xb0,0x82,0x87,0xb7,0x1f,0x46,0x81,0x45,0x87,0x49,0xf9,0x26,0x94,0xda,0x0c,0x28,0xb5,0x19,0x50,0x66,0x1b,0x70,0x68,0x95,0x5a,0x0d,0x74,0x64,0x76,0x02,0x78,0x83,0x32,0x3a,0x1c,0x7e,0x74,0x38,0x07,0x96,0x42,0x9d,0x4e,0x3f,0x3a,0x1c,0x01,0x9c,0xee,0xce,0x8c,0x1b,0x9c,0x24,0xc6,0xb1,0xcc,0x92,0xd8,0x49,0x31,0xe9,0xf6,0xf7,0x5c,0x43,0x3f,0x3a,0xc2,0x7a,0xf4,0xca,0x99,0x93,0xde,0xb9,0xc7,0x1d,0x1c,0x31,0x15,0x78,0xbe,0x59,0x87,0x52,0xdb,0x80,0xc3,0xab,0xec,0xbc,0x40,0x7c,0xf2,0xff,0xd1,0x9e,0x24,0x1b,0xa9,0xec,0x64,0xd0,0xeb,0x8d,0xcd,0x91,0xef,0x5e,0x4f,0x1
0,0x1d,0xe7,0x1d,0xa1,0xed,0x0e,0xff,0xa0,0x23,0xb4,0xdd,0xe1,0xcf,0xf8,0x6b,0xda,0x16,0x99,0xfa,0xe2,0x16,0xf3,0x9f,0xb2,0x02,0x20,0x30,0xe0,0x4a,0x4b,0x37,0x5e,0x74,0x96,0x22,0xa0,0x66,0x7e,0x58,0x6f,0xaf,0x27,0x88,0x5e,0x4f,0x10,0x87,0x5b,0x87,0x9a,0x79,0x26,0x9d,0x74,0xde,0x52,0xd0,0x0f,0x8a,0x43,0xc9,0xf9,0xff,0x8e,0xc4,0x71,0x76,0xa2,0xd3,0x85,0xb3,0x7d,0xde,0xa4,0xfc,0xae,0xfd,0x67,0xfa,0xd0,0xef,0x0b,0x21,0x77,0x8c,0x68,0xcc,0x01,0x87,0x66,0x10,0x1d,0xce,0x81,0xc1,0xdd,0xe1,0xf0,0xa3,0xdd,0x11,0x40,0xe7,0xf9,0x01,0x9f,0xad,0x5b,0xa1,0xd3,0xf5,0x2e,0xd4,0xea,0x12,0x9f,0xcf,0x22,0x25,0x3c,0x70,0x46,0x41,0xc6,0x95,0x16,0x3b,0x5e,0x75,0x95,0x22,0x9b,0x0f,0xed,0x79,0x83,0x32,0x9a,0xba,0xdc,0x68,0x1a,0x26,0xb1,0x89,0x24,0x0a,0x83,0x5b,0x65,0x65,0x36,0x3d,0x4a,0xac,0x9f,0x2e,0x2f,0x04,0xc6,0x70,0xa4,0xdd,0x89,0xad,0xbb,0xcf,0x24,0xed,0x1e,0x16,0x97,0x3f,0x8c,0xef,0xff,0xf1,0x63,0xac,0x5e,0x36,0x09,0x93,0x0a,0x4c,0xf0,0x87,0x94,0xcf,0x0c,0xf2,0xc0,0xe0,0x6c,0xde,0xed,0x0e,0xa6,0x5d,0x36,0xde,0x78,0x53,0xa2,0x09,0x60,0xa1,0xa9,0x3f,0x29,0x65,0xa7,0x8c,0x0b,0xbe,0x50,0x0a,0x62,0x85,0xb9,0x0b,0x6f,0xbb,0x8b,0xc9,0x89,0x36,0xc2,0xcc,0xd9,0xda,0xe7,0x43,0x6b,0x0a,0xa7,0x40,0x3b,0xdd,0xed,0xc1,0xc3,0xdb,0x0f,0x53,0x63,0x45,0x41,0x9e,0x14,0xc2,0x95,0x96,0x2e,0x08,0x49,0xea,0xf5,0x29,0xe5,0x91,0xaa,0xd0,0xfa,0xb1,0xd8,0xd4,0x4b,0xbd,0x82,0xc8,0x0a,0x4c,0x82,0x8c,0x95,0x96,0x2e,0x68,0x58,0xf2,0xa6,0xbc,0x94,0x73,0x49,0x4f,0xd5,0xbb,0x51,0xa7,0xa7,0x0c,0x3e,0x44,0x66,0xa3,0x63,0x2a,0xae,0xb2,0x74,0x25,0x6c,0xbf,0x3f,0x6d,0x04,0x0,0x0,0x2e,0x31,0xf5,0x63,0x81,0xb1,0x9f,0x7a,0x09,0x91,0x91,0x18,0x04,0x05,0xd7,0x59,0x3b,0x60,0x93,0x92,0xbf,0xab,0x91,0xb2,0x61,0x78,0xb3,0x0c,0x4e,0x30,0x70,0x7c,0xe8,0xcb,0xa3,0x1e,0x43,0x64,0x0c,0x46,0x41,0xc6,0xb5,0x39,0x5d,0x71,0x49,0xf1,0x9d,0x51,0x02,0x0,0x0,0x75,0x06,0x17,0x18,0x80,0xfd,0xbe,0x5c,0x0,0x8c,0x7a,0x0f,0x91,0xd6,0x58,0x44,0x19,0xd7,0x58,0x3a,0x61,0x16,0x53,0xe7,0xaa,0xb2,0x91,0x04,0x40,0x01,0x52,0xe3,0x2e,0xaf,0x99,0x06,0x17,0xa
c,0x62,0x08,0x3b,0x3d,0xc5,0x08,0x73,0x12,0x01,0x22,0x3d,0x29,0xd5,0xf8,0x71,0xa5,0xa5,0x1b,0x12,0x4b,0x6e,0x9c,0x03,0xe7,0x5c,0x8e,0xc4,0x07,0xe0,0x49,0xa5,0xca,0x2b,0xd3,0x06,0x70,0x8d,0xa5,0x13,0x06,0x41,0xa1,0x9e,0x44,0xa4,0x1d,0xd5,0x3a,0x2f,0x56,0x5a,0xec,0x49,0x1f,0xfc,0x0,0xc0,0x18,0x73,0x8f,0x2d,0x0,0x17,0xbd,0x29,0x15,0x28,0xd0,0x04,0x71,0xa3,0xad,0x1d,0x25,0x9a,0x0,0xf5,0x28,0x22,0x2d,0x10,0x18,0x70,0x99,0xa9,0x07,0x57,0x98,0xbb,0x21,0xb2,0x14,0x89,0x6e,0xe1,0xdc,0x35,0xb6,0x0,0xa8,0x6a,0x4a,0xde,0xb1,0xa5,0x63,0x0a,0xae,0xb6,0x74,0x62,0x9a,0xce,0x45,0xbd,0x8b,0x48,0x69,0xf4,0x82,0x8a,0xab,0x2d,0x9d,0x98,0xaa,0x4f,0x29,0x63,0x1a,0x0c,0x70,0x8f,0xe9,0x03,0xe0,0x8c,0xb9,0x53,0x75,0xb5,0x2d,0x30,0x60,0xb1,0xb9,0x0f,0x95,0x3a,0x3f,0xf6,0x78,0x0a,0xe0,0x53,0x29,0x2d,0x38,0x91,0x5a,0x4c,0xd2,0x7a,0xb0,0xd8,0xdc,0x07,0x2d,0x4b,0xbd,0x73,0x0d,0xea,0x45,0x02,0x20,0x8c,0xa0,0x12,0x5d,0xa9,0x5e,0xc9,0xa5,0x1a,0x3f,0x6e,0xb0,0xb6,0xa3,0x54,0xe3,0xa7,0x1e,0x47,0xa4,0x04,0x22,0xe3,0x58,0x6c,0xea,0xc5,0x72,0x4b,0x4f,0x4a,0x0e,0xfe,0x81,0x67,0x64,0x5d,0x63,0x2f,0x01,0x80,0xe3,0xe9,0x50,0xe1,0x06,0x41,0xc1,0x35,0x39,0x5d,0x58,0x61,0xe9,0x8e,0x6b,0xee,0x74,0x82,0x18,0x8b,0x4a,0xad,0x0f,0x37,0xd9,0x5a,0x31,0x4d,0xef,0x4e,0xe5,0x0d,0x6b,0x55,0x63,0x32,0x9d,0x88,0x64,0x09,0x70,0x82,0xa5,0xd1,0x89,0xad,0x2a,0xad,0x17,0x25,0x36,0x3f,0x0e,0xf8,0xf3,0xd0,0x18,0x30,0x53,0x6f,0x24,0x12,0x86,0x8e,0x29,0xb8,0xcc,0xdc,0x87,0x2a,0xad,0x37,0x1d,0x1e,0xf7,0xdc,0xaa,0x55,0xab,0xfc,0x63,0x0a,0x0,0x14,0xe5,0x04,0x84,0xf4,0xca,0x5c,0xa3,0x15,0x54,0x5c,0x66,0xea,0x41,0xb9,0xc6,0x87,0xfd,0xde,0x3c,0xb8,0x55,0x89,0x7a,0x27,0x11,0x37,0x18,0x80,0x49,0x3a,0x0f,0x2e,0x31,0xf6,0xa7,0xd3,0xf6,0xf4,0x89,0x8b,0x5f,0x18,0x7e,0x94,0x68,0x34,0x27,0xa1,0xa4,0xa7,0x49,0x5d,0xa1,0xf5,0xa1,0x4c,0xeb,0xc7,0xc9,0x80,0x05,0x1f,0xf9,0x72,0x29,0x78,0x88,0x88,0x39,0x85,0x52,0x10,0x8b,0x4c,0xbd,0xc8,0x93,0x42,0xe9,0xf5,0xe0,0x8c,0x9d,0xbc,0xf8,0xa5,0x61,0xa7,0xf9,0xb5,0x6b,0xd7,0xda,0x01,0x9c,0x4c,0xd7,0x06,0x12,0xc
0,0x31,0x43,0xef,0xc2,0x8d,0xb6,0x36,0x4c,0xd1,0x79,0x28,0x88,0x98,0x88,0x09,0x26,0x41,0xc6,0x52,0x53,0x0f,0xae,0xb3,0x76,0xa4,0xdf,0xe0,0x07,0xc0,0x54,0x75,0x57,0x64,0x16,0xc0,0x80,0x89,0xf3,0x06,0x07,0xa6,0xa5,0x7b,0x83,0x2d,0x31,0xf7,0x60,0x8e,0xa1,0x1f,0x87,0x03,0xb9,0x68,0x0a,0x98,0x29,0xd9,0x08,0x11,0x35,0x46,0x41,0xc1,0x1c,0x43,0x3f,0x6a,0xf5,0xde,0xa4,0x25,0xee,0x88,0x01,0x5c,0x95,0xa4,0x37,0x23,0x16,0x0,0x15,0x78,0x83,0x01,0xdf,0xca,0x08,0xe5,0x16,0x15,0x5c,0x66,0xea,0xc1,0x54,0x9d,0x0b,0x87,0x02,0x36,0xb4,0x06,0x8d,0x24,0x04,0xc4,0x98,0xe8,0x05,0x15,0x33,0xf5,0x4e,0x4c,0xd7,0xbb,0x53,0x22,0x8c,0x77,0x82,0x1c,0x3a,0x6f,0xd9,0x47,0x26,0x0,0x5c,0x10,0xde,0x60,0xaa,0xaa,0x22,0x45,0x73,0x06,0x8c,0x87,0x3c,0x29,0x84,0x2b,0xcd,0x76,0x38,0x0c,0x5a,0x1c,0xf6,0x59,0x71,0x26,0x64,0x04,0x9d,0x32,0x24,0x2e,0xc6,0x20,0x28,0xa8,0x33,0xb8,0x30,0x55,0xe7,0x82,0xc4,0x32,0x63,0xaa,0xe0,0xc0,0x1b,0xc3,0x2f,0x97,0x47,0xe0,0xf6,0xdb,0x6f,0xef,0x05,0xb0,0x3f,0x13,0x1b,0xd8,0x26,0x86,0x70,0xb9,0xa5,0x1b,0x5f,0xcd,0x6d,0xc5,0x5c,0x83,0x83,0x62,0x08,0x08,0x0,0x40,0x89,0xc6,0x8f,0x95,0x16,0x3b,0x6e,0xc9,0x3d,0x87,0x99,0x7a,0x67,0xc6,0x0c,0xfe,0xf3,0x4b,0xfa,0x97,0x87,0x7b,0x7d,0xd4,0xbd,0x32,0xce,0xd8,0x16,0xc6,0xf9,0xa2,0xcc,0x35,0xf1,0x14,0xcc,0x35,0x3a,0x30,0xd3,0xe0,0xc4,0xa9,0xa0,0x05,0x27,0x03,0x16,0xb8,0x14,0x0d,0x8d,0x84,0x2c,0x42,0x0,0x47,0x85,0xd6,0x8f,0xe9,0x7a,0x27,0x8a,0x35,0xc1,0xcc,0xfc,0x91,0x8c,0xb5,0x9e,0x3a,0x73,0xe6,0x95,0xa8,0x05,0x40,0x2f,0xcb,0xcf,0x04,0x45,0xf1,0x67,0x0,0x74,0x99,0xdc,0x09,0x34,0x6c,0x60,0xd7,0x60,0x86,0xde,0x85,0x5e,0x59,0x8b,0xa6,0x60,0x0e,0x9a,0x83,0x26,0xc8,0xb4,0x85,0x98,0xb1,0xd8,0xc4,0x10,0xa6,0xeb,0x5d,0xa8,0xd6,0xf9,0x52,0x36,0x6c,0x37,0x76,0xf6,0x3f,0x7f,0x66,0xc3,0x86,0x0d,0xea,0x08,0x96,0xc1,0xe8,0x6c,0x6d,0x68,0xf8,0x0b,0x03,0x6e,0xca,0xb6,0x0e,0xe2,0xe7,0x22,0x9a,0x83,0x66,0x9c,0x0e,0x9a,0xe1,0x90,0xc9,0x2a,0xc8,0x04,0x44,0xc6,0x51,0xa9,0xf1,0xa1,0x46,0xef,0x41,0x59,0x16,0x9d,0x21,0x51,0x18,0x9b,0x7d,0xc7,0x1d,0x77,0x1c,0x89,0xd
a,0x02,0x0,0x0,0x81,0xf3,0xa7,0x38,0x63,0x59,0x27,0x0,0x06,0xa6,0xa0,0x4e,0xef,0x44,0x9d,0xde,0x09,0x87,0xa2,0x45,0x73,0xd0,0x84,0x33,0x41,0x13,0xbc,0x14,0x61,0x98,0x6e,0x6b,0x5f,0x14,0x6b,0x02,0xa8,0xd1,0x79,0x50,0xa9,0xf5,0x26,0x35,0x05,0x77,0x92,0xf8,0x68,0xa4,0xc1,0x1f,0x91,0x0,0x34,0x9d,0x3d,0xfb,0xe7,0x29,0xd5,0xd5,0x47,0x38,0x30,0x2b,0x9b,0xcd,0xc5,0x05,0xc6,0x10,0x16,0x18,0xfb,0xe1,0x54,0x34,0x68,0x0d,0x19,0xd1,0x16,0x36,0xa2,0x3b,0xac,0xa3,0xed,0xc4,0x14,0x5d,0xd2,0x55,0x68,0xbd,0xa8,0xd6,0xfa,0x50,0xa2,0xf1,0x67,0x94,0x33,0x2f,0x6a,0x01,0xe4,0xfc,0xa7,0x63,0x09,0xe4,0x98,0x6c,0x6d,0x68,0xb8,0x83,0x01,0x0d,0xd4,0xb5,0x2e,0xc4,0xa9,0x68,0xd0,0x16,0x32,0xa2,0x3d,0x6c,0x80,0x3d,0xac,0x83,0x4a,0x5b,0x8a,0x49,0xc3,0x28,0xc8,0x28,0xd5,0x06,0x50,0xae,0xf1,0xa3,0x4c,0xe3,0xcf,0x84,0x7d,0xfb,0x58,0x70,0x52,0x67,0x32,0xd5,0xad,0x5a,0xb5,0x4a,0x19,0xb7,0x05,0x0,0x0,0xfe,0x50,0xe8,0x69,0x93,0x56,0xfb,0x20,0x07,0x26,0x51,0x9d,0x7e,0x8a,0x55,0x0c,0xc3,0x6a,0x70,0xa2,0xce,0xe0,0x84,0xcc,0x19,0xba,0xc2,0x7a,0xb4,0xcb,0x46,0x74,0x86,0xf4,0x70,0x29,0x1a,0xb2,0x0e,0xe2,0x39,0xcb,0x43,0x45,0x91,0x36,0x88,0x62,0xc9,0x8f,0x32,0x6d,0x0,0x36,0x31,0x44,0x95,0x32,0x74,0x76,0x7f,0x64,0xb4,0xc1,0x1f,0xb1,0x05,0x0,0x0,0x4f,0x35,0x34,0xdc,0xcd,0x81,0x47,0xa9,0x5a,0x23,0xc3,0xcf,0x45,0x74,0x85,0xf5,0xe8,0x3a,0x6f,0x1d,0x90,0x20,0x4c,0x0c,0x89,0xa9,0x28,0x90,0x42,0x28,0xd1,0x04,0x50,0x24,0xf9,0x51,0xa0,0x09,0xa5,0x73,0x58,0x6e,0x02,0x46,0x3f,0x6b,0xed,0x77,0xb9,0xa6,0xdc,0x73,0xcf,0x3d,0xc1,0x98,0x08,0xc0,0xb6,0x6d,0xdb,0xc4,0xa0,0xd7,0xbb,0x0f,0xc0,0x7c,0xaa,0xdd,0xe8,0x09,0x72,0x01,0xbd,0xb2,0x0e,0xdd,0x61,0x1d,0xba,0x65,0x3d,0xfa,0x64,0x2d,0x42,0x5c,0xa0,0x8a,0x19,0x06,0x91,0x71,0xe4,0x8a,0x21,0x14,0x48,0x41,0xe4,0x49,0x41,0xe4,0x4b,0x21,0x58,0x53,0xe4,0x22,0x8d,0x74,0x81,0x03,0xb7,0xac,0xad,0xaf,0xff,0x73,0x04,0x56,0x42,0xe4,0x3c,0xbd,0x65,0xcb,0x62,0x55,0x55,0xf7,0x20,0x83,0xc2,0x83,0x93,0x89,0x5b,0x95,0xe0,0x90,0xb5,0xe8,0x95,0xb5,0x70,0xc8,0x5a,0x38,0x54,0x2d,0x7c,0x8a,0x98,0x55,0x
be,0x04,0x2d,0x53,0x61,0x11,0x65,0xe4,0x8a,0x41,0xe4,0x4b,0x41,0x14,0x48,0x21,0x58,0xa5,0x30,0xcd,0xee,0x13,0x19,0xfc,0x8c,0x3d,0xbf,0xf6,0x8e,0x3b,0xbe,0x18,0xe1,0x32,0x21,0x3a,0xb6,0x36,0x34,0x6c,0x62,0xc0,0x3a,0xaa,0xe6,0xf8,0x11,0x52,0x05,0x38,0x55,0x2d,0x1c,0xb2,0x04,0x8f,0xaa,0x81,0x5b,0x91,0xe0,0x55,0x35,0x70,0x28,0x1a,0x28,0x69,0x18,0x9c,0x64,0x11,0x64,0x98,0xc4,0x30,0x2c,0xa2,0x02,0xb3,0x10,0x86,0x45,0x94,0x61,0x15,0x43,0x30,0x0b,0x72,0xea,0xa4,0xcb,0xce,0x1c,0x02,0xe7,0xf7,0xfd,0x4f,0x45,0xb4,0xb4,0x8a,0x7a,0x2d,0x26,0xcb,0x0f,0x28,0x1a,0xcd,0x97,0xc0,0x79,0x01,0xd5,0x75,0x9c,0x66,0x45,0x41,0x45,0xa1,0x10,0x40,0xe1,0x45,0xad,0xa3,0x82,0x9d,0x17,0x03,0x09,0x41,0x2e,0x22,0xa0,0x8a,0x08,0xaa,0x02,0x02,0xaa,0x88,0x0,0x1f,0xf8,0xef,0x20,0x17,0xe0,0x57,0xa5,0xb8,0x27,0x42,0x11,0xc0,0xa1,0x13,0x54,0xe8,0x98,0x02,0xbd,0xa0,0x42,0xcf,0x64,0xe8,0x05,0x15,0x5a,0xa6,0x40,0x2f,0x72,0x18,0x98,0x0c,0x1d,0x53,0x61,0x11,0xc3,0x74,0xa1,0x4b,0x62,0x79,0x24,0xd2,0xc1,0x3f,0x2e,0x0b,0xe0,0xfc,0x52,0xe0,0x2a,0x55,0x55,0x5f,0x45,0x8a,0x5c,0x1f,0x46,0x0c,0x45,0x05,0x43,0x50,0x15,0x20,0x73,0x86,0x10,0x17,0x10,0xe6,0x02,0x64,0x2e,0x40,0x86,0x80,0x70,0x84,0x3b,0x64,0x8c,0x01,0x5a,0xc6,0x21,0x41,0x81,0xc4,0x38,0x24,0xc6,0xa1,0x11,0x54,0x68,0xa1,0x42,0x27,0xd0,0x36,0x5b,0x0a,0xf2,0xea,0xa9,0x96,0x96,0xcf,0x8f,0x14,0xf6,0x1b,0x33,0x01,0x0,0x80,0x27,0x1b,0x1a,0x1e,0x06,0xf0,0x03,0xaa,0x73,0x82,0x48,0x09,0xda,0xb9,0x28,0x2e,0x18,0xee,0xcc,0xff,0xe8,0x96,0xdc,0x38,0x39,0xd5,0xd2,0xb2,0x81,0x31,0xf6,0x1a,0xd5,0x3b,0x41,0x24,0x1d,0x85,0x73,0x7e,0x5b,0xb4,0x83,0x7f,0x42,0x16,0x0,0x0,0x3c,0xb3,0x69,0x53,0x99,0x22,0x08,0xbb,0x29,0x40,0x88,0x20,0x92,0xca,0xbf,0xac,0xa9,0xaf,0x7f,0x64,0x3c,0x1f,0x9c,0xd0,0x76,0xde,0xad,0xeb,0xd7,0xb7,0x33,0xce,0xaf,0x01,0xd0,0x41,0x6d,0x40,0x10,0x89,0x87,0x03,0xff,0x36,0xde,0xc1,0x3f,0x61,0x0b,0xe0,0x13,0x9e,0x6a,0x68,0x98,0xc3,0x81,0xb7,0x01,0xe4,0x52,0x93,0x10,0x44,0xc2,0xd8,0xb4,0xfa,0x8e,0x3b,0xee,0x62,0x6c,0xfc,0x7b,0xa9,0x31,0x09,0xe8,0x59,0x5d,0x5f,0x7f,0
x08,0xc0,0xd7,0x01,0xf8,0xa8,0x4d,0x08,0x22,0x21,0x3c,0xa7,0x33,0x99,0xfe,0x61,0x22,0x83,0x3f,0x66,0x02,0x0,0x0,0x6b,0xea,0xeb,0x5f,0x65,0xc0,0x12,0x0,0x6d,0xd4,0x36,0x04,0x11,0x57,0x7e,0xa5,0x33,0x99,0xbe,0xbc,0x6a,0xd5,0xaa,0x09,0x9f,0x80,0x8a,0x79,0xb4,0xc8,0xd6,0xad,0x5b,0x27,0x33,0x45,0x79,0x09,0x69,0x7e,0xa7,0x0,0x41,0xa4,0xe6,0x92,0x1f,0xf7,0x4f,0x64,0xcd,0x1f,0x37,0x0b,0xe0,0x13,0xd6,0xae,0x5d,0xdb,0x2c,0xa9,0xea,0x55,0x60,0x6c,0x1f,0xb5,0x17,0x41,0xc4,0x8c,0x10,0x38,0xff,0xfb,0x58,0x0e,0xfe,0xb8,0x08,0x0,0x30,0xb0,0x3b,0x50,0x71,0xe6,0xcc,0x52,0x0,0x0f,0x01,0xa0,0x90,0x31,0x82,0x98,0x18,0x47,0x19,0xe7,0x0b,0xd6,0xac,0x5b,0xb7,0x31,0xd6,0x5f,0x1c,0xf7,0x93,0x25,0x4f,0xfd,0xee,0x77,0x37,0xaa,0x8c,0x6d,0x61,0x40,0x1e,0xb5,0x23,0x41,0x44,0xcd,0x1f,0x42,0xaa,0xfa,0x77,0xeb,0xd7,0xaf,0x77,0xc7,0xe3,0xcb,0x13,0x72,0xb4,0x6c,0xf3,0xe6,0xcd,0xb5,0x22,0xe7,0x8f,0x01,0xf8,0x3c,0xb5,0x27,0x41,0x44,0xb4,0xd8,0xef,0x63,0x9c,0x3f,0xb0,0xba,0xbe,0xfe,0xd7,0x13,0xf5,0xf4,0x27,0x5d,0x0,0x06,0xad,0x81,0xcd,0x9b,0xaf,0x51,0x39,0x7f,0x8c,0x01,0x33,0xa8,0x89,0x09,0x62,0x58,0x14,0xc6,0xf9,0xe3,0x61,0xc6,0x1e,0xac,0xaf,0xaf,0x77,0xc4,0xbb,0xb0,0x84,0x1f,0x2e,0xdf,0xba,0x75,0xab,0x09,0xb2,0x7c,0x3f,0x63,0xec,0x1e,0x0,0x16,0x6a,0x6f,0x82,0x18,0x64,0x37,0x03,0xbe,0xb7,0xba,0xbe,0xfe,0xbd,0x44,0x15,0x98,0xb4,0xec,0x12,0xdb,0x1e,0x7f,0xdc,0x1c,0x34,0x1a,0xd7,0x03,0xf8,0x67,0x0,0xa5,0xd4,0xf6,0x44,0xb6,0x5a,0xfb,0x9c,0xb1,0x17,0x38,0x63,0x3f,0xb9,0xfd,0xf6,0xdb,0xf7,0x24,0xba,0xf0,0xa4,0xa7,0x97,0xd9,0xb6,0x71,0xa3,0x35,0xa0,0xd1,0xfc,0x03,0x13,0x84,0x75,0xe0,0x7c,0x0a,0xf5,0x07,0x22,0x4b,0x08,0x32,0xe0,0x05,0x55,0x55,0x7f,0xb1,0x76,0xfd,0xfa,0x5d,0xc9,0x7a,0x88,0x94,0xca,0x2f,0xb5,0x79,0xf3,0xe6,0x59,0x22,0xe7,0x5f,0x03,0xb0,0x06,0x40,0x0d,0xf5,0x11,0x22,0xc3,0x08,0x73,0xc6,0x5e,0x06,0xe7,0x5b,0xf5,0x26,0xd3,0xf3,0xab,0x56,0xad,0x4a,0xfa,0xfd,0x64,0x29,0x99,0x60,0xee,0xcd,0x0d,0x1b,0xa4,0xd6,0xea,0xea,0xa5,0x9c,0xf3,0x6b,0x19,0x63,0xd7,0x82,0xb1,0xc5,0xe0,0x9c,0xee,0xe4,0x22,0xd2,0x91,
0x0e,0x06,0xbc,0xc6,0x19,0x7b,0x4d,0xe6,0xfc,0x95,0xfa,0xfa,0xfa,0xce,0x54,0x7a,0xb8,0xb4,0xc8,0x30,0xf9,0xd4,0x53,0x4f,0xe5,0xa8,0xe1,0xf0,0xe5,0x60,0x6c,0x36,0x38,0x9f,0x2e,0x70,0x3e,0x93,0x33,0x36,0x03,0x74,0xfa,0x90,0x48,0x99,0x91,0xc4,0x64,0x70,0xde,0xcc,0x81,0xa3,0x02,0xe7,0x27,0x38,0x70,0x5c,0x11,0x84,0xf7,0x47,0xbb,0x97,0x8f,0x04,0x20,0x06,0xfe,0x03,0x8f,0x46,0xa3,0xd5,0x01,0x16,0xce,0x98,0x89,0x31,0xa6,0xa5,0x9e,0x48,0x24,0x0a,0x55,0x96,0x9d,0x5c,0xab,0x0d,0x89,0xa2,0xe8,0xd6,0x68,0x34,0xde,0x58,0x1c,0xce,0x21,0x08,0x82,0x20,0x08,0x82,0x20,0x08,0x82,0x88,0x23,0xff,0x1f,0x03,0x13,0x3a,0x12,0x64,0x41,0x40,0x31,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 static const Color boot_splash_bg_color = Color(224/255.0,224/255.0,224/255.0);
diff --git a/methods.py b/methods.py
index 9608b1b61d..21c2293bf0 100755
--- a/methods.py
+++ b/methods.py
@@ -1098,6 +1098,8 @@ def update_version():
 	f.write("#define VERSION_MINOR "+str(version.minor)+"\n")
 	f.write("#define VERSION_REVISION "+str(rev)+"\n")
 	f.write("#define VERSION_STATUS "+str(version.status)+"\n")
+	import datetime
+	f.write("#define VERSION_YEAR "+str(datetime.datetime.now().year)+"\n")
 
 def parse_cg_file(fname, uniforms, sizes, conditionals):
 
diff --git a/modules/SCsub b/modules/SCsub
index d215f72c08..9215bfd48f 100644
--- a/modules/SCsub
+++ b/modules/SCsub
@@ -19,5 +19,3 @@ for x in env.module_list:
 lib = env_modules.Library("modules",env.modules_sources)
 
 env.Prepend(LIBS=[lib])
-
-
diff --git a/modules/gdscript/SCsub b/modules/gdscript/SCsub
index d20da72b72..403fe68f66 100644
--- a/modules/gdscript/SCsub
+++ b/modules/gdscript/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.modules_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/modules/gdscript/gd_editor.cpp b/modules/gdscript/gd_editor.cpp
index 0d986e92a2..381edcba50 100644
--- a/modules/gdscript/gd_editor.cpp
+++ b/modules/gdscript/gd_editor.cpp
@@ -1291,6 +1291,15 @@ static void _find_identifiers(GDCompletionContext& context,int p_line,bool p_onl
 
 	const GDParser::BlockNode *block=context.block;
 
+	if (context.function) {
+
+		const GDParser::FunctionNode* f = context.function;
+
+		for (int i=0;i<f->arguments.size();i++) {
+			result.insert(f->arguments[i].operator String());
+		}
+	}
+
 	while(block) {
 
 		GDCompletionContext c = context;
diff --git a/modules/gdscript/gd_parser.cpp b/modules/gdscript/gd_parser.cpp
index 313fb57d0e..c55bfee591 100644
--- a/modules/gdscript/gd_parser.cpp
+++ b/modules/gdscript/gd_parser.cpp
@@ -65,8 +65,10 @@ bool GDParser::_enter_indent_block(BlockNode* p_block) {
 
 
 	if (tokenizer->get_token()!=GDTokenizer::TK_COLON) {
-
-		_set_error("':' expected at end of line.");
+		// report location at the previous token (on the previous line)
+		int error_line = tokenizer->get_token_line(-1);
+		int error_column = tokenizer->get_token_column(-1);
+		_set_error("':' expected at end of line.",error_line,error_column);
 		return false;
 	}
 	tokenizer->advance();
@@ -1940,9 +1942,15 @@ void GDParser::_parse_extends(ClassNode *p_class) {
 
 	p_class->extends_used=true;
 
-	//see if inheritance happens from a file
 	tokenizer->advance();
 
+	if (tokenizer->get_token()==GDTokenizer::TK_BUILT_IN_TYPE && tokenizer->get_token_type()==Variant::OBJECT) {
+		p_class->extends_class.push_back(Variant::get_type_name(Variant::OBJECT));
+		tokenizer->advance();
+		return;
+	}
+
+	// see if inheritance happens from a file
 	if (tokenizer->get_token()==GDTokenizer::TK_CONSTANT) {
 
 		Variant constant = tokenizer->get_token_constant();
diff --git a/modules/gdscript/gd_parser.h b/modules/gdscript/gd_parser.h
index 134279b6d8..04f3dff3de 100644
--- a/modules/gdscript/gd_parser.h
+++ b/modules/gdscript/gd_parser.h
@@ -276,7 +276,6 @@ public:
 	};
 
 	struct NewLineNode : public Node {
-		int line;
 		NewLineNode() { type=TYPE_NEWLINE; }
 	};
 
diff --git a/modules/gdscript/gd_script.cpp b/modules/gdscript/gd_script.cpp
index 99ddc74bb4..c3cc779bce 100644
--- a/modules/gdscript/gd_script.cpp
+++ b/modules/gdscript/gd_script.cpp
@@ -1449,7 +1449,7 @@ Object *GDNativeClass::instance() {
 
 
 
-GDInstance* GDScript::_create_instance(const Variant** p_args,int p_argcount,Object *p_owner,bool p_isref) {
+GDInstance* GDScript::_create_instance(const Variant** p_args,int p_argcount,Object *p_owner,bool p_isref,Variant::CallError& r_error) {
 
 
 	/* STEP 1, CREATE */
@@ -1465,14 +1465,13 @@ GDInstance* GDScript::_create_instance(const Variant** p_args,int p_argcount,Obj
 
 	instances.insert(instance->owner);
 
-	Variant::CallError err;
-	initializer->call(instance,p_args,p_argcount,err);
+	initializer->call(instance,p_args,p_argcount,r_error);
 
-	if (err.error!=Variant::CallError::CALL_OK) {
+	if (r_error.error!=Variant::CallError::CALL_OK) {
 		instance->script=Ref<GDScript>();
 		instance->owner->set_script_instance(NULL);
 		instances.erase(p_owner);
-		ERR_FAIL_COND_V(err.error!=Variant::CallError::CALL_OK, NULL); //error consrtucting
+		ERR_FAIL_COND_V(r_error.error!=Variant::CallError::CALL_OK, NULL); //error constructing
 	}
 
 	//@TODO make thread safe
@@ -1505,7 +1504,7 @@ Variant GDScript::_new(const Variant** p_args,int p_argcount,Variant::CallError&
 	}
 
 
-	GDInstance* instance = _create_instance(p_args,p_argcount,owner,r!=NULL);
+	GDInstance* instance = _create_instance(p_args,p_argcount,owner,r!=NULL,r_error);
 	if (!instance) {
 		if (ref.is_null()) {
 			memdelete(owner); //no owner, sorry
@@ -1637,7 +1636,8 @@ ScriptInstance* GDScript::instance_create(Object *p_this) {
 		}
 	}
 
-	return _create_instance(NULL,0,p_this,p_this->cast_to<Reference>());
+	Variant::CallError unchecked_error;
+	return _create_instance(NULL,0,p_this,p_this->cast_to<Reference>(),unchecked_error);
 
 }
 bool GDScript::instance_has(const Object *p_this) const {
@@ -2274,6 +2274,26 @@ bool GDInstance::get(const StringName& p_name, Variant &r_ret) const {
 	return false;
 
 }
+
+Variant::Type GDInstance::get_property_type(const StringName& p_name,bool *r_is_valid) const {
+	// Returns the type recorded in member_info for p_name, checking this instance's script first and then each script reached through _base.
+	// If r_is_valid is non-NULL it is set to true when an entry was found and to false otherwise (Variant::NIL is returned in that case).
+	const GDScript *sptr=script.ptr();
+	while(sptr) {
+		// the first script in the chain that has an entry for p_name wins
+		if (sptr->member_info.has(p_name)) {
+			if (r_is_valid)
+				*r_is_valid=true;
+			return sptr->member_info[p_name].type;
+		}
+		sptr = sptr->_base;
+	}
+	// no script in the chain has an entry for p_name
+	if (r_is_valid)
+		*r_is_valid=false;
+	return Variant::NIL;
+}
+
 void GDInstance::get_property_list(List<PropertyInfo> *p_properties) const {
 	// exported members, not doen yet!
 
diff --git a/modules/gdscript/gd_script.h b/modules/gdscript/gd_script.h
index 37ef47af6c..3d16b59065 100644
--- a/modules/gdscript/gd_script.h
+++ b/modules/gdscript/gd_script.h
@@ -286,7 +286,7 @@ friend class GDScriptLanguage;
 	String name;
 
 
-	GDInstance* _create_instance(const Variant** p_args,int p_argcount,Object *p_owner,bool p_isref);
+	GDInstance* _create_instance(const Variant** p_args,int p_argcount,Object *p_owner,bool p_isref,Variant::CallError &r_error);
 
 	void _set_subclass_path(Ref<GDScript>& p_sc,const String& p_path);
 
@@ -373,6 +373,8 @@ public:
 	virtual bool set(const StringName& p_name, const Variant& p_value);
 	virtual bool get(const StringName& p_name, Variant &r_ret) const;
 	virtual void get_property_list(List<PropertyInfo> *p_properties) const;
+	virtual Variant::Type get_property_type(const StringName& p_name,bool *r_is_valid=NULL) const;
+
 
 	virtual void get_method_list(List<MethodInfo> *p_list) const;
 	virtual bool has_method(const StringName& p_method) const;
diff --git a/modules/gdscript/gd_tokenizer.cpp b/modules/gdscript/gd_tokenizer.cpp
index b591ed3b4b..e445701669 100644
--- a/modules/gdscript/gd_tokenizer.cpp
+++ b/modules/gdscript/gd_tokenizer.cpp
@@ -774,20 +774,15 @@ void GDTokenizerText::_advance() {
 							{Variant::INT,"int"},
 							{Variant::REAL,"float"},
 							{Variant::STRING,"String"},
-							{Variant::VECTOR2,"vec2"},
 							{Variant::VECTOR2,"Vector2"},
 							{Variant::RECT2,"Rect2"},
 							{Variant::MATRIX32,"Matrix32"},
-							{Variant::MATRIX32,"mat32"},
-							{Variant::VECTOR3,"vec3"},
 							{Variant::VECTOR3,"Vector3"},
 							{Variant::_AABB,"AABB"},
 							{Variant::_AABB,"Rect3"},
 							{Variant::PLANE,"Plane"},
 							{Variant::QUAT,"Quat"},
-							{Variant::MATRIX3,"mat3"},
 							{Variant::MATRIX3,"Matrix3"},
-							{Variant::TRANSFORM,"trn"},
 							{Variant::TRANSFORM,"Transform"},
 							{Variant::COLOR,"Color"},
 							{Variant::IMAGE,"Image"},
@@ -795,7 +790,6 @@ void GDTokenizerText::_advance() {
 							{Variant::OBJECT,"Object"},
 							{Variant::INPUT_EVENT,"InputEvent"},
 							{Variant::NODE_PATH,"NodePath"},
-							{Variant::DICTIONARY,"dict"},
 							{Variant::DICTIONARY,"Dictionary"},
 							{Variant::ARRAY,"Array"},
 							{Variant::RAW_ARRAY,"RawArray"},
diff --git a/modules/gridmap/SCsub b/modules/gridmap/SCsub
index 4cb47e7e67..211a043468 100644
--- a/modules/gridmap/SCsub
+++ b/modules/gridmap/SCsub
@@ -1,6 +1,3 @@
 Import('env')
 
 env.add_source_files(env.modules_sources,"*.cpp")
-
-
-
diff --git a/platform/android/SCsub b/platform/android/SCsub
index 6feeb8b365..834ee58adc 100644
--- a/platform/android/SCsub
+++ b/platform/android/SCsub
@@ -62,10 +62,10 @@ pp_baseout.write( manifest )
 
 
 for x in env.android_source_files:
-	shutil.copy(x,abspath+"/java/src/com/android/godot")	
+	shutil.copy(x,abspath+"/java/src/com/android/godot")
 
 for x in env.android_module_libraries:
-	shutil.copy(x,abspath+"/java/libs")	
+	shutil.copy(x,abspath+"/java/libs")
 
 
 env_android.SharedLibrary("#bin/libgodot",[android_objects],SHLIBSUFFIX=env["SHLIBSUFFIX"])
diff --git a/platform/android/detect.py b/platform/android/detect.py
index 9db5d02b48..66097a5149 100644
--- a/platform/android/detect.py
+++ b/platform/android/detect.py
@@ -98,6 +98,7 @@ def configure(env):
 
 	if env['android_arch']=='x86':
 		env['NDK_TARGET']=env['NDK_TARGET_X86']
+		env["x86_opt_gcc"]=True
 
 	if env['PLATFORM'] == 'win32':
 		import methods
@@ -210,7 +211,8 @@ def configure(env):
 #	env.Append(CPPFLAGS=['-DANDROID_ENABLED', '-DUNIX_ENABLED','-DMPC_FIXED_POINT'])
 
 	if(env["opus"]=="yes"):
-		env.Append(CFLAGS=["-DOPUS_ARM_OPT"])
+		if (env["android_arch"]=="armv6" or env["android_arch"]=="armv7"):
+			env.Append(CFLAGS=["-DOPUS_ARM_OPT"])
 		env.opus_fixed_point="yes"
 
 	if (env['android_stl']=='yes'):
diff --git a/platform/android/export/export.cpp b/platform/android/export/export.cpp
index f8fc03ec61..7d550f4fa0 100644
--- a/platform/android/export/export.cpp
+++ b/platform/android/export/export.cpp
@@ -1020,18 +1020,24 @@ Error EditorExportPlatformAndroid::export_project(const String& p_path, bool p_d
 
 	EditorProgress ep("export","Exporting for Android",104);
 
-	String apk_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
-
-	if (p_debug) {
-
-		src_apk=custom_debug_package!=""?custom_debug_package:apk_path+"android_debug.apk";
-	} else {
-
-		src_apk=custom_release_package!=""?custom_release_package:apk_path+"android_release.apk";
+	if (p_debug)
+		src_apk=custom_debug_package;
+	else
+		src_apk=custom_release_package;
 
+	if (src_apk=="") {
+		String err;
+		if (p_debug) {
+			src_apk=find_export_template("android_debug.apk", &err);
+		} else {
+			src_apk=find_export_template("android_release.apk", &err);
+		}
+		if (src_apk=="") {
+			EditorNode::add_io_error(err);
+			return ERR_FILE_NOT_FOUND;
+		}
 	}
 
-
 	FileAccess *src_f=NULL;
 	zlib_filefunc_def io = zipio_create_io_from_file(&src_f);
 
@@ -1123,6 +1129,10 @@ Error EditorExportPlatformAndroid::export_project(const String& p_path, bool p_d
 		if (file=="lib/armeabi/libgodot_android.so" && !export_arm) {
 			skip=true;
 		}
+		
+		if (file.begins_with("META-INF") && _signed) {
+			skip=true;
+		}
 
 		print_line("ADDING: "+file);
 
@@ -1655,10 +1665,7 @@ bool EditorExportPlatformAndroid::can_export(String *r_error) const {
 		err+="Debug Keystore not configured in editor settings.\n";
 	}
 
-
-	String exe_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
-
-	if (!FileAccess::exists(exe_path+"android_debug.apk") || !FileAccess::exists(exe_path+"android_release.apk")) {
+	if (!exists_export_template("android_debug.apk") || !exists_export_template("android_release.apk")) {
 		valid=false;
 		err+="No export templates found.\nDownload and install export templates.\n";
 	}
diff --git a/platform/android/java/res/drawable/icon.png b/platform/android/java/res/drawable/icon.png
index 78757e9035..013632ddf1 100644
--- a/platform/android/java/res/drawable/icon.png
+++ b/platform/android/java/res/drawable/icon.png
diff --git a/platform/android/java/res/values-fa/strings.xml b/platform/android/java/res/values-fa/strings.xml
new file mode 100644
index 0000000000..450f9fe212
--- /dev/null
+++ b/platform/android/java/res/values-fa/strings.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources>
+    <string name="godot_project_name_string">godot-project-name-fa</string>
+    <string name="testuf8">سلام</string>
+    <string name="text_paused_cellular">آیا می خواهید بر روی اتصال داده همراه دانلود را شروع کنید؟ بر اساس نوع سطح داده شما این ممکن است برای شما هزینه مالی داشته باشد.</string>
+    <string name="text_paused_cellular_2">اگر نمی خواهید بر روی اتصال داده همراه دانلود را شروع کنید ، دانلود به صورت خودکار در زمان دسترسی به وای-فای شروع می شود.</string>
+    <string name="text_button_resume_cellular">ادامه دانلود</string>
+    <string name="text_button_wifi_settings">تنظیمات وای-فای</string>
+    <string name="text_verifying_download">درحال تایید دانلود</string>
+    <string name="text_validation_complete">تایید فایل XAPK تکمیل شد.  برای خروج تایید کنید.</string>
+    <string name="text_validation_failed">اعتبارسنجی فایل XAPK ناموفق.</string>
+    <string name="text_button_pause">توقف دانلود</string>
+    <string name="text_button_resume">ادامه دانلود</string>
+    <string name="text_button_cancel">انصراف</string>
+    <string name="text_button_cancel_verify">انصراف از تایید شدن</string>
+</resources>
diff --git a/platform/android/java/src/com/android/godot/GodotLib.java b/platform/android/java/src/com/android/godot/GodotLib.java
index f099e0feff..3d870b3b1f 100644
--- a/platform/android/java/src/com/android/godot/GodotLib.java
+++ b/platform/android/java/src/com/android/godot/GodotLib.java
@@ -46,7 +46,7 @@ public class GodotLib {
 
      public static native void initialize(Godot p_instance,boolean need_reload_hook,String[] p_cmdline,Object p_asset_manager);
      public static native void resize(int width, int height,boolean reload);
-     public static native void newcontext();
+     public static native void newcontext(boolean p_32_bits);
      public static native void quit();
      public static native void step();
      public static native void touch(int what,int pointer,int howmany, int[] arr);
diff --git a/platform/android/java/src/com/android/godot/GodotPaymentV3.java b/platform/android/java/src/com/android/godot/GodotPaymentV3.java
index 0fd102ac55..0799e1e83d 100644
--- a/platform/android/java/src/com/android/godot/GodotPaymentV3.java
+++ b/platform/android/java/src/com/android/godot/GodotPaymentV3.java
@@ -27,7 +27,7 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 				activity.getPaymentsManager().requestPurchase(sku, transactionId);				
 			}
 		});
-	};
+	}
 	
 /*	public string requestPurchasedTicket(){
 	    activity.getPaymentsManager()
@@ -42,7 +42,7 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 	
 	public GodotPaymentV3(Activity p_activity) {
 
-		registerClass("GodotPayments", new String[] {"purchase", "setPurchaseCallbackId", "setPurchaseValidationUrlPrefix", "setTransactionId", "getSignature", "consumeUnconsumedPurchases"});
+		registerClass("GodotPayments", new String[] {"purchase", "setPurchaseCallbackId", "setPurchaseValidationUrlPrefix", "setTransactionId", "getSignature", "consumeUnconsumedPurchases", "requestPurchased", "setAutoConsume", "consume"});
 		activity=(Godot) p_activity;
 	}
 
@@ -54,7 +54,6 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 				activity.getPaymentsManager().consumeUnconsumedPurchases();				
 			}
 		});
-		
 	}
 
 	private String signature;
@@ -63,25 +62,26 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 	}
 	
 	
-	public void callbackSuccess(String ticket, String signature){
-//        Log.d(this.getClass().getName(), "PRE-Send callback to purchase success");
-        GodotLib.callobject(purchaseCallbackId, "purchase_success", new Object[]{ticket, signature});
-//    	Log.d(this.getClass().getName(), "POST-Send callback to purchase success");
+	public void callbackSuccess(String ticket, String signature, String sku){
+//		Log.d(this.getClass().getName(), "PRE-Send callback to purchase success");
+		GodotLib.callobject(purchaseCallbackId, "purchase_success", new Object[]{ticket, signature, sku});
+//		Log.d(this.getClass().getName(), "POST-Send callback to purchase success");
 }
 
 	public void callbackSuccessProductMassConsumed(String ticket, String signature, String sku){
-//        Log.d(this.getClass().getName(), "PRE-Send callback to consume success");
-        GodotLib.calldeferred(purchaseCallbackId, "consume_success", new Object[]{ticket, signature, sku});
-//    	Log.d(this.getClass().getName(), "POST-Send callback to consume success");
+//		Log.d(this.getClass().getName(), "PRE-Send callback to consume success");
+		Log.d(this.getClass().getName(), "callbackSuccessProductMassConsumed > "+ticket+","+signature+","+sku);
+        	GodotLib.calldeferred(purchaseCallbackId, "consume_success", new Object[]{ticket, signature, sku});
+//		Log.d(this.getClass().getName(), "POST-Send callback to consume success");
 	}
 
 	public void callbackSuccessNoUnconsumedPurchases(){
-        GodotLib.calldeferred(purchaseCallbackId, "no_validation_required", new Object[]{});
+		GodotLib.calldeferred(purchaseCallbackId, "no_validation_required", new Object[]{});
 	}
 	
 	public void callbackFail(){
-                GodotLib.calldeferred(purchaseCallbackId, "purchase_fail", new Object[]{});
-//                GodotLib.callobject(purchaseCallbackId, "purchase_fail", new Object[]{});
+		GodotLib.calldeferred(purchaseCallbackId, "purchase_fail", new Object[]{});
+//		GodotLib.callobject(purchaseCallbackId, "purchase_fail", new Object[]{});
 	}
 	
 	public void callbackCancel(){
@@ -89,6 +89,10 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 //		GodotLib.callobject(purchaseCallbackId, "purchase_cancel", new Object[]{});
 	}
 	
+	public void callbackAlreadyOwned(String sku){ // deferred call of "purchase_owned" with the sku on the object registered as purchaseCallbackId
+		GodotLib.calldeferred(purchaseCallbackId, "purchase_owned", new Object[]{sku});
+	}
+	
 	public int getPurchaseCallbackId() {
 		return purchaseCallbackId;
 	}
@@ -97,8 +101,6 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 		this.purchaseCallbackId = purchaseCallbackId;
 	}
 
-
-
 	public String getPurchaseValidationUrlPrefix(){
 		return this.purchaseValidationUrlPrefix ;
 	}
@@ -107,12 +109,10 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 		this.purchaseValidationUrlPrefix = url;
 	}
 
-
 	public String getAccessToken() {
 		return accessToken;
 	}
 
-
 	public void setAccessToken(String accessToken) {
 		this.accessToken = accessToken;
 	}
@@ -125,4 +125,30 @@ public class GodotPaymentV3 extends Godot.SingletonBase {
 		return this.transactionId;
 	}
 	
+	// request the purchased items that are not consumed: registers this singleton with the PaymentsManager, then issues the request on the UI thread
+	public void requestPurchased(){
+		activity.getPaymentsManager().setBaseSingleton(this);
+		activity.runOnUiThread(new Runnable() {
+			@Override
+			public void run() {
+				activity.getPaymentsManager().requestPurchased();				
+			}
+		});
+	}
+	
+	// callback for requestPurchased(): deferred call of "has_purchased" with (receipt, signature, sku) on the object registered as purchaseCallbackId
+	public void callbackPurchased(String receipt, String signature, String sku){
+		GodotLib.calldeferred(purchaseCallbackId, "has_purchased", new Object[]{receipt, signature, sku});
+	}
+	
+	// enable or disable consuming an item automatically after purchase (default is true); the flag is forwarded to the PaymentsManager
+	public void setAutoConsume(boolean autoConsume){
+		activity.getPaymentsManager().setAutoConsume(autoConsume);
+	}
+	
+	// consume a specific item, identified by its sku; the request is forwarded to the PaymentsManager
+	public void consume(String sku){
+		activity.getPaymentsManager().consume(sku);
+	}
 }
+
diff --git a/platform/android/java/src/com/android/godot/GodotView.java b/platform/android/java/src/com/android/godot/GodotView.java
index ad0354e624..bc249d46c6 100644
--- a/platform/android/java/src/com/android/godot/GodotView.java
+++ b/platform/android/java/src/com/android/godot/GodotView.java
@@ -371,8 +371,8 @@ public class GodotView extends GLSurfaceView {
 
 		if (use_32) {
 			setEGLConfigChooser( translucent ?
-						new ConfigChooser(8, 8, 8, 8, 24, stencil) :
-						new ConfigChooser(8, 8, 8, 8, 24, stencil) );
+						new FallbackConfigChooser(8, 8, 8, 8, 24, stencil, new ConfigChooser(8, 8, 8, 8, 16, stencil)) :
+						new FallbackConfigChooser(8, 8, 8, 8, 24, stencil, new ConfigChooser(5, 6, 5, 0, 16, stencil)) );
 
 		} else {
 			setEGLConfigChooser( translucent ?
@@ -410,6 +410,26 @@ public class GodotView extends GLSurfaceView {
 	    Log.e(TAG, String.format("%s: EGL error: 0x%x", prompt, error));
 	}
     }
+    	/* Tries the requested (32-bit) configuration first and asks the supplied fallback chooser when no such config exists. */
+	private static class FallbackConfigChooser extends ConfigChooser {
+		private ConfigChooser fallback; // chooser consulted when the primary one finds nothing
+		
+		public FallbackConfigChooser(int r, int g, int b, int a, int depth, int stencil, ConfigChooser fallback) {
+			super(r, g, b, a, depth, stencil);
+			this.fallback = fallback;
+		}
+      
+      		@Override
+		public EGLConfig chooseConfig(EGL10 egl, EGLDisplay display, EGLConfig[] configs) {
+			EGLConfig ec = super.chooseConfig(egl, display, configs);
+			if (ec == null) { // the primary chooser found no matching config
+	  			Log.w(TAG, "Trying ConfigChooser fallback");
+	  			ec = fallback.chooseConfig(egl, display, configs);
+				use_32=false; // the fallback was consulted, so the flag later passed to GodotLib.newcontext() is cleared
+			}
+			return ec;
+      		}
+    	}
 
 	private static class ConfigChooser implements GLSurfaceView.EGLConfigChooser {
 
@@ -635,7 +655,7 @@ public class GodotView extends GLSurfaceView {
 		}
 
 		public void onSurfaceCreated(GL10 gl, EGLConfig config) {
-			GodotLib.newcontext();
+			GodotLib.newcontext(use_32);
 		}
 	}
 }
diff --git a/platform/android/java/src/com/android/godot/payments/PaymentsManager.java b/platform/android/java/src/com/android/godot/payments/PaymentsManager.java
index 5bf86d0b69..189f7108c1 100644
--- a/platform/android/java/src/com/android/godot/payments/PaymentsManager.java
+++ b/platform/android/java/src/com/android/godot/payments/PaymentsManager.java
@@ -25,10 +25,8 @@ import com.android.vending.billing.IInAppBillingService;
 public class PaymentsManager {
 
 	public static final int BILLING_RESPONSE_RESULT_OK = 0;
-
-	
 	public static final int REQUEST_CODE_FOR_PURCHASE = 0x1001;
-	
+	private static boolean auto_consume = true;
 	
 	private Activity activity;
 	IInAppBillingService mService;
@@ -69,13 +67,12 @@ public class PaymentsManager {
 	    }
 
 	    @Override
-	    public void onServiceConnected(ComponentName name, 
-	    		IBinder service) {
-		mService = IInAppBillingService.Stub.asInterface(service);
+	    public void onServiceConnected(ComponentName name, IBinder service) {
+			mService = IInAppBillingService.Stub.asInterface(service);
 	    }
 	};
 	
-	public void requestPurchase(String sku, String transactionId){
+	public void requestPurchase(final String sku, String transactionId){
 		new PurchaseTask(mService, Godot.getInstance()) {
 			
 			@Override
@@ -88,6 +85,12 @@ public class PaymentsManager {
 			protected void canceled() {
 				godotPaymentV3.callbackCancel();
 			}
+			
+			@Override
+			protected void alreadyOwned() {
+				godotPaymentV3.callbackAlreadyOwned(sku);
+			}
+			
 		}.purchase(sku, transactionId);
 
 	}
@@ -114,26 +117,82 @@ public class PaymentsManager {
 		}.consumeItAll();
 	}
 	
+	// Asks the billing service for the "inapp" items owned by this package, caches
+	// receipt, signature and purchase token of each one in PaymentsCache and reports
+	// every item through godotPaymentV3.callbackPurchased(receipt, signature, sku).
+	// If the list is empty, callbackPurchased("", "", "") is sent once instead.
+	public void requestPurchased(){
+		try{
+			PaymentsCache pc = new PaymentsCache(Godot.getInstance());
+
+			// The first argument of getPurchases() is the billing API version.
+			Bundle bundle = mService.getPurchases(3, activity.getPackageName(), "inapp",null);
+			int responseCode = bundle.getInt("RESPONSE_CODE");
+
+			if (responseCode != BILLING_RESPONSE_RESULT_OK){
+				// Nothing can be listed after a failed query; leave a trace in the log.
+				Log.d("godot", "Error requesting purchased products: response code " + responseCode);
+				return;
+			}
+
+			final ArrayList<String> myPurchases = bundle.getStringArrayList("INAPP_PURCHASE_DATA_LIST");
+			final ArrayList<String> mySignatures = bundle.getStringArrayList("INAPP_DATA_SIGNATURE_LIST");
+
+			if (myPurchases == null || myPurchases.size() == 0){
+				godotPaymentV3.callbackPurchased("", "", "");
+				return;
+			}
+
+			for (int i=0;i<myPurchases.size();i++)
+			{
+				try{
+					String receipt = myPurchases.get(i);
+					JSONObject inappPurchaseData = new JSONObject(receipt);
+					String sku = inappPurchaseData.getString("productId");
+					String token = inappPurchaseData.getString("purchaseToken");
+					String signature = mySignatures.get(i);
+
+					// Cache keys are left untouched (including the "ticket_signautre"
+					// spelling): code outside this hunk may read them under these names.
+					pc.setConsumableValue("ticket_signautre", sku, signature);
+					pc.setConsumableValue("ticket", sku, receipt);
+					pc.setConsumableFlag("block", sku, true);
+					pc.setConsumableValue("token", sku, token);
+
+					godotPaymentV3.callbackPurchased(receipt, signature, sku);
+				} catch (JSONException e) {
+					// A malformed receipt skips only that item; log it instead of
+					// swallowing it so the missing callback can be diagnosed.
+					Log.d("godot", "Error parsing purchased product:" + e.getMessage());
+				}
+			}
+		}catch(Exception e){
+			Log.d("godot", "Error requesting purchased products:" + e.getClass().getName() + ":" + e.getMessage());
+		}
+	}
+	
 	public void processPurchaseResponse(int resultCode, Intent data) {
 		new HandlePurchaseTask(activity){
 
 			@Override
 			protected void success(final String sku, final String signature, final String ticket) {
-				godotPaymentV3.callbackSuccess(ticket, signature);
-				new ConsumeTask(mService, activity) {
+				godotPaymentV3.callbackSuccess(ticket, signature, sku);
+
+				if (auto_consume){
+					new ConsumeTask(mService, activity) {
 					
-					@Override
-					protected void success(String ticket) {
-//						godotPaymentV3.callbackSuccess("");
-					}
+						@Override
+						protected void success(String ticket) {
+//							godotPaymentV3.callbackSuccess("");
+						}
 					
-					@Override
-					protected void error(String message) {
-						godotPaymentV3.callbackFail();
+						@Override
+						protected void error(String message) {
+							godotPaymentV3.callbackFail();
 						
-					}
-				}.consume(sku);
-
+						}
+					}.consume(sku);
+				}
 				
 //				godotPaymentV3.callbackSuccess(new PaymentsCache(activity).getConsumableValue("ticket", sku),signature);
 //			    godotPaymentV3.callbackSuccess(ticket);
@@ -151,7 +210,7 @@ public class PaymentsManager {
 				godotPaymentV3.callbackCancel();
 				
 			}
-			}.handlePurchaseRequest(resultCode, data);
+		}.handlePurchaseRequest(resultCode, data);
 	}
 	
 	public void validatePurchase(String purchaseToken, final String sku){
@@ -165,7 +224,7 @@ public class PaymentsManager {
 					
 					@Override
 					protected void success(String ticket) {
-						godotPaymentV3.callbackSuccess(ticket, null);
+						godotPaymentV3.callbackSuccess(ticket, null, sku);
 						
 					}
 					
@@ -192,11 +251,31 @@ public class PaymentsManager {
 		}.validatePurchase(sku);
 	}
 	
+	public void setAutoConsume(boolean autoConsume){ // controls whether processPurchaseResponse() consumes an item right after a successful purchase
+		auto_consume = autoConsume; // auto_consume is a static field, so the setting is shared by all PaymentsManager instances
+	}
+	
+	public void consume(final String sku){ // consumes a single item identified by its SKU
+		new ConsumeTask(mService, activity) {
+			
+			@Override
+			protected void success(String ticket) {
+				godotPaymentV3.callbackSuccessProductMassConsumed(ticket, "", sku); // reported with an empty signature string
+				
+			}
+			
+			@Override
+			protected void error(String message) {
+				godotPaymentV3.callbackFail(); // the error message is not forwarded
+				
+			}
+		}.consume(sku);
+	}
+	
 	private GodotPaymentV3 godotPaymentV3;
 	
 	public void setBaseSingleton(GodotPaymentV3 godotPaymentV3) {
 		this.godotPaymentV3 = godotPaymentV3;
-		
 	}
 
 }
diff --git a/platform/android/java/src/com/android/godot/payments/PurchaseTask.java b/platform/android/java/src/com/android/godot/payments/PurchaseTask.java
index 75662a442e..c1f9d164a1 100644
--- a/platform/android/java/src/com/android/godot/payments/PurchaseTask.java
+++ b/platform/android/java/src/com/android/godot/payments/PurchaseTask.java
@@ -62,7 +62,11 @@ abstract public class PurchaseTask {
 //		Log.d("XXX", "Buy intent response code: " + responseCode);
 		if(responseCode == 1 || responseCode == 3 || responseCode == 4){
 			canceled();
-			return ;
+			return;
+		}
+		if(responseCode == 7){
+			alreadyOwned();
+			return;
 		}
 			
 		
@@ -92,6 +96,6 @@ abstract public class PurchaseTask {
 
 	abstract protected void error(String message);
 	abstract protected void canceled();
-
+	abstract protected void alreadyOwned();
 	
 }
diff --git a/platform/android/java_glue.cpp b/platform/android/java_glue.cpp
index d001cface2..2cc74c3bb0 100644
--- a/platform/android/java_glue.cpp
+++ b/platform/android/java_glue.cpp
@@ -920,14 +920,20 @@ JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_resize(JNIEnv * env, jobj
 
 }
 
-JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_newcontext(JNIEnv * env, jobject obj) {
+JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_newcontext(JNIEnv * env, jobject obj,bool p_32_bits) { // NOTE(review): JNI hands over a Java boolean as jboolean; consider jboolean here and in java_glue.h
 
 	__android_log_print(ANDROID_LOG_INFO,"godot","^_^_^_^_^ newcontext %lld\n",Thread::get_caller_ID());
+
+	if (os_android) { // record the context depth before any graphics reload below
+		os_android->set_context_is_16_bits(!p_32_bits); // the OS layer stores the inverse flag ("is 16 bits")
+	}
+
 	if (os_android && step > 0) {
 
 		os_android->reload_gfx();
 	}
 
+
 }
 
 
diff --git a/platform/android/java_glue.h b/platform/android/java_glue.h
index 9410fe7132..e22b6775d8 100644
--- a/platform/android/java_glue.h
+++ b/platform/android/java_glue.h
@@ -38,7 +38,7 @@
 extern "C" {
     JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_initialize(JNIEnv * env, jobject obj, jobject activity,jboolean p_need_reload_hook, jobjectArray p_cmdline,jobject p_asset_manager);
     JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_resize(JNIEnv * env, jobject obj,  jint width, jint height, jboolean reload);
-    JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_newcontext(JNIEnv * env, jobject obj);
+    JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_newcontext(JNIEnv * env, jobject obj, bool p_32_bits);
     JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_step(JNIEnv * env, jobject obj);
     JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_quit(JNIEnv * env, jobject obj);
     JNIEXPORT void JNICALL Java_com_android_godot_GodotLib_touch(JNIEnv * env, jobject obj, jint ev,jint pointer, jint count, jintArray positions);
diff --git a/platform/android/os_android.cpp b/platform/android/os_android.cpp
index e2ff128f0d..e5d11bef69 100644
--- a/platform/android/os_android.cpp
+++ b/platform/android/os_android.cpp
@@ -141,6 +141,8 @@ void OS_Android::initialize(const VideoMode& p_desired,int p_video_driver,int p_
 
 	}
 
+	rasterizer->set_force_16_bits_fbo(use_16bits_fbo);
+
 	visual_server = memnew( VisualServerRaster(rasterizer) );
 	if (get_render_thread_mode() != RENDER_THREAD_UNSAFE) {
 
@@ -725,6 +727,13 @@ void OS_Android::native_video_stop() {
 		video_stop_func();
 }
 
+void OS_Android::set_context_is_16_bits(bool p_is_16) { // called from the JNI newcontext() entry point
+
+	use_16bits_fbo=p_is_16; // remembered so initialize() can pass it to the rasterizer it sets up
+	if (rasterizer) // a rasterizer already exists: forward the setting immediately
+		rasterizer->set_force_16_bits_fbo(p_is_16);
+}
+
 OS_Android::OS_Android(GFXInitFunc p_gfx_init_func,void*p_gfx_init_ud, OpenURIFunc p_open_uri_func, GetDataDirFunc p_get_data_dir_func,GetLocaleFunc p_get_locale_func,GetModelFunc p_get_model_func, ShowVirtualKeyboardFunc p_show_vk, HideVirtualKeyboardFunc p_hide_vk,  SetScreenOrientationFunc p_screen_orient,GetUniqueIDFunc p_get_unique_id,GetSystemDirFunc p_get_sdir_func, VideoPlayFunc p_video_play_func, VideoIsPlayingFunc p_video_is_playing_func, VideoPauseFunc p_video_pause_func, VideoStopFunc p_video_stop_func,bool p_use_apk_expansion) {
 
 
diff --git a/platform/android/os_android.h b/platform/android/os_android.h
index dcaa1db654..94c6250ae8 100644
--- a/platform/android/os_android.h
+++ b/platform/android/os_android.h
@@ -95,6 +95,8 @@ private:
 	bool use_reload_hooks;
 	bool use_apk_expansion;
 
+	bool use_16bits_fbo;
+
 	Rasterizer *rasterizer;
 	VisualServer *visual_server;
 	AudioServerSW *audio_server;
@@ -200,6 +202,7 @@ public:
 	void set_display_size(Size2 p_size);
 
 	void reload_gfx();
+	void set_context_is_16_bits(bool p_is_16);
 
 	void set_need_reload_hooks(bool p_needs_them);
 	virtual void set_screen_orientation(ScreenOrientation p_orientation);
diff --git a/platform/bb10/SCsub b/platform/bb10/SCsub
index 24f2b5d242..81f6e726e4 100644
--- a/platform/bb10/SCsub
+++ b/platform/bb10/SCsub
@@ -19,4 +19,3 @@ if env['bb10_lgles_override'] == "yes":
 
 prog = None
 prog = env_bps.Program('#bin/godot', bb10_lib)
-
diff --git a/platform/bb10/export/export.cpp b/platform/bb10/export/export.cpp
index 434aaff414..2acd920f31 100644
--- a/platform/bb10/export/export.cpp
+++ b/platform/bb10/export/export.cpp
@@ -275,10 +275,16 @@ Error EditorExportPlatformBB10::export_project(const String& p_path, bool p_debu
 
 	EditorProgress ep("export","Exporting for BlackBerry 10",104);
 
-	String template_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
-
-	String src_template=custom_package!=""?custom_package:template_path.plus_file("bb10.zip");
-
+	String src_template=custom_package;
+
+	if (src_template=="") {
+		String err;
+		src_template = find_export_template("bb10.zip", &err);
+		if (src_template=="") {
+			EditorNode::add_io_error(err);
+			return ERR_FILE_NOT_FOUND;
+		}
+	}
 
 	FileAccess *src_f=NULL;
 	zlib_filefunc_def io = zipio_create_io_from_file(&src_f);
@@ -733,9 +739,7 @@ bool EditorExportPlatformBB10::can_export(String *r_error) const {
 		err+="Blackberry host tools not configured in editor settings.\n";
 	}
 
-	String exe_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
-
-	if (!FileAccess::exists(exe_path+"bb10.zip")) {
+	if (!exists_export_template("bb10.zip")) {
 		valid=false;
 		err+="No export template found.\nDownload and install export templates.\n";
 	}
diff --git a/platform/bb10/os_bb10.cpp b/platform/bb10/os_bb10.cpp
index d89033b1df..c8e7ae561e 100644
--- a/platform/bb10/os_bb10.cpp
+++ b/platform/bb10/os_bb10.cpp
@@ -195,9 +195,10 @@ void OSBB10::finalize() {
 //		memdelete(debugger_connection_console);
 //}
 
+	memdelete(sample_manager);
+
 	audio_server->finish();
 	memdelete(audio_server);
-	memdelete(sample_manager);
 
 	visual_server->finish();
 	memdelete(visual_server);
diff --git a/platform/flash/SCsub b/platform/flash/SCsub
index b7aef3b65d..4e5f26d52a 100644
--- a/platform/flash/SCsub
+++ b/platform/flash/SCsub
@@ -36,5 +36,3 @@ java -jar $ALCHEMY/usr/lib/asc2.jar -md -strict -optimize -AS3 \
 -import ../platform/flash/lib/libGL.abc \
 ../platform/flash/Console.as
 """
-
-
diff --git a/platform/haiku/os_haiku.cpp b/platform/haiku/os_haiku.cpp
index 1edb23d504..ef483657ca 100644
--- a/platform/haiku/os_haiku.cpp
+++ b/platform/haiku/os_haiku.cpp
@@ -137,9 +137,10 @@ void OS_Haiku::finalize() {
 	spatial_sound_2d_server->finish();
 	memdelete(spatial_sound_2d_server);
 
+	memdelete(sample_manager);
+	
 	audio_server->finish();
 	memdelete(audio_server);
-	memdelete(sample_manager);
 
 	visual_server->finish();
 	memdelete(visual_server);
diff --git a/platform/iphone/app_delegate.mm b/platform/iphone/app_delegate.mm
index 647bf1a2d1..e5bd7a96b4 100644
--- a/platform/iphone/app_delegate.mm
+++ b/platform/iphone/app_delegate.mm
@@ -56,6 +56,8 @@
 #import "Appirater.h"
 #endif
 
+Error _shell_open(String);
+
 Error _shell_open(String p_uri) {
 	NSString* url = [[NSString alloc] initWithUTF8String:p_uri.utf8().get_data()];
 
diff --git a/platform/iphone/game_center.mm b/platform/iphone/game_center.mm
index 79c056776d..4cb7a20a7f 100644
--- a/platform/iphone/game_center.mm
+++ b/platform/iphone/game_center.mm
@@ -30,8 +30,18 @@
 
 #include "game_center.h"
 
+#ifdef __IPHONE_9_0
+
+#import <GameKit/GameKit.h>
+extern "C" {
+
+#else
+
 extern "C" {
 #import <GameKit/GameKit.h>
+
+#endif
+
 #import "app_delegate.h"
 };
 
diff --git a/platform/iphone/gl_view.mm b/platform/iphone/gl_view.mm
index 279fbdafa8..f19e16f3f6 100755
--- a/platform/iphone/gl_view.mm
+++ b/platform/iphone/gl_view.mm
@@ -54,6 +54,14 @@ static bool video_playing = false;
 static float video_previous_volume = 0.0f;
 static CMTime video_current_time;
 
+void _show_keyboard(String);
+void _hide_keyboard();
+bool _play_video(String, float, String, String);
+bool _is_video_playing();
+void _focus_out_video();
+void _unpause_video();
+void _stop_video();
+
 void _show_keyboard(String p_existing) {
 	keyboard_text = p_existing;
 	printf("instance on show is %p\n", _instance);
@@ -618,7 +626,7 @@ static void clear_touches() {
 
 - (void)audioRouteChangeListenerCallback:(NSNotification*)notification
 {
-	printf("*********** route changed!%i\n");
+	printf("*********** route changed!\n");
 	NSDictionary *interuptionDict = notification.userInfo;
 
 	NSInteger routeChangeReason = [[interuptionDict valueForKey:AVAudioSessionRouteChangeReasonKey] integerValue];
diff --git a/platform/iphone/godot_iphone.cpp b/platform/iphone/godot_iphone.cpp
index b7b9b747b4..f0cb929429 100644
--- a/platform/iphone/godot_iphone.cpp
+++ b/platform/iphone/godot_iphone.cpp
@@ -40,6 +40,8 @@ int add_path(int p_argc, char** p_args);
 int add_cmdline(int p_argc, char** p_args);
 };
 
+int iphone_main(int, int, int, char**);
+
 int iphone_main(int width, int height, int argc, char** argv) {
 
 	int len = strlen(argv[0]);
diff --git a/platform/iphone/icloud.mm b/platform/iphone/icloud.mm
index 2dc2f7d9c1..518385992d 100644
--- a/platform/iphone/icloud.mm
+++ b/platform/iphone/icloud.mm
@@ -30,10 +30,16 @@
 
 #include "icloud.h"
 
+#ifndef __IPHONE_9_0
 extern "C" {
+#endif
+
 #import <Foundation/Foundation.h>
 #import "app_delegate.h"
+
+#ifndef __IPHONE_9_0
 };
+#endif
 
 ICloud* ICloud::instance = NULL;
 
diff --git a/platform/iphone/sem_iphone.cpp b/platform/iphone/sem_iphone.cpp
index 5afaa7b308..36baa40427 100644
--- a/platform/iphone/sem_iphone.cpp
+++ b/platform/iphone/sem_iphone.cpp
@@ -31,6 +31,11 @@
 #include <unistd.h>
 #include <fcntl.h>
 
+void cgsem_init(cgsem_t*);
+void cgsem_post(cgsem_t*);
+void cgsem_wait(cgsem_t*);
+void cgsem_destroy(cgsem_t*);
+
 void cgsem_init(cgsem_t *cgsem)
 {
 	int flags, fd, i;
diff --git a/platform/iphone/view_controller.mm b/platform/iphone/view_controller.mm
index 6a9c3ac9ec..f98fb7c355 100644
--- a/platform/iphone/view_controller.mm
+++ b/platform/iphone/view_controller.mm
@@ -32,6 +32,9 @@
 
 extern "C" {
 
+int add_path(int, char**);
+int add_cmdline(int, char**);
+
 int add_path(int p_argc, char** p_args) {
 
 	NSString* str = [[[NSBundle mainBundle] infoDictionary] objectForKey:@"godot_path"];
diff --git a/platform/javascript/export/export.cpp b/platform/javascript/export/export.cpp
index 9e2595f4a1..acbcbb4652 100644
--- a/platform/javascript/export/export.cpp
+++ b/platform/javascript/export/export.cpp
@@ -205,18 +205,24 @@ Error EditorExportPlatformJavaScript::export_project(const String& p_path, bool
 
 	EditorProgress ep("export","Exporting for javascript",104);
 
-	String template_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
-
-	if (p_debug) {
-
-		src_template=custom_debug_package!=""?custom_debug_package:template_path+"javascript_debug.zip";
-	} else {
-
-		src_template=custom_release_package!=""?custom_release_package:template_path+"javascript_release.zip";
+	if (p_debug)
+		src_template=custom_debug_package;
+	else
+		src_template=custom_release_package;
 
+	if (src_template=="") {
+		String err;
+		if (p_debug) {
+			src_template=find_export_template("javascript_debug.zip", &err);
+		} else {
+			src_template=find_export_template("javascript_release.zip", &err);
+		}
+		if (src_template=="") {
+			EditorNode::add_io_error(err);
+			return ERR_FILE_NOT_FOUND;
+		}
 	}
 
-
 	FileAccess *src_f=NULL;
 	zlib_filefunc_def io = zipio_create_io_from_file(&src_f);
 
@@ -337,9 +343,8 @@ bool EditorExportPlatformJavaScript::can_export(String *r_error) const {
 
 	bool valid=true;
 	String err;
-	String exe_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
 
-	if (!FileAccess::exists(exe_path+"javascript_debug.zip") || !FileAccess::exists(exe_path+"javascript_release.zip")) {
+	if (!exists_export_template("javascript_debug.zip") || !exists_export_template("javascript_release.zip")) {
 		valid=false;
 		err+="No export templates found.\nDownload and install export templates.\n";
 	}
diff --git a/platform/nacl/html/icon_128.png b/platform/nacl/html/icon_128.png
index 1793aa7e7a..653669c38d 100644
--- a/platform/nacl/html/icon_128.png
+++ b/platform/nacl/html/icon_128.png
diff --git a/platform/nacl/html/icon_16.png b/platform/nacl/html/icon_16.png
index 09de19e418..9f6678c289 100644
--- a/platform/nacl/html/icon_16.png
+++ b/platform/nacl/html/icon_16.png
diff --git a/platform/osx/detect.py b/platform/osx/detect.py
index 22cee0527e..f7cf5111f5 100644
--- a/platform/osx/detect.py
+++ b/platform/osx/detect.py
@@ -116,4 +116,4 @@ def configure(env):
 	env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } )
 	#env.Append( BUILDERS = { 'HLSL9' : env.Builder(action = methods.build_hlsl_dx9_headers, suffix = 'hlsl.h',src_suffix = '.hlsl') } )
 
-
+	env["x86_opt_gcc"]=True
diff --git a/platform/osx/export/export.cpp b/platform/osx/export/export.cpp
index 79ee91bc61..0bece6ec76 100644
--- a/platform/osx/export/export.cpp
+++ b/platform/osx/export/export.cpp
@@ -251,15 +251,19 @@ Error EditorExportPlatformOSX::export_project(const String& p_path, bool p_debug
 
 	EditorProgress ep("export","Exporting for OSX",104);
 
-	String pkg_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/osx.zip";
-
-	if (p_debug) {
-
-		src_pkg=custom_debug_package!=""?custom_debug_package:pkg_path;
-	} else {
-
-		src_pkg=custom_release_package!=""?custom_release_package:pkg_path;
 
+	if (p_debug)
+		src_pkg=custom_debug_package;
+	else
+		src_pkg=custom_release_package;
+
+	if (src_pkg=="") {
+		String err;
+		src_pkg=find_export_template("osx.zip", &err);
+		if (src_pkg=="") {
+			EditorNode::add_io_error(err);
+			return ERR_FILE_NOT_FOUND;
+		}
 	}
 
 
@@ -464,9 +468,8 @@ bool EditorExportPlatformOSX::can_export(String *r_error) const {
 
 	bool valid=true;
 	String err;
-	String exe_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
 
-	if (!FileAccess::exists(exe_path+"osx.zip")) {
+	if (!exists_export_template("osx.zip")) {
 		valid=false;
 		err+="No export templates found.\nDownload and install export templates.\n";
 	}
diff --git a/platform/server/os_server.cpp b/platform/server/os_server.cpp
index 75e0878bac..8b831140d6 100644
--- a/platform/server/os_server.cpp
+++ b/platform/server/os_server.cpp
@@ -107,9 +107,10 @@ void OS_Server::finalize() {
 //		memdelete(debugger_connection_console);
 //}
 
+	memdelete(sample_manager);
+
 	audio_server->finish();
 	memdelete(audio_server);
-	memdelete(sample_manager);
 
 	visual_server->finish();
 	memdelete(visual_server);
diff --git a/platform/windows/SCsub b/platform/windows/SCsub
index 1ad32e7989..f98c1b01ff 100644
--- a/platform/windows/SCsub
+++ b/platform/windows/SCsub
@@ -11,9 +11,15 @@ common_win=[
 	"stream_peer_winsock.cpp",
 ]
 
+restarget="godot_res"+env["OBJSUFFIX"]
+
+obj = env.RES(restarget,'godot_res.rc')
+
+common_win.append(obj)
+
 env.Program('#bin/godot',['godot_win.cpp']+common_win,PROGSUFFIX=env["PROGSUFFIX"])
 
-# Microsoft Visual Studio Project Generation			
+# Microsoft Visual Studio Project Generation
 if (env['vsproj'])=="yes":
 	env.vs_srcs = env.vs_srcs + ["platform/windows/godot_win.cpp"]
 	for x in common_win:
diff --git a/platform/windows/detect.py b/platform/windows/detect.py
index f0d2a7cc40..3193a2acbb 100644
--- a/platform/windows/detect.py
+++ b/platform/windows/detect.py
@@ -88,7 +88,7 @@
 
 import os
 
-import sys	
+import sys
 
 
 def is_active():
@@ -170,16 +170,32 @@ def get_flags():
 	return [
 		('freetype','builtin'), #use builtin freetype
 		('openssl','builtin'), #use builtin openssl
-		('theora','no'),
 	]
 			
+def build_res_file( target, source, env ): # SCons action: compile .rc files with MinGW windres
 
+	# Returns 0 when every source was compiled and 1 on the first failure.
+	# The windres binary is prefixed with the MinGW prefix matching env["bits"].
+	if (env["bits"] == "32"):
+		cmdbase = env['mingw_prefix']
+	else:
+		cmdbase = env['mingw_prefix_64']
+	cmdbase = cmdbase + 'windres --include-dir . '
+	import subprocess
+	for src, dst in zip(source, target):
+		cmd = cmdbase + '-i ' + str(src) + ' -o ' + str(dst)
+		try:
+			proc = subprocess.Popen(cmd,shell = True,stderr = subprocess.PIPE)
+			if len(proc.communicate()[1]) or proc.returncode != 0: # stderr output or bad exit status
+				return 1
+		except Exception: # not a bare except: Ctrl-C must still abort the build
+			return 1
+	return 0
 
 def configure(env):
 
 	env.Append(CPPPATH=['#platform/windows'])
-
-
+	env['is_mingw']=False
 	if (os.name=="nt" and os.getenv("VSINSTALLDIR")!=None):
 		#build using visual studio
 		env['ENV']['TMP'] = os.environ['TMP']
@@ -203,14 +219,14 @@ def configure(env):
 			env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE'])
 		elif (env["target"]=="debug_release"):
 
-			env.Append(CCFLAGS=['/Zi','/Od'])
+			env.Append(CCFLAGS=['/Z7','/Od'])
 			env.Append(LINKFLAGS=['/DEBUG'])
 			env.Append(LINKFLAGS=['/SUBSYSTEM:WINDOWS'])
 			env.Append(LINKFLAGS=['/ENTRY:mainCRTStartup'])
 
 		elif (env["target"]=="debug"):
 
-			env.Append(CCFLAGS=['/Zi','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED','/DD3D_DEBUG_INFO','/Od'])
+			env.Append(CCFLAGS=['/Z7','/DDEBUG_ENABLED','/DDEBUG_MEMORY_ENABLED','/DD3D_DEBUG_INFO','/Od'])
 			env.Append(LINKFLAGS=['/SUBSYSTEM:CONSOLE'])
 			env.Append(LINKFLAGS=['/DEBUG'])
 
@@ -246,6 +262,7 @@ def configure(env):
 		env.Append(CCFLAGS=["/I"+DIRECTX_PATH+"/Include"])
 		env.Append(LIBPATH=[DIRECTX_PATH+"/Lib/x86"])
 		env['ENV'] = os.environ;
+		env["x86_opt_vc"]=True
 	else:
 
 		# Workaround for MinGW. See:
@@ -344,6 +361,7 @@ def configure(env):
 		env['AR'] = mingw_prefix+"ar"
 		env['RANLIB'] = mingw_prefix+"ranlib"
 		env['LD'] = mingw_prefix+"g++"
+		env["x86_opt_gcc"]=True
 
 		#env['CC'] = "winegcc"
 		#env['CXX'] = "wineg++"
@@ -354,7 +372,7 @@ def configure(env):
 		env.Append(LIBS=['mingw32','opengl32', 'dsound', 'ole32', 'd3d9','winmm','gdi32','iphlpapi','shlwapi','wsock32','kernel32'])
 
 		# if (env["bits"]=="32"):
-# #			env.Append(LIBS=['gcc_s'])
+			# env.Append(LIBS=['gcc_s'])
 			# #--with-arch=i686
 			# env.Append(CPPFLAGS=['-march=i686'])
 			# env.Append(LINKFLAGS=['-march=i686'])
@@ -366,6 +384,10 @@ def configure(env):
 		env.Append(CPPFLAGS=['-DMINGW_ENABLED'])
 		env.Append(LINKFLAGS=['-g'])
 
+		# Windows resource (.rc) files are compiled with windres through the RES builder
+		env['is_mingw']=True
+		env.Append( BUILDERS = { 'RES' : env.Builder(action = build_res_file, suffix = '.o',src_suffix = '.rc') } )
+
 	import methods
 	env.Append( BUILDERS = { 'GLSL120' : env.Builder(action = methods.build_legacygl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } )
 	env.Append( BUILDERS = { 'GLSL' : env.Builder(action = methods.build_glsl_headers, suffix = 'glsl.h',src_suffix = '.glsl') } )
@@ -373,4 +395,3 @@ def configure(env):
 	env.Append( BUILDERS = { 'GLSL120GLES' : env.Builder(action = methods.build_gles2_headers, suffix = 'glsl.h',src_suffix = '.glsl') } )
 
 	
-
diff --git a/platform/windows/export/export.cpp b/platform/windows/export/export.cpp
index 952f51fdd4..9cfd475091 100644
--- a/platform/windows/export/export.cpp
+++ b/platform/windows/export/export.cpp
@@ -1,6 +1,345 @@
+/*************************************************************************/
+/*  export.cpp                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
 #include "export.h"
 #include "platform/windows/logo.h"
-#include "tools/editor/editor_import_export.h"
+#include "os/os.h"
+#include "globals.h"
+#include "tools/editor/editor_node.h"
+#include "tools/pe_bliss/pe_bliss_godot.h"
+
+/**
+	@author Masoud BaniHashemian <masoudbh3@gmail.com>
+*/
+
+
+void EditorExportPlatformWindows::store_16(DVector<uint8_t>& vector, uint16_t value) {
+	// Append 'value' as two bytes, least-significant byte first (ICO/BMP fields are little-endian).
+	int size = vector.size(); // shifts are used so the output does not depend on the host byte order
+	vector.resize( size + 2 );
+	DVector<uint8_t>::Write w = vector.write();
+	w[size]=value&0xFF;
+	w[size+1]=(value>>8)&0xFF;
+}
+void EditorExportPlatformWindows::store_32(DVector<uint8_t>& vector, uint32_t value) {
+	// Append 'value' as four little-endian bytes; shifts keep the result independent of host byte order.
+	int size = vector.size();
+	vector.resize( size + 4 );
+	DVector<uint8_t>::Write w = vector.write();
+	w[size]=value&0xFF;
+	w[size+1]=(value>>8)&0xFF;
+	w[size+2]=(value>>16)&0xFF;
+	w[size+3]=(value>>24)&0xFF;
+}
+
+bool EditorExportPlatformWindows::_set(const StringName& p_name, const Variant& p_value) {
+	// Property setter: stores p_value in the member matching an "icon/..." or "version_info/..." name.
+	String n = p_name;
+	// Returns true when the name was recognized and stored, false otherwise.
+	if (n=="icon/icon_ico") {
+
+		icon_ico=p_value;
+	} else if (n=="icon/icon_png") {
+
+		icon_png=p_value;
+	} else if (n=="icon/icon_png16x16") {
+
+		icon16=p_value;
+	} else if (n=="icon/icon_png32x32") {
+
+		icon32=p_value;
+	} else if (n=="icon/icon_png48x48") {
+
+		icon48=p_value;
+	} else if (n=="icon/icon_png64x64") {
+
+		icon64=p_value;
+	} else if (n=="icon/icon_png128x128") {
+
+		icon128=p_value;
+	} else if (n=="icon/icon_png256x256") {
+
+		icon256=p_value;
+	} else if (n=="version_info/version_major") {
+
+		version_major=p_value;
+	} else if (n=="version_info/version_minor") {
+
+		version_minor=p_value;
+	} else if (n=="version_info/version_text") {
+
+		version_text=p_value;
+	} else if (n=="version_info/company_name") {
+
+		company_name=p_value;
+	} else if (n=="version_info/file_description") {
+
+		file_description=p_value;
+	} else if (n=="version_info/product_name") {
+
+		product_name=p_value;
+	} else if (n=="version_info/legal_copyright") {
+
+		legal_copyright=p_value;
+	} else if (n=="version_info/add_godot_version") {
+
+		set_godot_version=p_value;
+	} else
+		return false; // unknown property name
+
+	return true;
+
+}
+
+bool EditorExportPlatformWindows::_get(const StringName& p_name,Variant &r_ret) const {
+	// Property getter: writes the member matching an "icon/..." or "version_info/..." name into r_ret.
+	String n = p_name;
+	// Returns true when the name was recognized, false otherwise (r_ret is then left untouched).
+	if (n=="icon/icon_ico") {
+
+		r_ret=icon_ico;
+	} else if (n=="icon/icon_png") {
+
+		r_ret=icon_png;
+	} else if (n=="icon/icon_png16x16") {
+
+		r_ret=icon16;
+	} else if (n=="icon/icon_png32x32") {
+
+		r_ret=icon32;
+	} else if (n=="icon/icon_png48x48") {
+
+		r_ret=icon48;
+	} else if (n=="icon/icon_png64x64") {
+
+		r_ret=icon64;
+	} else if (n=="icon/icon_png128x128") {
+
+		r_ret=icon128;
+	} else if (n=="icon/icon_png256x256") {
+
+		r_ret=icon256;
+	} else if (n=="version_info/version_major") {
+
+		r_ret=version_major;
+	} else if (n=="version_info/version_minor") {
+
+		r_ret=version_minor;
+	} else if (n=="version_info/version_text") {
+
+		r_ret=version_text;
+	} else if (n=="version_info/company_name") {
+
+		r_ret=company_name;
+	} else if (n=="version_info/file_description") {
+
+		r_ret=file_description;
+	} else if (n=="version_info/product_name") {
+
+		r_ret=product_name;
+	} else if (n=="version_info/legal_copyright") {
+
+		r_ret=legal_copyright;
+	} else if (n=="version_info/add_godot_version") {
+
+		r_ret=set_godot_version;
+	} else
+		return false; // unknown property name
+
+	return true;
+
+}
+
+void EditorExportPlatformWindows::_get_property_list( List<PropertyInfo> *p_list) const {
+	// Appends the export options of this platform to p_list; the names match those handled by _set()/_get().
+	p_list->push_back( PropertyInfo( Variant::STRING, "icon/icon_ico",PROPERTY_HINT_FILE,"ico") );
+	p_list->push_back( PropertyInfo( Variant::STRING, "icon/icon_png",PROPERTY_HINT_FILE,"png") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "icon/icon_png16x16") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "icon/icon_png32x32") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "icon/icon_png48x48") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "icon/icon_png64x64") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "icon/icon_png128x128") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "icon/icon_png256x256") );
+	p_list->push_back( PropertyInfo( Variant::INT, "version_info/version_major", PROPERTY_HINT_RANGE,"0,65535,1"));
+	p_list->push_back( PropertyInfo( Variant::INT, "version_info/version_minor", PROPERTY_HINT_RANGE,"0,65535,1")); // step 1, same as version_major (was 0)
+	p_list->push_back( PropertyInfo( Variant::STRING, "version_info/version_text") );
+	p_list->push_back( PropertyInfo( Variant::STRING, "version_info/company_name") );
+	p_list->push_back( PropertyInfo( Variant::STRING, "version_info/file_description") );
+	p_list->push_back( PropertyInfo( Variant::STRING, "version_info/product_name") );
+	p_list->push_back( PropertyInfo( Variant::STRING, "version_info/legal_copyright") );
+	p_list->push_back( PropertyInfo( Variant::BOOL, "version_info/add_godot_version") );
+	
+}
+
+Error EditorExportPlatformWindows::export_project(const String& p_path, bool p_debug, int p_flags) {
+	// Runs the generic PC export, then patches the produced EXE at p_path with an icon and version resources.
+	Error err = EditorExportPlatformPC::export_project(p_path, p_debug, p_flags);
+	if(err != OK)
+	{
+		return err;
+	}
+	EditorProgress ep("editexe","Edit EXE File",102);
+	ep.step("Create ico file..",0);
+	// icon_content receives the bytes of the .ico data to embed; it stays empty when no icon is configured or loadable.
+		DVector<uint8_t> icon_content;
+		if (this->icon_ico!="" && this->icon_ico.ends_with(".ico")) {
+			FileAccess *f = FileAccess::open(this->icon_ico,FileAccess::READ);
+			if (f) {
+				icon_content.resize(f->get_len());
+				DVector<uint8_t>::Write write = icon_content.write();
+				f->get_buffer(write.ptr(),icon_content.size());
+				f->close();
+				memdelete(f);
+			}
+		} else if (this->icon_png!="" && this->icon_png.ends_with(".png") && (icon16 || icon32 || icon48 || icon64 || icon128 || icon256)) {
+			#ifdef PNG_ENABLED
+			Vector<Image> pngs;
+			Image png;
+			Error err_png = png.load(this->icon_png);
+			if (err_png==OK && !png.empty()) {
+				if(icon256) {
+					Image icon_256(png);
+					if(!(png.get_height()==256 && png.get_width()==256)) icon_256.resize(256,256);
+					pngs.push_back(icon_256);
+				}
+				if(icon128) {
+					Image icon_128(png);
+					if(!(png.get_height()==128 && png.get_width()==128)) icon_128.resize(128,128);
+					pngs.push_back(icon_128);
+				}
+				if(icon64) {
+					Image icon_64(png);
+					if(!(png.get_height()==64 && png.get_width()==64)) icon_64.resize(64,64);
+					pngs.push_back(icon_64);
+				}
+				if(icon48) {
+					Image icon_48(png);
+					if(!(png.get_height()==48 && png.get_width()==48)) icon_48.resize(48,48);
+					pngs.push_back(icon_48);
+				}
+				if(icon32) {
+					Image icon_32(png);
+					if(!(png.get_height()==32 && png.get_width()==32)) icon_32.resize(32,32);
+					pngs.push_back(icon_32);
+				}
+				if(icon16) {
+					Image icon_16(png);
+					if(!(png.get_height()==16 && png.get_width()==16)) icon_16.resize(16,16);
+					pngs.push_back(icon_16);
+				}
+				// create icon according to https://www.daubnet.com/en/file-format-ico
+				store_16(icon_content,0); //Reserved
+				store_16(icon_content,1); //Type
+				store_16(icon_content,pngs.size()); //Count
+				int offset = 6+pngs.size()*16; //first image starts after the 6-byte header and one 16-byte entry per image
+				//List of bitmaps
+				for(int i=0;i<pngs.size();i++) {
+					int w = pngs[i].get_width();
+					int h = pngs[i].get_height();
+					icon_content.push_back(w<256?w:0); //width (0 stands for 256)
+					icon_content.push_back(h<256?h:0); //height (0 stands for 256)
+					icon_content.push_back(0); //ColorCount = 0
+					icon_content.push_back(0); //Reserved
+					store_16(icon_content,1); //Planes
+					store_16(icon_content,32); //BitCount (bit per pixel)
+					int size = 40 + (w * h * 4) + (((w + 31) / 32) * 4 * h); //1bpp AND mask rows are padded to 32 bits
+					store_32(icon_content,size); //Size of (InfoHeader + ANDbitmap + XORbitmap)
+					store_32(icon_content,offset); //FileOffset
+					offset += size;
+				}
+				//Write bmp files.
+				for(int i=0;i<pngs.size();i++) {
+					int w = pngs[i].get_width();
+					int h = pngs[i].get_height();
+					store_32(icon_content,40); //Size of InfoHeader structure = 40
+					store_32(icon_content,w); //Width
+					store_32(icon_content,h*2); //Height (doubled: covers the XOR bitmap plus the AND mask)
+					store_16(icon_content,1); //Planes
+					store_16(icon_content,32); //BitCount
+					store_32(icon_content,0); //Compression
+					store_32(icon_content,w*h*4); //ImageSize = Size of Image in Bytes
+					store_32(icon_content,0); //unused = 0
+					store_32(icon_content,0); //unused = 0
+					store_32(icon_content,0); //unused = 0
+					store_32(icon_content,0); //unused = 0
+					//XORBitmap (rows are stored bottom-up)
+					for(int y=h-1;y>=0;y--) {
+						for(int x=0;x<w;x++) {
+							store_32(icon_content,pngs[i].get_pixel(x,y).to_32());
+						}
+					}
+					//ANDBitmap: must match the padded mask size announced in the directory entry above
+					for(int m=0;m<(((w + 31) / 32) * 4 * h);m+=4) store_32(icon_content,0x00000000); // Add empty ANDBitmap , TODO create full ANDBitmap Structure if need.
+				}
+			}
+			#endif
+		}
+	// Hand the icon bytes and version strings to pe_bliss, which edits the executable at p_path.
+	ep.step("Add rsrc..",50);
+		// "$genname" is expanded into a local copy so the stored product_name keeps its placeholder for later exports.
+		String basename = Globals::get_singleton()->get("application/name");
+		String exe_product_name=product_name.replace("$genname",basename);
+		String godot_version;
+		if(set_godot_version) godot_version = String( VERSION_MKSTRING );
+		String ret = pe_bliss_add_resrc(p_path.utf8(), version_major, version_minor,
+			company_name, file_description, legal_copyright, version_text,
+			exe_product_name, godot_version, icon_content);
+		if (ret.empty()) {
+			return OK;
+		} else {
+			EditorNode::add_io_error(ret); // a non-empty return string is treated as the error message
+			return ERR_FILE_CANT_WRITE;
+		}
+}
+
+EditorExportPlatformWindows::EditorExportPlatformWindows() {
+	// Default export options: every PNG-derived icon size enabled, generic version strings, version 1.0.
+	icon16=true;
+	icon32=true;
+	icon48=true;
+	icon64=true;
+	icon128=true;
+	icon256=true;
+	product_name="$genname"; // placeholder; export_project() substitutes the "application/name" project setting
+	company_name="Godot Engine";
+	file_description="Created With Godot Engine";
+	version_text="1.0";
+	OS::Date date = OS::get_singleton()->get_date(); // date at construction time; its year ends the copyright range below
+	legal_copyright="Copyright (c) 2007-";
+	legal_copyright+=String::num(date.year);
+	legal_copyright+=" Juan Linietsky, Ariel Manzur";
+	version_major=1;
+	version_minor=0;
+	set_godot_version=true; // pass the engine version string (VERSION_MKSTRING) to the resource writer on export
+}
+
+
 
 void register_windows_exporter() {
 
@@ -9,7 +348,7 @@ void register_windows_exporter() {
 	logo->create_from_image(img);
 
 	{
-		Ref<EditorExportPlatformPC> exporter = Ref<EditorExportPlatformPC>( memnew(EditorExportPlatformPC) );
+		Ref<EditorExportPlatformWindows> exporter = Ref<EditorExportPlatformWindows>( memnew(EditorExportPlatformWindows) );
 		exporter->set_binary_extension("exe");
 		exporter->set_release_binary32("windows_32_release.exe");
 		exporter->set_debug_binary32("windows_32_debug.exe");
diff --git a/platform/windows/export/export.h b/platform/windows/export/export.h
index de3dc3fa50..2424efc861 100644
--- a/platform/windows/export/export.h
+++ b/platform/windows/export/export.h
@@ -1,3 +1,37 @@
+#include "tools/editor/editor_import_export.h"
+
+class EditorExportPlatformWindows : public EditorExportPlatformPC {
+	OBJ_TYPE( EditorExportPlatformWindows,EditorExportPlatformPC );
+	// PC exporter specialization that additionally writes an icon and version resources into the exported EXE.
+private:
+	String icon_ico; ///< path of a ready-made .ico file; takes precedence over icon_png
+	String icon_png; ///< path of a .png file converted to an icon when no .ico is set
+	bool icon16; ///< include a 16x16 image when building the icon from icon_png
+	bool icon32; ///< include a 32x32 image
+	bool icon48; ///< include a 48x48 image
+	bool icon64; ///< include a 64x64 image
+	bool icon128; ///< include a 128x128 image
+	bool icon256; ///< include a 256x256 image
+	String company_name; ///< exposed as "version_info/company_name"
+	String file_description; ///< exposed as "version_info/file_description"
+	String product_name; ///< exposed as "version_info/product_name"; "$genname" expands to the application name
+	String legal_copyright; ///< exposed as "version_info/legal_copyright"
+	String version_text; ///< exposed as "version_info/version_text"
+	int version_major; ///< exposed as "version_info/version_major"
+	int version_minor; ///< exposed as "version_info/version_minor"
+	bool set_godot_version; ///< exposed as "version_info/add_godot_version"
+	void store_16(DVector<uint8_t>& vector, uint16_t value); ///< store 16 bits uint 
+	void store_32(DVector<uint8_t>& vector, uint32_t value); ///< store 32 bits uint 
+	
+protected:
+	bool _set(const StringName& p_name, const Variant& p_value);
+	bool _get(const StringName& p_name,Variant &r_ret) const;
+	void _get_property_list( List<PropertyInfo> *p_list) const;
+	
+public:
+	Error export_project(const String& p_path, bool p_debug,int p_flags=0); ///< base-class export followed by the resource patching of the EXE
+	EditorExportPlatformWindows();
+};
 
 void register_windows_exporter();
 
diff --git a/platform/windows/godot.ico b/platform/windows/godot.ico
new file mode 100644
index 0000000000..e57ce36529
--- /dev/null
+++ b/platform/windows/godot.ico
diff --git a/platform/windows/godot_res.rc b/platform/windows/godot_res.rc
new file mode 100644
index 0000000000..f77182f909
--- /dev/null
+++ b/platform/windows/godot_res.rc
@@ -0,0 +1,33 @@
+#include "core/version.h"
+#ifndef _STR
+#define _STR(m_x) #m_x
+#define _MKSTR(m_x) _STR(m_x)
+#endif
+// _MKSTR() expands its argument first and then turns the result into a string literal via _STR().
+GODOT_ICON ICON platform/windows/godot.ico
+// Version resource; the VERSION_* macros are presumably provided by core/version.h (not visible here).
+1 VERSIONINFO
+FILEVERSION    	VERSION_MAJOR,VERSION_MINOR,0
+PRODUCTVERSION 	VERSION_MAJOR,VERSION_MINOR,0
+FILEOS         	4
+FILETYPE       	1
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904b0"
+        BEGIN
+            VALUE "CompanyName",            "Godot Engine"
+            VALUE "FileDescription",        _MKSTR(VERSION_NAME) " Editor (" _MKSTR(VERSION_STATUS) ")"
+            VALUE "FileVersion",            _MKSTR(VERSION_MAJOR) "." _MKSTR(VERSION_MINOR) "."_MKSTR(VERSION_REVISION)
+            VALUE "ProductName",            _MKSTR(VERSION_NAME)
+            VALUE "Licence",                "MIT"
+            VALUE "LegalCopyright",         "Copyright (c) 2007-" _MKSTR(VERSION_YEAR) " Juan Linietsky, Ariel Manzur"
+            VALUE "Info",                   "http://www.godotengine.org"
+            VALUE "ProductVersion",         _MKSTR(VERSION_MAJOR) "." _MKSTR(VERSION_MINOR) "."_MKSTR(VERSION_REVISION)
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1200
+    END
+END
+\ No newline at end of file
diff --git a/platform/windows/os_windows.cpp b/platform/windows/os_windows.cpp
index 438a5a6903..1fb8e6dbd0 100644
--- a/platform/windows/os_windows.cpp
+++ b/platform/windows/os_windows.cpp
@@ -1350,7 +1350,9 @@ void OS_Windows::finalize() {
 		memdelete(main_loop);
 
 	main_loop=NULL;
-	
+
+	memdelete(input);
+
 	visual_server->finish();
 	memdelete(visual_server);
 #ifdef OPENGL_ENABLED
@@ -1373,11 +1375,10 @@ void OS_Windows::finalize() {
 //		memdelete(debugger_connection_console);
 //}
 
-	audio_server->finish();
-	memdelete(audio_server);
 	memdelete(sample_manager);
 
-	memdelete(input);
+	audio_server->finish();
+	memdelete(audio_server);
 
 	physics_server->finish();
 	memdelete(physics_server);
@@ -1764,73 +1765,96 @@ bool OS_Windows::is_window_maximized() const{
 }
 
 
-void OS_Windows::print_error(const char* p_function,const char* p_file,int p_line,const char *p_code,const char*p_rationale,ErrorType p_type) {
-
-	HANDLE hCon=GetStdHandle(STD_OUTPUT_HANDLE);
-	if (!hCon || hCon==INVALID_HANDLE_VALUE) {
-		if (p_rationale && p_rationale[0]) {
+void OS_Windows::print_error(const char* p_function, const char* p_file, int p_line, const char* p_code, const char* p_rationale, ErrorType p_type) {
 
-			print("\E[1;31;40mERROR: %s: \E[1;37;40m%s\n",p_function,p_rationale);
-			print("\E[0;31;40m   At: %s:%i.\E[0;0;37m\n",p_file,p_line);
+	HANDLE hCon = GetStdHandle(STD_OUTPUT_HANDLE); // prints an error/warning/script-error report; colored when a stdout handle exists
+	if (!hCon || hCon == INVALID_HANDLE_VALUE) { // no usable stdout handle: plain text without color attributes
 
-		} else {
-			print("\E[1;31;40mERROR: %s: \E[1;37;40m%s\n",p_function,p_code);
-			print("\E[0;31;40m   At: %s:%i.\E[0;0;37m\n",p_file,p_line);
+		const char* err_details; // p_rationale when it is non-empty, otherwise p_code
+		if (p_rationale && p_rationale[0])
+			err_details = p_rationale;
+		else
+			err_details = p_code;
 
+		switch(p_type) { // the "At:" line is indented so that it lines up under each prefix
+			case ERR_ERROR:
+				print("ERROR: %s: %s\n", p_function, err_details);
+				print("   At: %s:%i\n", p_file, p_line);
+				break;
+			case ERR_WARNING:
+				print("WARNING: %s: %s\n", p_function, err_details);
+				print("     At: %s:%i\n", p_file, p_line);
+				break;
+			case ERR_SCRIPT:
+				print("SCRIPT ERROR: %s: %s\n", p_function, err_details);
+				print("          At: %s:%i\n", p_file, p_line);
+				break;
 		}
+
 	} else {
 
 		CONSOLE_SCREEN_BUFFER_INFO sbi; //original
-		GetConsoleScreenBufferInfo(hCon,&sbi);
-
-		SetConsoleTextAttribute(hCon,sbi.wAttributes);
+		GetConsoleScreenBufferInfo(hCon, &sbi); // NOTE(review): return value is not checked; sbi is read below regardless
 
+		WORD current_fg = sbi.wAttributes & (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY); // foreground bits currently set
+		WORD current_bg = sbi.wAttributes & (BACKGROUND_RED | BACKGROUND_GREEN | BACKGROUND_BLUE | BACKGROUND_INTENSITY); // background bits currently set
 
-
-		uint32_t basecol=0;
+		uint32_t basecol = 0; // foreground color chosen per error type
 		switch(p_type) {
 			case ERR_ERROR: basecol = FOREGROUND_RED; break;
-			case ERR_WARNING: basecol = FOREGROUND_RED|FOREGROUND_GREEN; break;
-			case ERR_SCRIPT: basecol = FOREGROUND_GREEN; break;
+			case ERR_WARNING: basecol = FOREGROUND_RED | FOREGROUND_GREEN; break;
+			case ERR_SCRIPT: basecol = FOREGROUND_RED | FOREGROUND_BLUE; break;
 		}
 
-		if (p_rationale && p_rationale[0]) {
-
-			SetConsoleTextAttribute(hCon,basecol|FOREGROUND_INTENSITY);
+		basecol |= current_bg; // keep the console's existing background behind the colored text
 
+		if (p_rationale && p_rationale[0]) {
 
+			SetConsoleTextAttribute(hCon, basecol | FOREGROUND_INTENSITY);
 			switch(p_type) {
 				case ERR_ERROR: print("ERROR: "); break;
 				case ERR_WARNING: print("WARNING: "); break;
 				case ERR_SCRIPT: print("SCRIPT ERROR: "); break;
 			}
 
-			SetConsoleTextAttribute(hCon,FOREGROUND_RED|FOREGROUND_BLUE|FOREGROUND_GREEN|FOREGROUND_INTENSITY);
-			print(" %s\n",p_rationale);
-			SetConsoleTextAttribute(hCon,basecol);
-			print("At: ");
-			SetConsoleTextAttribute(hCon,FOREGROUND_RED|FOREGROUND_BLUE|FOREGROUND_GREEN);
-			print(" %s:%i\n",p_file,p_line);
+			SetConsoleTextAttribute(hCon, current_fg | current_bg | FOREGROUND_INTENSITY); // message text: original colors, intensified
+			print("%s\n", p_rationale);
+
+			SetConsoleTextAttribute(hCon, basecol);
+			switch (p_type) {
+				case ERR_ERROR: print("   At: "); break;
+				case ERR_WARNING: print("     At: "); break;
+				case ERR_SCRIPT: print("          At: "); break;
+			}
 
+			SetConsoleTextAttribute(hCon, current_fg | current_bg); // location text: original colors
+			print("%s:%i\n", p_file, p_line);
 
 		} else {
-			SetConsoleTextAttribute(hCon,basecol|FOREGROUND_INTENSITY);
+
+			SetConsoleTextAttribute(hCon, basecol | FOREGROUND_INTENSITY);
 			switch(p_type) {
-				case ERR_ERROR: print("ERROR: %s: ",p_function); break;
-				case ERR_WARNING: print("WARNING: %s: ",p_function); break;
-				case ERR_SCRIPT: print("SCRIPT ERROR: %s: ",p_function); break;
+				case ERR_ERROR: print("ERROR: %s: ", p_function); break;
+				case ERR_WARNING: print("WARNING: %s: ", p_function); break;
+				case ERR_SCRIPT: print("SCRIPT ERROR: %s: ", p_function); break;
 			}
-			SetConsoleTextAttribute(hCon,FOREGROUND_RED|FOREGROUND_BLUE|FOREGROUND_GREEN|FOREGROUND_INTENSITY);
-			print(" %s\n",p_code);
-			SetConsoleTextAttribute(hCon,basecol);
-			print("At: ");
-			SetConsoleTextAttribute(hCon,FOREGROUND_RED|FOREGROUND_BLUE|FOREGROUND_GREEN);
-			print(" %s:%i\n",p_file,p_line);
+
+			SetConsoleTextAttribute(hCon, current_fg | current_bg | FOREGROUND_INTENSITY);
+			print("%s\n", p_code);
+
+			SetConsoleTextAttribute(hCon, basecol);
+			switch (p_type) {
+				case ERR_ERROR: print("   At: "); break;
+				case ERR_WARNING: print("     At: "); break;
+				case ERR_SCRIPT: print("          At: "); break;
+			}
+
+			SetConsoleTextAttribute(hCon, current_fg | current_bg);
+			print("%s:%i\n", p_file, p_line);
 		}
 
-		SetConsoleTextAttribute(hCon,sbi.wAttributes);
+		SetConsoleTextAttribute(hCon, sbi.wAttributes); // restore the attributes captured on entry
 	}
-
 }
 
 
diff --git a/platform/windows/packet_peer_udp_winsock.cpp b/platform/windows/packet_peer_udp_winsock.cpp
index aff92b8fc8..0ca2d358af 100644
--- a/platform/windows/packet_peer_udp_winsock.cpp
+++ b/platform/windows/packet_peer_udp_winsock.cpp
@@ -121,7 +121,7 @@ Error PacketPeerUDPWinsock::_poll(bool p_wait) {
 	struct sockaddr_in from = {0};
 	int len = sizeof(struct sockaddr_in);
 	int ret;
-	while ( (ret = recvfrom(sockfd, (char*)recv_buffer, MIN(sizeof(recv_buffer),rb.data_left()-12), 0, (struct sockaddr*)&from, &len)) > 0) {
+	while ( (ret = recvfrom(sockfd, (char*)recv_buffer, MIN((int)sizeof(recv_buffer),MAX(rb.space_left()-12, 0)), 0, (struct sockaddr*)&from, &len)) > 0) {
 		rb.write((uint8_t*)&from.sin_addr, 4);
 		uint32_t port = ntohs(from.sin_port);
 		rb.write((uint8_t*)&port, 4);
@@ -132,8 +132,25 @@ Error PacketPeerUDPWinsock::_poll(bool p_wait) {
 		++queue_count;
 	};
 
+	if (ret == SOCKET_ERROR){
+		int error = WSAGetLastError();
+
+		if (error == WSAEWOULDBLOCK){
+			// Expected when doing non-blocking sockets, retry later.
+		}
+		else if (error == WSAECONNRESET){
+			// If the remote target does not accept messages, this error may occur, but is harmless.
+			// Once the remote target gets available, this message will disappear for new messages.
+		}
+		else
+		{
+			close();
+			return FAILED;
+		}
+	}
+
 
-	if (ret == 0 || (ret == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) ) {
+	if (ret == 0) {
 		close();
 		return FAILED;
 	};
diff --git a/platform/winrt/os_winrt.cpp b/platform/winrt/os_winrt.cpp
index 3e06d9d59b..24be2f47e7 100644
--- a/platform/winrt/os_winrt.cpp
+++ b/platform/winrt/os_winrt.cpp
@@ -326,10 +326,11 @@ void OSWinrt::finalize() {
 	//if (debugger_connection_console) {
 //		memdelete(debugger_connection_console);
 //}
+	
+	memdelete(sample_manager);
 
 	audio_server->finish();
 	memdelete(audio_server);
-	memdelete(sample_manager);
 
 	memdelete(input);
 
@@ -422,17 +423,27 @@ void OSWinrt::get_fullscreen_mode_list(List<VideoMode> *p_list,int p_screen) con
 	
 }
 
-void OSWinrt::print_error(const char* p_function,const char* p_file,int p_line,const char *p_code,const char*p_rationale,ErrorType p_type) {
-
-	if (p_rationale && p_rationale[0]) {
-
-		print("\E[1;31;40mERROR: %s: \E[1;37;40m%s\n",p_function,p_rationale);
-		print("\E[0;31;40m   At: %s:%i.\E[0;0;37m\n",p_file,p_line);
+void OSWinrt::print_error(const char* p_function, const char* p_file, int p_line, const char* p_code, const char* p_rationale, ErrorType p_type) {
 
-	} else {
-		print("\E[1;31;40mERROR: %s: \E[1;37;40m%s\n",p_function,p_code);
-		print("\E[0;31;40m   At: %s:%i.\E[0;0;37m\n",p_file,p_line);
+	const char* err_details; // p_rationale when it is non-empty, otherwise p_code
+	if (p_rationale && p_rationale[0])
+		err_details = p_rationale;
+	else
+		err_details = p_code;
 
+	switch(p_type) { // plain-text report; the "At:" line is indented to line up under each prefix
+		case ERR_ERROR:
+			print("ERROR: %s: %s\n", p_function, err_details);
+			print("   At: %s:%i\n", p_file, p_line);
+			break;
+		case ERR_WARNING:
+			print("WARNING: %s: %s\n", p_function, err_details);
+			print("     At: %s:%i\n", p_file, p_line);
+			break;
+		case ERR_SCRIPT:
+			print("SCRIPT ERROR: %s: %s\n", p_function, err_details);
+			print("          At: %s:%i\n", p_file, p_line);
+			break;
 	}
 }
 
diff --git a/platform/x11/detect.py b/platform/x11/detect.py
index 9a52a7c92b..349e58cd7a 100644
--- a/platform/x11/detect.py
+++ b/platform/x11/detect.py
@@ -1,6 +1,7 @@
 
 import os
 import sys	
+import platform
 
 
 def is_active():
@@ -118,6 +119,8 @@ def configure(env):
 	elif (env["target"]=="release_debug"):
 
 		env.Append(CCFLAGS=['-O2','-ffast-math','-DDEBUG_ENABLED'])
+		if (env["debug_release"]=="yes"):
+			env.Append(CCFLAGS=['-g2'])
 
 	elif (env["target"]=="debug"):
 
@@ -145,7 +148,9 @@ def configure(env):
 
 	
 	env.Append(CPPFLAGS=['-DOPENGL_ENABLED','-DGLEW_ENABLED'])
-	env.Append(CPPFLAGS=["-DALSA_ENABLED"])
+	if platform.system() == 'Linux': # platform.platform() yields a long "Linux-<release>-..." string and never equals 'Linux'
+		env.Append(CPPFLAGS=["-DALSA_ENABLED"])
+		env.Append(LIBS=['asound'])
 
 	if (env["pulseaudio"]=="yes"):
 		if not os.system("pkg-config --exists libpulse-simple"):
@@ -156,7 +161,7 @@ def configure(env):
 			print("PulseAudio development libraries not found, disabling driver")
 
 	env.Append(CPPFLAGS=['-DX11_ENABLED','-DUNIX_ENABLED','-DGLES2_ENABLED','-DGLES_OVER_GL'])
-	env.Append(LIBS=['GL', 'GLU', 'pthread','asound','z']) #TODO detect linux/BSD!
+	env.Append(LIBS=['GL', 'GLU', 'pthread', 'z'])
 	#env.Append(CPPFLAGS=['-DMPC_FIXED_POINT'])
 
 #host compiler is default..
@@ -180,3 +185,5 @@ def configure(env):
 		env.Append(CPPFLAGS=['-DNEW_WM_API'])
 		env.ParseConfig('pkg-config xinerama --cflags --libs')
 
+	env["x86_opt_gcc"]=True
+
diff --git a/platform/x11/os_x11.cpp b/platform/x11/os_x11.cpp
index 34bd1184a1..4f1b475d06 100644
--- a/platform/x11/os_x11.cpp
+++ b/platform/x11/os_x11.cpp
@@ -32,9 +32,10 @@
 #include "key_mapping_x11.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include "print_string.h"
 #include "servers/physics/physics_server_sw.h"
-
+#include "errno.h"
 
 #include "X11/Xutil.h"
 
@@ -452,9 +453,10 @@ void OS_X11::finalize() {
 //		memdelete(debugger_connection_console);
 //}
 
+	memdelete(sample_manager);
+
 	audio_server->finish();
 	memdelete(audio_server);
-	memdelete(sample_manager);
 
 	visual_server->finish();
 	memdelete(visual_server);
@@ -1671,7 +1673,7 @@ void OS_X11::close_joystick(int p_id) {
 };
 
 void OS_X11::probe_joystick(int p_id) {
-	#ifndef __FreeBSD__
+	#if !defined(__FreeBSD__) && !defined(__OpenBSD__)
 
 	if (p_id == -1) {
 
@@ -1726,7 +1728,7 @@ void OS_X11::move_window_to_foreground() {
 }
 
 void OS_X11::process_joysticks() {
-	#ifndef __FreeBSD__
+	#if !defined(__FreeBSD__) && !defined(__OpenBSD__)
 	int bytes;
 	js_event events[32];
 	InputEvent ievent;
diff --git a/platform/x11/platform_config.h b/platform/x11/platform_config.h
index c01d0aa380..1556b56058 100644
--- a/platform/x11/platform_config.h
+++ b/platform/x11/platform_config.h
@@ -29,7 +29,7 @@
 #ifdef __linux__
 #include <alloca.h>
 #endif
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__OpenBSD__)
 #include <stdlib.h>
 #endif
 
diff --git a/scene/2d/SCsub b/scene/2d/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/2d/SCsub
+++ b/scene/2d/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/2d/animated_sprite.cpp b/scene/2d/animated_sprite.cpp
index 342b86b4c1..458246671c 100644
--- a/scene/2d/animated_sprite.cpp
+++ b/scene/2d/animated_sprite.cpp
@@ -165,7 +165,8 @@ void AnimatedSprite::_notification(int p_what) {
 			if (vflip)
 				dst_rect.size.y=-dst_rect.size.y;
 
-			texture->draw_rect(ci,dst_rect,false,modulate);
+			//texture->draw_rect(ci,dst_rect,false,modulate);
+			texture->draw_rect_region(ci,dst_rect,Rect2(Vector2(),texture->get_size()),modulate);
 //			VisualServer::get_singleton()->canvas_item_add_texture_rect_region(ci,dst_rect,texture->get_rid(),src_rect,modulate);
 
 		} break;
diff --git a/scene/2d/camera_2d.cpp b/scene/2d/camera_2d.cpp
index 49683da226..52ae5d2954 100644
--- a/scene/2d/camera_2d.cpp
+++ b/scene/2d/camera_2d.cpp
@@ -57,7 +57,9 @@ void Camera2D::_update_scroll() {
 void Camera2D::set_zoom(const Vector2 &p_zoom) {
 
 	zoom = p_zoom;
+	Point2 old_smoothed_camera_pos = smoothed_camera_pos; // saved so the smoothed position is the same after the call as before it
 	_update_scroll();
+	smoothed_camera_pos = old_smoothed_camera_pos; // put back the value saved before _update_scroll() ran
 };
 
 Vector2 Camera2D::get_zoom() const {
diff --git a/scene/2d/canvas_item.cpp b/scene/2d/canvas_item.cpp
index 357aaa225b..abd532c156 100644
--- a/scene/2d/canvas_item.cpp
+++ b/scene/2d/canvas_item.cpp
@@ -309,6 +309,15 @@ void CanvasItem::hide() {
 	_change_notify("visibility/visible");
 }
 
+void CanvasItem::set_hidden(bool p_hidden) {
+
+	// Only act on a real state change; _set_visible_() takes the visible
+	// flag, which is the inverse of the requested hidden flag.
+	const bool state_changes = (hidden != p_hidden);
+	if (state_changes)
+		_set_visible_(!p_hidden);
+}
+
 
 Variant CanvasItem::edit_get_state() const {
 
@@ -1043,6 +1052,7 @@ void CanvasItem::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("is_hidden"),&CanvasItem::is_hidden);
 	ObjectTypeDB::bind_method(_MD("show"),&CanvasItem::show);
 	ObjectTypeDB::bind_method(_MD("hide"),&CanvasItem::hide);
+	ObjectTypeDB::bind_method(_MD("set_hidden","hidden"),&CanvasItem::set_hidden);
 
 	ObjectTypeDB::bind_method(_MD("update"),&CanvasItem::update);
 
diff --git a/scene/2d/canvas_item.h b/scene/2d/canvas_item.h
index 4885256c64..667fedc956 100644
--- a/scene/2d/canvas_item.h
+++ b/scene/2d/canvas_item.h
@@ -190,6 +190,7 @@ public:
 	bool is_hidden() const;
 	void show();
 	void hide();
+	void set_hidden(bool p_hidden);
 
 	void update();
 
diff --git a/scene/2d/node_2d.cpp b/scene/2d/node_2d.cpp
index 6141b6a09e..52b112f090 100644
--- a/scene/2d/node_2d.cpp
+++ b/scene/2d/node_2d.cpp
@@ -354,7 +354,7 @@ void Node2D::look_at(const Vector2& p_pos) {
 
 float Node2D::get_angle_to(const Vector2& p_pos) const {
 
-	return (get_global_transform().affine_inverse().xform(p_pos)).atan2();
+	return (get_global_transform().affine_inverse().xform(p_pos)).angle(); // angle() of p_pos after mapping it through the inverse global transform
 }
 
 void Node2D::_bind_methods() {
diff --git a/scene/2d/path_2d.cpp b/scene/2d/path_2d.cpp
index 7ba1bb28b6..8f110b3931 100644
--- a/scene/2d/path_2d.cpp
+++ b/scene/2d/path_2d.cpp
@@ -118,7 +118,7 @@ void PathFollow2D::_update_transform() {
 		pos+=n*h_offset;
 		pos+=t*v_offset;
 
-		set_rot(t.atan2());
+		set_rot(t.angle());
 
 	} else {
 
diff --git a/scene/2d/ray_cast_2d.cpp b/scene/2d/ray_cast_2d.cpp
index acc4c620e6..4a199e3418 100644
--- a/scene/2d/ray_cast_2d.cpp
+++ b/scene/2d/ray_cast_2d.cpp
@@ -131,7 +131,7 @@ void RayCast2D::_notification(int p_what) {
 			if (!get_tree()->is_editor_hint()  && !get_tree()->is_debugging_collisions_hint())
 				break;
 			Matrix32 xf;
-			xf.rotate(cast_to.atan2());
+			xf.rotate(cast_to.angle());
 			xf.translate(Vector2(0,cast_to.length()));
 
 			//Vector2 tip = Vector2(0,s->get_length());
diff --git a/scene/2d/tile_map.cpp b/scene/2d/tile_map.cpp
index 418ee192b2..167b637bdc 100644
--- a/scene/2d/tile_map.cpp
+++ b/scene/2d/tile_map.cpp
@@ -1031,13 +1031,12 @@ Vector2 TileMap::world_to_map(const Vector2& p_pos) const{
 	switch(half_offset) {
 
 		case HALF_OFFSET_X: {
-			if (int(ret.y)&1) {
-
+			if ( (int(ret.y) - (ret.y < int(ret.y) ? 1 : 0)) & 1 ) { // parity of floor(ret.y); also right for 0 and exact negative integers
 				ret.x-=0.5;
 			}
 		} break;
 		case HALF_OFFSET_Y: {
-			if (int(ret.x)&1) {
+			if ( (int(ret.x) - (ret.x < int(ret.x) ? 1 : 0)) & 1 ) { // parity of floor(ret.x)
 				ret.y-=0.5;
 			}
 		} break;
diff --git a/scene/3d/SCsub b/scene/3d/SCsub
index 3c2144bedc..116e641593 100644
--- a/scene/3d/SCsub
+++ b/scene/3d/SCsub
@@ -4,10 +4,8 @@ Import('env')
 if (env["disable_3d"]=="yes"):
 
 	env.scene_sources.append("3d/spatial.cpp")
-	env.scene_sources.append("3d/skeleton.cpp")        
+	env.scene_sources.append("3d/skeleton.cpp")
 else:
 	env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/3d/physics_body.cpp b/scene/3d/physics_body.cpp
index d61859a3d0..de50484a1e 100644
--- a/scene/3d/physics_body.cpp
+++ b/scene/3d/physics_body.cpp
@@ -1073,7 +1073,7 @@ Vector3 KinematicBody::move_to(const Vector3& p_position) {
 	return move(p_position-get_global_transform().origin);
 }
 
-bool KinematicBody::can_move_to(const Vector3& p_position, bool p_discrete) {
+bool KinematicBody::can_teleport_to(const Vector3& p_position) {
 
 	ERR_FAIL_COND_V(!is_inside_tree(),false);
 	PhysicsDirectSpaceState *dss = PhysicsServer::get_singleton()->space_get_direct_state(get_world()->get_space());
@@ -1089,25 +1089,18 @@ bool KinematicBody::can_move_to(const Vector3& p_position, bool p_discrete) {
 	if (collide_character)
 		mask|=PhysicsDirectSpaceState::TYPE_MASK_CHARACTER_BODY;
 
-	Vector3 motion = p_position-get_global_transform().origin;
 	Transform xform=get_global_transform();
-
-	if (true || p_discrete) {
-
-		xform.origin+=motion;
-		motion=Vector3();
-	}
+	xform.origin=p_position;
 
 	Set<RID> exclude;
 	exclude.insert(get_rid());
 
-	//fill exclude list..
 	for(int i=0;i<get_shape_count();i++) {
 
 		if (is_shape_set_as_trigger(i))
 			continue;
 
-		bool col = dss->intersect_shape(get_shape(i)->get_rid(), xform * get_shape_transform(i),0,NULL,0,exclude,get_layer_mask(),mask);
+		bool col = dss->intersect_shape(get_shape(i)->get_rid(), xform * get_shape_transform(i),0,NULL,1,exclude,get_layer_mask(),mask);
 		if (col)
 			return false;
 	}
@@ -1205,7 +1198,7 @@ void KinematicBody::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("move","rel_vec"),&KinematicBody::move);
 	ObjectTypeDB::bind_method(_MD("move_to","position"),&KinematicBody::move_to);
 
-	ObjectTypeDB::bind_method(_MD("can_move_to","position"),&KinematicBody::can_move_to);
+	ObjectTypeDB::bind_method(_MD("can_teleport_to","position"),&KinematicBody::can_teleport_to);
 
 	ObjectTypeDB::bind_method(_MD("is_colliding"),&KinematicBody::is_colliding);
 
diff --git a/scene/3d/physics_body.h b/scene/3d/physics_body.h
index 66490ba925..0e63b77118 100644
--- a/scene/3d/physics_body.h
+++ b/scene/3d/physics_body.h
@@ -304,7 +304,7 @@ public:
 	Vector3 move(const Vector3& p_motion);
 	Vector3 move_to(const Vector3& p_position);
 
-	bool can_move_to(const Vector3& p_position,bool p_discrete=false);
+	bool can_teleport_to(const Vector3& p_position);
 	bool is_colliding() const;
 	Vector3 get_collision_pos() const;
 	Vector3 get_collision_normal() const;
diff --git a/scene/3d/skeleton.cpp b/scene/3d/skeleton.cpp
index 4712ba308a..cb81228dff 100644
--- a/scene/3d/skeleton.cpp
+++ b/scene/3d/skeleton.cpp
@@ -250,7 +250,7 @@ void Skeleton::_notification(int p_what) {
 					ERR_CONTINUE(!obj);
 					Spatial *sp = obj->cast_to<Spatial>();
 					ERR_CONTINUE(!sp);
-					sp->set_transform(b.pose_global * b.rest_global_inverse);
+					sp->set_transform(b.pose_global);
 				}
 			}
 
diff --git a/scene/3d/spatial.cpp b/scene/3d/spatial.cpp
index a65f68ed2c..7d48420a83 100644
--- a/scene/3d/spatial.cpp
+++ b/scene/3d/spatial.cpp
@@ -594,6 +594,15 @@ bool Spatial::is_hidden() const{
 	return !data.visible;
 }
 
+void Spatial::set_hidden(bool p_hidden) {
+	// "Hidden" is the inverse of data.visible (see is_hidden()), so !p_hidden is forwarded below.
+	if (data.visible != p_hidden) { // data.visible already equals !p_hidden: nothing to change
+		return;
+	}
+	
+	_set_visible_(!p_hidden);
+}
+
 void Spatial::_set_visible_(bool p_visible) {
 
 	if (p_visible)
@@ -742,6 +751,7 @@ void Spatial::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("hide"), &Spatial::hide);
 	ObjectTypeDB::bind_method(_MD("is_visible"), &Spatial::is_visible);
 	ObjectTypeDB::bind_method(_MD("is_hidden"), &Spatial::is_hidden);
+	ObjectTypeDB::bind_method(_MD("set_hidden","hidden"), &Spatial::set_hidden);
 
 	ObjectTypeDB::bind_method(_MD("_set_visible_"), &Spatial::_set_visible_);
 	ObjectTypeDB::bind_method(_MD("_is_visible_"), &Spatial::_is_visible_);
diff --git a/scene/3d/spatial.h b/scene/3d/spatial.h
index 7fa6099d7a..b1e3a82868 100644
--- a/scene/3d/spatial.h
+++ b/scene/3d/spatial.h
@@ -191,6 +191,7 @@ public:
 	void hide();
 	bool is_visible() const;
 	bool is_hidden() const;
+	void set_hidden(bool p_hidden);
 
 #ifdef TOOLS_ENABLED
 	void set_import_transform(const Transform& p_transform)	;
diff --git a/scene/SCsub b/scene/SCsub
index 8c4f0499c4..6d1dd0044f 100644
--- a/scene/SCsub
+++ b/scene/SCsub
@@ -18,5 +18,3 @@ SConscript('io/SCsub');
 lib = env.Library("scene",env.scene_sources)
 
 env.Prepend(LIBS=[lib])
-
-
diff --git a/scene/animation/SCsub b/scene/animation/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/animation/SCsub
+++ b/scene/animation/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/animation/animation_player.cpp b/scene/animation/animation_player.cpp
index c2ea1c8bb6..b040e59d16 100644
--- a/scene/animation/animation_player.cpp
+++ b/scene/animation/animation_player.cpp
@@ -268,6 +268,7 @@ void AnimationPlayer::_generate_node_caches(AnimationData* p_anim) {
 		TrackNodeCacheKey key;
 		key.id=id;
 		key.bone_idx=bone_idx;
+
 		
 		if (node_cache_map.has(key)) {
 		
@@ -278,6 +279,7 @@ void AnimationPlayer::_generate_node_caches(AnimationData* p_anim) {
 			node_cache_map[key]=TrackNodeCache();
 			
 			p_anim->node_cache[i]=&node_cache_map[key];
+			p_anim->node_cache[i]->path=a->track_get_path(i);
 			p_anim->node_cache[i]->node=child;
 			p_anim->node_cache[i]->resource=resource;
 			p_anim->node_cache[i]->node_2d=child->cast_to<Node2D>();
@@ -320,6 +322,7 @@ void AnimationPlayer::_generate_node_caches(AnimationData* p_anim) {
 				pa.prop=property;
 				pa.object=resource.is_valid()?(Object*)resource.ptr():(Object*)child;
 				pa.special=SP_NONE;
+				pa.owner=p_anim->node_cache[i];
 				if (false && p_anim->node_cache[i]->node_2d) {
 
 					if (pa.prop==SceneStringNames::get_singleton()->transform_pos)
@@ -410,7 +413,7 @@ void AnimationPlayer::_animation_process_animation(AnimationData* p_anim,float p
 				TrackNodeCache::PropertyAnim *pa = &E->get();
 
 
-				if (a->value_track_is_continuous(i) || p_delta==0) {
+				if (a->value_track_is_continuous(i) || p_delta==0) { //delta == 0 means seek
 
 
 					Variant value=a->value_track_interpolate(i,p_time);
@@ -436,10 +439,42 @@ void AnimationPlayer::_animation_process_animation(AnimationData* p_anim,float p
 						Variant value=a->track_get_key_value(i,F->get());
 						switch(pa->special) {
 
-							case SP_NONE: pa->object->set(pa->prop,value); break; //you are not speshul
-							case SP_NODE2D_POS: static_cast<Node2D*>(pa->object)->set_pos(value); break;
-							case SP_NODE2D_ROT: static_cast<Node2D*>(pa->object)->set_rot(Math::deg2rad(value)); break;
-							case SP_NODE2D_SCALE: static_cast<Node2D*>(pa->object)->set_scale(value); break;
+							case SP_NONE: {
+								bool valid;
+								pa->object->set(pa->prop,value,&valid);  //you are not speshul
+#ifdef DEBUG_ENABLED
+								if (!valid) {
+									ERR_PRINTS("Failed setting track value '"+String(pa->owner->path)+"'. Check if property exists or the type of key is valid");
+								}
+#endif
+
+							} break;
+							case SP_NODE2D_POS: {
+#ifdef DEBUG_ENABLED
+								if (value.get_type()!=Variant::VECTOR2) {
+									ERR_PRINTS("Position key at time "+rtos(p_time)+" in Animation Track '"+String(pa->owner->path)+"' not of type Vector2()");
+								}
+#endif
+								static_cast<Node2D*>(pa->object)->set_pos(value);
+							} break;
+							case SP_NODE2D_ROT: {
+#ifdef DEBUG_ENABLED
+								if (!value.is_num()) {
+									ERR_PRINTS("Rotation key at time "+rtos(p_time)+" in Animation Track '"+String(pa->owner->path)+"' not numerical");
+								}
+#endif
+								// The key value is passed through deg2rad before being handed to set_rot().
+								static_cast<Node2D*>(pa->object)->set_rot(Math::deg2rad(value));
+							} break;
+							case SP_NODE2D_SCALE: {
+#ifdef DEBUG_ENABLED
+								if (value.get_type()!=Variant::VECTOR2) {
+									ERR_PRINTS("Scale key at time "+rtos(p_time)+" in Animation Track '"+String(pa->owner->path)+"' not of type Vector2()");
+								}
+#endif
+
+								static_cast<Node2D*>(pa->object)->set_scale(value);
+							} break;
 						}
 
 					}
@@ -607,13 +642,53 @@ void AnimationPlayer::_animation_update_transforms() {
 		ERR_CONTINUE( pa->accum_pass!=accum_pass );
 
 #if 1
-		switch(pa->special) {
+/*		switch(pa->special) {
 
 
 			case SP_NONE: pa->object->set(pa->prop,pa->value_accum); break; //you are not speshul
 			case SP_NODE2D_POS: static_cast<Node2D*>(pa->object)->set_pos(pa->value_accum); break;
 			case SP_NODE2D_ROT: static_cast<Node2D*>(pa->object)->set_rot(Math::deg2rad(pa->value_accum)); break;
 			case SP_NODE2D_SCALE: static_cast<Node2D*>(pa->object)->set_scale(pa->value_accum); break;
+		}*/
+
+		switch(pa->special) {
+
+			case SP_NONE: {
+				bool valid;
+				pa->object->set(pa->prop,pa->value_accum,&valid);  //you are not speshul
+#ifdef DEBUG_ENABLED
+				if (!valid) {
+					ERR_PRINTS("Failed setting key at time "+rtos(playback.current.pos)+" in Animation '"+get_current_animation()+"', Track '"+String(pa->owner->path)+"'. Check if property exists or the type of key is right for the property");
+				}
+#endif
+
+			} break;
+			case SP_NODE2D_POS: {
+#ifdef DEBUG_ENABLED
+				if (pa->value_accum.get_type()!=Variant::VECTOR2) {
+					ERR_PRINTS("Position key at time "+rtos(playback.current.pos)+" in Animation '"+get_current_animation()+"', Track '"+String(pa->owner->path)+"' not of type Vector2()");
+				}
+#endif
+				static_cast<Node2D*>(pa->object)->set_pos(pa->value_accum);
+			} break;
+			case SP_NODE2D_ROT: {
+#ifdef DEBUG_ENABLED
+				if (!pa->value_accum.is_num()) {
+					ERR_PRINTS("Rotation key at time "+rtos(playback.current.pos)+" in Animation '"+get_current_animation()+"', Track '"+String(pa->owner->path)+"' not numerical");
+				}
+#endif
+				// The accumulated value is passed through deg2rad before being handed to set_rot().
+				static_cast<Node2D*>(pa->object)->set_rot(Math::deg2rad(pa->value_accum));
+			} break;
+			case SP_NODE2D_SCALE: {
+#ifdef DEBUG_ENABLED
+				if (pa->value_accum.get_type()!=Variant::VECTOR2) {
+					ERR_PRINTS("Scale key at time "+rtos(playback.current.pos)+" in Animation '"+get_current_animation()+"', Track '"+String(pa->owner->path)+"' not of type Vector2()");
+				}
+#endif
+
+				static_cast<Node2D*>(pa->object)->set_scale(pa->value_accum);
+			} break;
 		}
 #else
 
diff --git a/scene/animation/animation_player.h b/scene/animation/animation_player.h
index 1e3c37c4d6..18cedee796 100644
--- a/scene/animation/animation_player.h
+++ b/scene/animation/animation_player.h
@@ -68,6 +68,7 @@ private:
 
 	struct TrackNodeCache {
 
+		NodePath path;
 		uint32_t id;
 		RES resource;
 		Node *node;
@@ -84,6 +85,7 @@ private:
 
 		struct PropertyAnim {
 
+			TrackNodeCache *owner;
 			SpecialProperty special; //small optimization
 			StringName prop;
 			Object *object;
diff --git a/scene/animation/animation_tree_player.cpp b/scene/animation/animation_tree_player.cpp
index 14f2110915..c7e259c3c6 100644
--- a/scene/animation/animation_tree_player.cpp
+++ b/scene/animation/animation_tree_player.cpp
@@ -29,6 +29,42 @@
 #include "animation_tree_player.h"
 #include "animation_player.h"
 
+#include "scene/scene_string_names.h"
+
+
+void AnimationTreePlayer::set_animation_process_mode(AnimationProcessMode p_mode) {
+	// Changes which process callback drives the tree; does nothing when the mode is unchanged.
+	if (animation_process_mode == p_mode)
+		return;
+	// If currently processing: stop under the old mode, swap the mode, then restart under the new one.
+	bool pr = processing;
+	if (pr)
+		_set_process(false);
+	animation_process_mode = p_mode;
+	if (pr)
+		_set_process(true);
+
+}
+
+AnimationTreePlayer::AnimationProcessMode AnimationTreePlayer::get_animation_process_mode() const{
+	// Returns the current value of the animation_process_mode member.
+	return animation_process_mode;
+}
+
+void AnimationTreePlayer::_set_process(bool p_process, bool p_force)
+{
+	if (processing == p_process && !p_force) // already in the requested state and not forced
+		return;
+	// Only the callback matching the current mode is toggled, and it is only enabled while 'active' is set.
+	switch (animation_process_mode) {
+
+	case ANIMATION_PROCESS_FIXED: set_fixed_process(p_process && active); break;
+	case ANIMATION_PROCESS_IDLE: set_process(p_process && active); break;
+	}
+
+	processing = p_process;
+}
+
 
 bool AnimationTreePlayer::_set(const StringName& p_name, const Variant& p_value) {
 
@@ -42,6 +78,11 @@ bool AnimationTreePlayer::_set(const StringName& p_name, const Variant& p_value)
 		return true;
 	}
 
+	if(String(p_name) == SceneStringNames::get_singleton()->playback_active) {
+		set_active(p_value);
+		return true;
+	}
+
 	if (String(p_name)!="data")
 		return false;
 
@@ -190,6 +231,11 @@ bool AnimationTreePlayer::_get(const StringName& p_name,Variant &r_ret) const {
 		return true;
 	}
 
+	if (String(p_name) == "playback/active") {
+		r_ret=is_active();
+		return true;
+	}
+
 	if (String(p_name)!="data")
 		return false;
 
@@ -342,11 +388,24 @@ void AnimationTreePlayer::_get_property_list( List<PropertyInfo> *p_list) const
 	p_list->push_back( PropertyInfo(Variant::DICTIONARY,"data",PROPERTY_HINT_NONE,"",PROPERTY_USAGE_STORAGE|PROPERTY_USAGE_NETWORK) );
 }
 
+void AnimationTreePlayer::advance(float p_time) {
+	// Manually runs one _process_animation() step, using p_time as the delta.
+	_process_animation(p_time);
+}
 
 void AnimationTreePlayer::_notification(int p_what) {
 
 	switch(p_what) {
 
+		case NOTIFICATION_ENTER_TREE: {
+
+			if (!processing) {
+				//make sure that a previous process state was not saved
+				//only process if "processing" is set
+				set_fixed_process(false);
+				set_process(false);
+			}
+		} break;
 		case NOTIFICATION_READY: {
 			dirty_caches=true;
 			if (master!=NodePath()) {
@@ -354,7 +413,19 @@ void AnimationTreePlayer::_notification(int p_what) {
 			}
 		} break;
 		case NOTIFICATION_PROCESS: {
-			_process_animation();
+			if (animation_process_mode==ANIMATION_PROCESS_FIXED)
+				break;
+
+			if (processing)
+				_process_animation( get_process_delta_time() );
+		} break;
+		case NOTIFICATION_FIXED_PROCESS: {
+		
+			if (animation_process_mode==ANIMATION_PROCESS_IDLE)
+				break;
+
+			if (processing)
+				_process_animation(get_fixed_process_delta_time());
 		} break;
 	}
 
@@ -656,10 +727,7 @@ float AnimationTreePlayer::_process_node(const StringName& p_node,AnimationNode
 }
 
 
-void AnimationTreePlayer::_process_animation() {
-
-	if (!active)
-		return;
+void AnimationTreePlayer::_process_animation(float p_delta) {
 
 	if (last_error!=CONNECT_OK)
 		return;
@@ -675,7 +743,7 @@ void AnimationTreePlayer::_process_animation() {
 		_process_node(out_name,&prev, 1.0, 0, true );
 		reset_request=false;
 	} else
-		_process_node(out_name,&prev, 1.0, get_process_delta_time(), false );
+		_process_node(out_name,&prev, 1.0, p_delta, false );
 
 	if (dirty_caches) {
 		//some animation changed.. ignore this pass
@@ -1520,8 +1588,12 @@ void AnimationTreePlayer::recompute_caches() {
 
 void AnimationTreePlayer::set_active(bool p_active) {
 
-	active=p_active;
-	set_process(active);
+	if (active == p_active)
+		return;
+
+	active = p_active;
+	processing = active;
+	_set_process(processing, true); // force: 'processing' already equals the argument, so an unforced call would return early
 }
 
 bool AnimationTreePlayer::is_active() const {
@@ -1743,13 +1815,18 @@ void AnimationTreePlayer::_bind_methods() {
 
 	ObjectTypeDB::bind_method(_MD("get_node_list"),&AnimationTreePlayer::_get_node_list);
 
+	ObjectTypeDB::bind_method(_MD("set_animation_process_mode","mode"),&AnimationTreePlayer::set_animation_process_mode);
+	ObjectTypeDB::bind_method(_MD("get_animation_process_mode"),&AnimationTreePlayer::get_animation_process_mode);
 
+	ObjectTypeDB::bind_method(_MD("advance", "delta"), &AnimationTreePlayer::advance);
 
 
 	ObjectTypeDB::bind_method(_MD("reset"),&AnimationTreePlayer::reset);
 
 	ObjectTypeDB::bind_method(_MD("recompute_caches"),&AnimationTreePlayer::recompute_caches);	
 
+	ADD_PROPERTY(PropertyInfo(Variant::INT, "playback/process_mode", PROPERTY_HINT_ENUM, "Fixed,Idle"), _SCS("set_animation_process_mode"), _SCS("get_animation_process_mode"));
+
 	BIND_CONSTANT( NODE_OUTPUT );
 	BIND_CONSTANT( NODE_ANIMATION );
 	BIND_CONSTANT( NODE_ONESHOT );
@@ -1770,6 +1847,9 @@ AnimationTreePlayer::AnimationTreePlayer() {
 	out_name="out";
 	out->pos=Point2(40,40);
 	node_map.insert( out_name , out);
+	// Assign the member directly: declaring a local of the same name here would shadow it and leave the member uninitialized.
+	animation_process_mode = ANIMATION_PROCESS_IDLE;
+	processing = false;
 	active=false;
 	dirty_caches=true;
 	reset_request=false;
diff --git a/scene/animation/animation_tree_player.h b/scene/animation/animation_tree_player.h
index 9e936304c6..9ea5ccf330 100644
--- a/scene/animation/animation_tree_player.h
+++ b/scene/animation/animation_tree_player.h
@@ -34,6 +34,7 @@
 #include "scene/3d/spatial.h"
 #include "scene/3d/skeleton.h"
 #include "scene/main/misc.h"
+#include "animation_player.h"
 
 
 class AnimationTreePlayer : public Node {
@@ -42,7 +43,10 @@ class AnimationTreePlayer : public Node {
 	OBJ_CATEGORY("Animation Nodes");
 
 public:
-
+	enum AnimationProcessMode {
+		ANIMATION_PROCESS_FIXED, // tree is advanced via set_fixed_process() / NOTIFICATION_FIXED_PROCESS
+		ANIMATION_PROCESS_IDLE, // tree is advanced via set_process() / NOTIFICATION_PROCESS
+	};
 
 	enum NodeType {
 
@@ -256,13 +260,15 @@ private:
 
 	ConnectError last_error;
 	AnimationNode *active_list;
+	AnimationProcessMode animation_process_mode;
+	bool processing;
 	bool active;
 	bool dirty_caches;
 	Map<StringName,NodeBase*> node_map;
 
 	// return time left to finish animation
 	float _process_node(const StringName& p_node,AnimationNode **r_prev_anim, float p_weight,float p_step, bool p_seek=false,const HashMap<NodePath,bool> *p_filter=NULL, float p_reverse_weight=0);
-	void _process_animation();
+	void _process_animation(float p_delta);
 	bool reset_request;
 
 	ConnectError _cycle_test(const StringName &p_at_node);
@@ -409,12 +415,21 @@ public:
 
 	ConnectError get_last_error() const;
 
+	void set_animation_process_mode(AnimationProcessMode p_mode);
+	AnimationProcessMode get_animation_process_mode() const;
+
+	void _set_process(bool p_process, bool p_force = false);
+
+	void advance(float p_time);
+
 	AnimationTreePlayer();
 	~AnimationTreePlayer();
 
 };
 
 VARIANT_ENUM_CAST( AnimationTreePlayer::NodeType );
+VARIANT_ENUM_CAST( AnimationTreePlayer::AnimationProcessMode );
+
 #endif // ANIMATION_TREE_PLAYER_H
 
 
diff --git a/scene/audio/SCsub b/scene/audio/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/audio/SCsub
+++ b/scene/audio/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/gui/SCsub b/scene/gui/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/gui/SCsub
+++ b/scene/gui/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/gui/button_group.cpp b/scene/gui/button_group.cpp
index 8d1fa80b84..c92d7f2696 100644
--- a/scene/gui/button_group.cpp
+++ b/scene/gui/button_group.cpp
@@ -155,6 +155,6 @@ void ButtonGroup::_bind_methods() {
 
 }
 
-ButtonGroup::ButtonGroup()
+ButtonGroup::ButtonGroup() : BoxContainer(true)
 {
 }
diff --git a/scene/gui/button_group.h b/scene/gui/button_group.h
index 24edf94994..74e847e937 100644
--- a/scene/gui/button_group.h
+++ b/scene/gui/button_group.h
@@ -29,14 +29,14 @@
 #ifndef BUTTON_GROUP_H
 #define BUTTON_GROUP_H
 
-#include "scene/gui/control.h"
+#include "scene/gui/box_container.h"
 
 
 class BaseButton;
 
-class ButtonGroup : public Control {
+class ButtonGroup : public BoxContainer {
 
-	OBJ_TYPE(ButtonGroup,Control);
+	OBJ_TYPE(ButtonGroup,BoxContainer);
 
 
 	Set<BaseButton*> buttons;
diff --git a/scene/gui/container.cpp b/scene/gui/container.cpp
index 6c74bc3977..8cdf4dd039 100644
--- a/scene/gui/container.cpp
+++ b/scene/gui/container.cpp
@@ -52,6 +52,14 @@ void Container::add_child_notify(Node *p_child) {
 
 }
 
+void Container::move_child_notify(Node *p_child) {
+	// Children that are not Controls are ignored.
+	if (!p_child->cast_to<Control>())
+		return;
+	// A Control child was involved: queue a sort of this container.
+	queue_sort();
+}
+
 void Container::remove_child_notify(Node *p_child) {
 
 
diff --git a/scene/gui/container.h b/scene/gui/container.h
index ba9bf2d60f..04d5d6ab36 100644
--- a/scene/gui/container.h
+++ b/scene/gui/container.h
@@ -42,6 +42,7 @@ protected:
 
 	void queue_sort();
 	virtual void add_child_notify(Node *p_child);
+	virtual void move_child_notify(Node *p_child);
 	virtual void remove_child_notify(Node *p_child);
 
 	void _notification(int p_what);
diff --git a/scene/gui/dialogs.cpp b/scene/gui/dialogs.cpp
index 0c0f924f52..efda8a66e1 100644
--- a/scene/gui/dialogs.cpp
+++ b/scene/gui/dialogs.cpp
@@ -308,7 +308,9 @@ void AcceptDialog::_bind_methods() {
 	ADD_SIGNAL( MethodInfo("confirmed") );
 	ADD_SIGNAL( MethodInfo("custom_action",PropertyInfo(Variant::STRING,"action")) );
 
-	
+	ADD_PROPERTYNZ( PropertyInfo(Variant::STRING,"dialog/text",PROPERTY_HINT_MULTILINE_TEXT,"",PROPERTY_USAGE_DEFAULT_INTL),_SCS("set_text"),_SCS("get_text"));
+	ADD_PROPERTY( PropertyInfo(Variant::BOOL, "dialog/hide_on_ok"),_SCS("set_hide_on_ok"),_SCS("get_hide_on_ok") );
+
 }
 
 
diff --git a/scene/gui/file_dialog.cpp b/scene/gui/file_dialog.cpp
index 8e428fd71c..22e3a81e52 100644
--- a/scene/gui/file_dialog.cpp
+++ b/scene/gui/file_dialog.cpp
@@ -46,6 +46,11 @@ VBoxContainer *FileDialog::get_vbox() {
 }
 
 void FileDialog::_notification(int p_what) {
+
+	if (p_what==NOTIFICATION_ENTER_TREE) {
+
+		refresh->set_icon(get_icon("Reload","EditorIcons"));
+	}
 	
 	if (p_what==NOTIFICATION_DRAW) {
 
@@ -618,7 +623,7 @@ void FileDialog::_update_drives() {
 	}
 }
 
-bool FileDialog::default_show_hidden_files=true;
+bool FileDialog::default_show_hidden_files=false;
 
 
 void FileDialog::_bind_methods() {
@@ -700,6 +705,10 @@ FileDialog::FileDialog() {
 	pathhb->add_child(dir);
 	dir->set_h_size_flags(SIZE_EXPAND_FILL);
 
+	refresh = memnew( ToolButton );
+	refresh->connect("pressed",this,"_update_file_list");
+	pathhb->add_child(refresh);
+
 	drives = memnew( OptionButton );
 	pathhb->add_child(drives);
 	drives->connect("item_selected",this,"_select_drive");
diff --git a/scene/gui/file_dialog.h b/scene/gui/file_dialog.h
index ec42c7744a..370088b215 100644
--- a/scene/gui/file_dialog.h
+++ b/scene/gui/file_dialog.h
@@ -34,6 +34,7 @@
 #include "scene/gui/line_edit.h"
 #include "scene/gui/option_button.h"
 #include "scene/gui/dialogs.h"
+#include "scene/gui/tool_button.h"
 #include "os/dir_access.h"
 #include "box_container.h"
 /**
@@ -86,6 +87,8 @@ private:
 	OptionButton *filter;
 	DirAccess *dir_access;
 	ConfirmationDialog *confirm_save;
+
+	ToolButton *refresh;
 	
 	Vector<String> filters;
 
diff --git a/scene/gui/item_list.cpp b/scene/gui/item_list.cpp
index 40fade840c..f035cb7722 100644
--- a/scene/gui/item_list.cpp
+++ b/scene/gui/item_list.cpp
@@ -235,6 +235,37 @@ int ItemList::get_current() const {
 	return current;
 }
 
+void ItemList::move_item(int p_item,int p_to_pos) {
+	// Moves the item at index p_item so it lands before the item currently at p_to_pos (p_to_pos==size appends).
+	ERR_FAIL_INDEX(p_item,items.size());
+	ERR_FAIL_INDEX(p_to_pos,items.size()+1);
+
+	Item it=items[p_item];
+	items.remove(p_item);
+	// The removal shifted every later index down by one; from here on p_to_pos is the item's final index.
+	if (p_to_pos>p_item) {
+		p_to_pos--;
+	}
+
+	if (p_to_pos>=items.size()) {
+		items.push_back(it);
+	} else {
+		items.insert(p_to_pos,it);
+	}
+	// Keep 'current' pointing at the same item it referred to before the move.
+	if (current<0) {
+		//nothing is current, nothing to fix up
+	} else if (p_item==current) {
+		current=p_to_pos;
+	} else if (p_to_pos>p_item && current>p_item && current<=p_to_pos) {
+		current--; //items in (p_item, p_to_pos] moved one slot towards the front
+	} else if (p_to_pos<p_item && current<p_item && current>=p_to_pos) {
+		current++; //items in [p_to_pos, p_item) moved one slot towards the back
+	}
+
+
+	update();
+}
 
 int ItemList::get_item_count() const{
 
diff --git a/scene/gui/item_list.h b/scene/gui/item_list.h
index 7cf58a6426..bd3cf6484e 100644
--- a/scene/gui/item_list.h
+++ b/scene/gui/item_list.h
@@ -101,6 +101,7 @@ public:
 	void set_current(int p_current);
 	int get_current() const;
 
+	void move_item(int p_item,int p_to_pos);
 
 	int get_item_count() const;
 	void remove_item(int p_idx);
diff --git a/scene/gui/line_edit.cpp b/scene/gui/line_edit.cpp
index 2b4d7db01e..10ba20a833 100644
--- a/scene/gui/line_edit.cpp
+++ b/scene/gui/line_edit.cpp
@@ -145,6 +145,13 @@ void LineEdit::_input_event(InputEvent p_event) {
 
 							int old_cursor_pos = cursor_pos;
 							text = undo_text;
+
+							Ref<Font> font = get_font("font");
+							cached_width = 0; // undo replaced the whole text: rebuild the cached width from scratch
+							if (font != NULL) { // same guard as the other cached_width updates in this file
+								for (int i = 0; i<text.length(); i++)
+									cached_width += font->get_char_size(text[i]).width;
+							}
 							if(old_cursor_pos > text.length()) {
 								set_cursor_pos(text.length());
 							} else {
@@ -164,6 +171,15 @@ void LineEdit::_input_event(InputEvent p_event) {
 							selection_clear();
 							undo_text = text;
 							text = text.substr(cursor_pos,text.length()-cursor_pos);
+
+							Ref<Font> font = get_font("font");
+
+							cached_width = 0;
+							if (font != NULL) {
+								for (int i = 0; i < text.length(); i++)
+									cached_width += font->get_char_size(text[i]).width;
+							}
+
 							set_cursor_pos(0);
 							emit_signal("text_changed",text);
 							_change_notify("text");
@@ -192,6 +208,9 @@ void LineEdit::_input_event(InputEvent p_event) {
 						}
 
 					} break;
+					case (KEY_A): { //Select All
+						select();
+					} break;
 					default: { handled=false;}
 				}
 
@@ -303,6 +322,18 @@ void LineEdit::_input_event(InputEvent p_event) {
 	}
 }
 
+void LineEdit::set_align(Align p_align) {
+	// 4 == number of values in the Align enum (LEFT, CENTER, RIGHT, FILL).
+	ERR_FAIL_INDEX(p_align, 4);
+	align = p_align;
+	update();
+}
+
+LineEdit::Align LineEdit::get_align() const{
+	// Returns the current value of the align member.
+	return align;
+}
+
 Variant LineEdit::get_drag_data(const Point2& p_point) {
 
 	if (selection.drag_attempt && selection.enabled) {
@@ -325,7 +356,15 @@ void LineEdit::drop_data(const Point2& p_point,const Variant& p_data){
 	if (p_data.get_type()==Variant::STRING) {
 		set_cursor_at_pixel_pos(p_point.x);
 		int selected = selection.end - selection.begin;
+
+		Ref<Font> font = get_font("font");
+		if (font != NULL) {
+			for (int i = selection.begin; i < selection.end; i++)
+				cached_width -= font->get_char_size(text[i]).width;
+		}
+
 		text.erase(selection.begin, selected);
+
 		append_at_cursor(p_data);
 		selection.begin = cursor_pos-selected;
 		selection.end = cursor_pos;
@@ -365,8 +404,25 @@ void LineEdit::_notification(int p_what) {
 				get_stylebox("focus")->draw( ci, Rect2( Point2(), size ) );
 			}
 
-						
-			int ofs=style->get_offset().x;
+			int x_ofs=0;
+			// Starting x of the text for the chosen alignment; cached_width is the summed char width tracked for 'text'.
+			switch (align) {
+				
+				case ALIGN_FILL:
+				case ALIGN_LEFT: {
+
+					x_ofs=style->get_offset().x;
+				} break;
+				case ALIGN_CENTER: {
+
+					x_ofs=int(size.width-(cached_width))/2;
+				} break;
+				case ALIGN_RIGHT: {
+
+					x_ofs=int(size.width-style->get_offset().x-(cached_width));
+				} break;
+			}
+
 			int ofs_max=width-style->get_minimum_size().width;
 			int char_ofs=window_pos;
 			
@@ -391,29 +447,29 @@ void LineEdit::_notification(int p_what) {
 				int char_width=font->get_char_size( cchar,next ).width;
 								
 		// end of widget, break!
-				if ( (ofs+char_width) > ofs_max )
+				if ((x_ofs + char_width) > ofs_max)
 					break;
 				
 				
 				bool selected=selection.enabled && char_ofs>=selection.begin && char_ofs<selection.end;
 				
 				if (selected)
-					VisualServer::get_singleton()->canvas_item_add_rect(ci,Rect2( Point2( ofs , y_ofs ),Size2( char_width, y_area )),selection_color);
+					VisualServer::get_singleton()->canvas_item_add_rect(ci, Rect2(Point2(x_ofs, y_ofs), Size2(char_width, y_area)), selection_color);
 				
 
-				font->draw_char(ci,Point2( ofs , y_ofs+font_ascent ), cchar, next,selected?font_color_selected:font_color );
+				font->draw_char(ci, Point2(x_ofs, y_ofs + font_ascent), cchar, next, selected ? font_color_selected : font_color);
 				
 				if (char_ofs==cursor_pos && has_focus())
 					VisualServer::get_singleton()->canvas_item_add_rect(ci, Rect2(
-						Point2( ofs , y_ofs ), Size2( 1, y_area ) ), cursor_color );
+						Point2( x_ofs , y_ofs ), Size2( 1, y_area ) ), cursor_color );
 				
-				ofs+=char_width;
+				x_ofs+=char_width;
 				char_ofs++;
 			}
 
 			if (char_ofs==cursor_pos && has_focus()) //may be at the end
 				VisualServer::get_singleton()->canvas_item_add_rect(ci, Rect2(
-					Point2( ofs , y_ofs ), Size2( 1, y_area ) ), cursor_color );		
+					Point2( x_ofs , y_ofs ), Size2( 1, y_area ) ), cursor_color );		
 			
 		} break;
 		case NOTIFICATION_FOCUS_ENTER: {
@@ -484,13 +540,36 @@ void LineEdit::shift_selection_check_post(bool p_shift) {
 
 void LineEdit::set_cursor_at_pixel_pos(int p_x) {
 	
-	int ofs=window_pos;
-	int pixel_ofs=get_stylebox("normal")->get_offset().x;
-	Ref<Font> font=get_font("font");
+	Ref<Font> font = get_font("font");
+	int ofs = window_pos;
+	Ref<StyleBox> style = get_stylebox("normal");
+	int pixel_ofs = 0;
+	Size2 size = get_size();
+
+	switch (align) {
+				
+		case ALIGN_FILL:
+		case ALIGN_LEFT: {
+			
+			pixel_ofs = int(style->get_offset().x);
+		} break;
+		case ALIGN_CENTER: {
+
+			pixel_ofs=int(size.width-(cached_width))/2;
+		} break;
+		case ALIGN_RIGHT: {
+
+			pixel_ofs=int(size.width-style->get_offset().x-(cached_width));
+		} break;
+	}
+
 
 	while (ofs<text.length()) {
 		
-		int char_w=font->get_char_size( text[ofs] ).width;
+		int char_w = 0;
+		if (font != NULL) {
+			char_w = font->get_char_size(text[ofs]).width;
+		}
 		pixel_ofs+=char_w;
 		
 		if (pixel_ofs > p_x) { //found what we look for
@@ -523,6 +602,10 @@ void LineEdit::delete_char() {
 	
 	if ((text.length()<=0) || (cursor_pos==0)) return;
 	
+	Ref<Font> font = get_font("font");
+	if (font != NULL) {
+		cached_width -= font->get_char_size(text[cursor_pos - 1]).width;
+	}
 	
 	text.erase( cursor_pos-1, 1 );
 	
@@ -593,13 +676,15 @@ void LineEdit::set_cursor_pos(int p_pos) {
 		int width_to_cursor=0;
 		int wp=window_pos;
 		
-		for (int i=window_pos;i<cursor_pos;i++)
-			width_to_cursor+=font->get_char_size( text[i] ).width;
+		if (font != NULL) {
+			for (int i=window_pos;i<cursor_pos;i++)
+				width_to_cursor+=font->get_char_size( text[i] ).width;
 		
-		while(width_to_cursor>=window_width && wp<text.length()) {			
-			
-			width_to_cursor-=font->get_char_size( text[ wp ] ).width;
-			wp++;
+			while (width_to_cursor >= window_width && wp < text.length()) {
+
+				width_to_cursor -= font->get_char_size(text[wp]).width;
+				wp++;
+			}
 		}
 		
 		if (wp!=window_pos)
@@ -626,17 +711,26 @@ void LineEdit::append_at_cursor(String p_text) {
 	if ( ( max_length <= 0 ) || (text.length()+p_text.length() <= max_length)) {
 		
 		undo_text = text;
+
+		Ref<Font> font = get_font("font");
+		if (font != NULL) {
+			for (int i = 0; i < p_text.length(); i++)
+				cached_width += font->get_char_size(p_text[i]).width;
+		}
+		else {
+			cached_width = 0;
+		}
+
 		String pre = text.substr( 0, cursor_pos );
 		String post = text.substr( cursor_pos, text.length()-cursor_pos );
 		text=pre+p_text+post;
 		set_cursor_pos(cursor_pos+p_text.length());
-		
 	}
-	
 }
 
 void LineEdit::clear_internal() {
 	
+	cached_width = 0;
 	cursor_pos=0;
 	window_pos=0;
 	undo_text="";
@@ -676,6 +770,20 @@ void LineEdit::selection_delete() {
 	if (selection.enabled) {
 		
 		undo_text = text;
+
+		if (text.size() > 0)
+		{
+			Ref<Font> font = get_font("font");
+			if (font != NULL) {
+				for (int i = selection.begin; i < selection.end; i++)
+					cached_width -= font->get_char_size(text[i]).width;
+			}
+		}
+		else
+		{
+			cached_width = 0;
+		}
+
 		text.erase(selection.begin,selection.end-selection.begin);
 		cursor_pos-=CLAMP( cursor_pos-selection.begin, 0, selection.end-selection.begin);
 		
@@ -789,6 +897,8 @@ bool LineEdit::is_text_field() const {
 
 void LineEdit::_bind_methods() {
 	
+	ObjectTypeDB::bind_method(_MD("set_align", "align"), &LineEdit::set_align);
+	ObjectTypeDB::bind_method(_MD("get_align"), &LineEdit::get_align);
 
 	ObjectTypeDB::bind_method(_MD("_input_event"),&LineEdit::_input_event);
 	ObjectTypeDB::bind_method(_MD("clear"),&LineEdit::clear);	
@@ -809,15 +919,22 @@ void LineEdit::_bind_methods() {
 	ADD_SIGNAL( MethodInfo("text_changed", PropertyInfo( Variant::STRING, "text" )) );
 	ADD_SIGNAL( MethodInfo("text_entered", PropertyInfo( Variant::STRING, "text" )) );
 
+	BIND_CONSTANT(ALIGN_LEFT);
+	BIND_CONSTANT(ALIGN_CENTER);
+	BIND_CONSTANT(ALIGN_RIGHT);
+	BIND_CONSTANT(ALIGN_FILL);
+
 	ADD_PROPERTY( PropertyInfo( Variant::STRING, "text" ), _SCS("set_text"),_SCS("get_text") );
+	ADD_PROPERTYNZ(PropertyInfo(Variant::INT, "align", PROPERTY_HINT_ENUM, "Left,Center,Right,Fill"), _SCS("set_align"), _SCS("get_align"));
 	ADD_PROPERTY( PropertyInfo( Variant::INT, "max_length" ), _SCS("set_max_length"),_SCS("get_max_length") );
 	ADD_PROPERTY( PropertyInfo( Variant::BOOL, "editable" ), _SCS("set_editable"),_SCS("is_editable") );
 	ADD_PROPERTY( PropertyInfo( Variant::BOOL, "secret" ), _SCS("set_secret"),_SCS("is_secret") );
-
 }
 
 LineEdit::LineEdit() {
 	
+	align = ALIGN_LEFT;
+	cached_width = 0;
 	cursor_pos=0;
 	window_pos=0;
 	max_length = 0;
diff --git a/scene/gui/line_edit.h b/scene/gui/line_edit.h
index b1c4c8f616..f28136d66e 100644
--- a/scene/gui/line_edit.h
+++ b/scene/gui/line_edit.h
@@ -36,7 +36,18 @@
 class LineEdit : public Control {
 	
 	OBJ_TYPE( LineEdit, Control );
-	
+
+public:
+	enum Align { // alignment modes for the "align" property; each value is exported to scripts with BIND_CONSTANT in _bind_methods()
+
+		ALIGN_LEFT, // keep this order in sync with the "Left,Center,Right,Fill" enum hint of the "align" property
+		ALIGN_CENTER,
+		ALIGN_RIGHT,
+		ALIGN_FILL
+	};
+private:
+	Align align;
+
 	bool editable;
 	bool pass;
 	
@@ -46,6 +57,8 @@ class LineEdit : public Control {
 	int cursor_pos;
 	int window_pos;
 	int max_length; // 0 for no maximum
+
+	int cached_width;
 	
 	struct Selection {
 		
@@ -83,7 +96,8 @@ class LineEdit : public Control {
 protected:	
 	static void _bind_methods();	
 public:
-	
+	void set_align(Align p_align);
+	Align get_align() const;
 		
 	virtual Variant get_drag_data(const Point2& p_point);
 	virtual bool can_drop_data(const Point2& p_point,const Variant& p_data) const;
@@ -119,4 +133,7 @@ public:
 	
 };
 
+
+VARIANT_ENUM_CAST(LineEdit::Align);
+
 #endif
diff --git a/scene/gui/menu_button.cpp b/scene/gui/menu_button.cpp
index 13ff7074ea..be7a6b468a 100644
--- a/scene/gui/menu_button.cpp
+++ b/scene/gui/menu_button.cpp
@@ -54,6 +54,8 @@ void MenuButton::_unhandled_key_input(InputEvent p_event) {
 
 
 		int item = popup->find_item_by_accelerator(code);
+
+
 		if (item>=0 && ! popup->is_item_disabled(item))
 			popup->activate_item(item);
 		/*
diff --git a/scene/gui/popup_menu.cpp b/scene/gui/popup_menu.cpp
index 6c21ea639f..20f28ecf10 100644
--- a/scene/gui/popup_menu.cpp
+++ b/scene/gui/popup_menu.cpp
@@ -170,7 +170,14 @@ void PopupMenu::_activate_submenu(int over) {
 	Point2 p = get_global_pos();
 	Rect2 pr(p,get_size());
 	Ref<StyleBox> style = get_stylebox("panel");
-	pm->set_pos(p+Point2(get_size().width,items[over]._ofs_cache-style->get_offset().y));
+
+	Point2 pos = p+Point2(get_size().width,items[over]._ofs_cache-style->get_offset().y);
+	Size2 size = pm->get_size();
+	// open the submenu to the left of this menu when it would overflow the viewport's right edge
+	if (pos.x+size.width > get_viewport_rect().size.width)
+		pos.x=p.x-size.width;
+
+	pm->set_pos(pos);
 	pm->popup();
 
 	PopupMenu *pum = pm->cast_to<PopupMenu>();
@@ -323,11 +330,14 @@ void PopupMenu::_input_event(const InputEvent &p_event) {
 						invalidated_click=false;
 						break;
 					}
-					if (over<0 || items[over].separator || items[over].disabled) {
+					if (over<0) {
 						hide();
 						break; //non-activable
 					}
 
+					if (items[over].separator || items[over].disabled)
+						break;
+
 					if (items[over].submenu!="") {
 
 						_activate_submenu(over);
@@ -362,8 +372,11 @@ void PopupMenu::_input_event(const InputEvent &p_event) {
 			int over=_get_mouse_over(Point2(m.x,m.y));
 			int id = (over<0 || items[over].separator || items[over].disabled)?-1:items[over].ID;
 
-			if (id<0)
+			if (id<0) {
+				mouse_over=-1;
+				update();
 				break;
+			}
 
 			if (items[over].submenu!="" && submenu_over!=over) {
 				submenu_over=over;
@@ -774,6 +787,7 @@ void PopupMenu::add_separator() {
 void PopupMenu::clear()  {
 
 	items.clear();
+	mouse_over=-1;
 	update();
 	idcount=0;
 
diff --git a/scene/gui/range.cpp b/scene/gui/range.cpp
index ad708d16f0..7103ee651f 100644
--- a/scene/gui/range.cpp
+++ b/scene/gui/range.cpp
@@ -243,7 +243,7 @@ void Range::_bind_methods() {
 	ADD_PROPERTY( PropertyInfo( Variant::REAL, "range/step" ), _SCS("set_step"), _SCS("get_step") );
 	ADD_PROPERTY( PropertyInfo( Variant::REAL, "range/page" ), _SCS("set_page"), _SCS("get_page") );
 	ADD_PROPERTY( PropertyInfo( Variant::REAL, "range/value" ), _SCS("set_val"), _SCS("get_val") );
-	ADD_PROPERTY( PropertyInfo( Variant::REAL, "range/exp_edit" ), _SCS("set_exp_unit_value"), _SCS("is_unit_value_exp") );
+	ADD_PROPERTY( PropertyInfo( Variant::BOOL, "range/exp_edit" ), _SCS("set_exp_unit_value"), _SCS("is_unit_value_exp") );
 	ADD_PROPERTY( PropertyInfo( Variant::BOOL, "rounded_values" ), _SCS("set_rounded_values"), _SCS("get_rounded_values") );
 
 }
diff --git a/scene/gui/rich_text_label.cpp b/scene/gui/rich_text_label.cpp
index ef6a2ba6aa..5abb6c1d01 100644
--- a/scene/gui/rich_text_label.cpp
+++ b/scene/gui/rich_text_label.cpp
@@ -719,7 +719,7 @@ void RichTextLabel::_input_event(InputEvent p_event) {
 		case InputEvent::KEY: {
 
 			const InputEventKey &k=p_event.key;
-			if (k.pressed) {
+			if (k.pressed && !k.mod.alt && !k.mod.shift && !k.mod.meta) {
 				bool handled=true;
 				switch(k.scancode) {
 					case KEY_PAGEUP: {
@@ -765,6 +765,7 @@ void RichTextLabel::_input_event(InputEvent p_event) {
 					default: handled=false;
 				}
 
+
 				if (handled)
 					accept_event();
 			}
@@ -1503,7 +1504,6 @@ Error RichTextLabel::append_bbcode(const String& p_bbcode) {
 
 void RichTextLabel::scroll_to_line(int p_line) {
 
-	p_line -= 1;
 	ERR_FAIL_INDEX(p_line,lines.size());
 	_validate_line_caches();
 	vscroll->set_val(lines[p_line].height_accum_cache-lines[p_line].height_cache);
@@ -1571,11 +1571,8 @@ bool RichTextLabel::search(const String& p_string,bool p_from_selection) {
 
 				}
 
-				if (line > 1) {
-					line-=1;
-				}
-
-				scroll_to_line(line);
+				line-=2;
+				scroll_to_line(line<0?0:line);
 
 				return true;
 			}
@@ -1688,10 +1685,11 @@ void RichTextLabel::_bind_methods() {
 
 	ObjectTypeDB::bind_method(_MD("get_v_scroll"),&RichTextLabel::get_v_scroll);
 
+	ObjectTypeDB::bind_method(_MD("scroll_to_line"),&RichTextLabel::scroll_to_line);
+
 	ObjectTypeDB::bind_method(_MD("set_tab_size","spaces"),&RichTextLabel::set_tab_size);
 	ObjectTypeDB::bind_method(_MD("get_tab_size"),&RichTextLabel::get_tab_size);
 
-
 	ObjectTypeDB::bind_method(_MD("set_selection_enabled","enabled"),&RichTextLabel::set_selection_enabled);
 	ObjectTypeDB::bind_method(_MD("is_selection_enabled"),&RichTextLabel::is_selection_enabled);
 
diff --git a/scene/gui/split_container.cpp b/scene/gui/split_container.cpp
index d7ee7a6b86..49067bb3a0 100644
--- a/scene/gui/split_container.cpp
+++ b/scene/gui/split_container.cpp
@@ -435,8 +435,8 @@ void SplitContainer::_bind_methods() {
 	ADD_SIGNAL( MethodInfo("dragged",PropertyInfo(Variant::INT,"offset")));
 
 	ADD_PROPERTY( PropertyInfo(Variant::INT,"split/offset"),_SCS("set_split_offset"),_SCS("get_split_offset"));
-	ADD_PROPERTY( PropertyInfo(Variant::INT,"split/collapsed"),_SCS("set_collapsed"),_SCS("is_collapsed"));
-	ADD_PROPERTY( PropertyInfo(Variant::INT,"split/dragger_visible"),_SCS("set_dragger_visible"),_SCS("is_dragger_visible"));
+	ADD_PROPERTY( PropertyInfo(Variant::BOOL,"split/collapsed"),_SCS("set_collapsed"),_SCS("is_collapsed"));
+	ADD_PROPERTY( PropertyInfo(Variant::BOOL,"split/dragger_visible"),_SCS("set_dragger_visible"),_SCS("is_dragger_visible"));
 
 
 }
diff --git a/scene/gui/tabs.cpp b/scene/gui/tabs.cpp
index 6d84f028b3..47a55e0716 100644
--- a/scene/gui/tabs.cpp
+++ b/scene/gui/tabs.cpp
@@ -58,7 +58,7 @@ Size2 Tabs::get_minimum_size() const {
 
 		if (tabs[i].right_button.is_valid()) {
 			Ref<Texture> rb=tabs[i].right_button;
-			Size2 bms = rb->get_size()+get_stylebox("button")->get_minimum_size();
+			Size2 bms = rb->get_size();//+get_stylebox("button")->get_minimum_size();
 			bms.width+=get_constant("hseparation");
 
 			ms.width+=bms.width;
@@ -67,9 +67,8 @@ Size2 Tabs::get_minimum_size() const {
 
 		if (tabs[i].close_button.is_valid()) {
 			Ref<Texture> cb=tabs[i].close_button;
-			Size2 bms = cb->get_size()+get_stylebox("button")->get_minimum_size();
+			Size2 bms = cb->get_size();//+get_stylebox("button")->get_minimum_size();
 			bms.width+=get_constant("hseparation");
-
 			ms.width+=bms.width;
 			ms.height=MAX(bms.height+tab_bg->get_minimum_size().height,ms.height);
 		}
@@ -103,11 +102,13 @@ void Tabs::_input_event(const InputEvent& p_event) {
 			// test hovering right button and close button
 			if (tabs[i].rb_rect.has_point(pos)) {
 				rb_hover=i;
+				cb_hover=-1;
 				hover_buttons = i;
 				break;
 			}
 			else if (tabs[i].cb_rect.has_point(pos)) {
 				cb_hover=i;
+				rb_hover=-1;
 				hover_buttons = i;
 				break;
 			}
@@ -262,9 +263,9 @@ void Tabs::_notification(int p_what) {
 					Ref<Texture> rb=tabs[i].right_button;
 
 					lsize+=get_constant("hseparation");
-					lsize+=style->get_margin(MARGIN_LEFT);
+					//lsize+=style->get_margin(MARGIN_LEFT);
 					lsize+=rb->get_width();
-					lsize+=style->get_margin(MARGIN_RIGHT);
+					//lsize+=style->get_margin(MARGIN_RIGHT);
 
 				}
 
@@ -276,9 +277,9 @@ void Tabs::_notification(int p_what) {
 						Ref<Texture> rb=tabs[i].close_button;
 
 						lsize+=get_constant("hseparation");
-						lsize+=style->get_margin(MARGIN_LEFT);
+						//lsize+=style->get_margin(MARGIN_LEFT);
 						lsize+=rb->get_width();
-						lsize+=style->get_margin(MARGIN_RIGHT);
+						//lsize+=style->get_margin(MARGIN_RIGHT);
 
 					}
 				} break;
@@ -289,9 +290,9 @@ void Tabs::_notification(int p_what) {
 							Ref<Texture> rb=tabs[i].close_button;
 
 							lsize+=get_constant("hseparation");
-							lsize+=style->get_margin(MARGIN_LEFT);
+							//lsize+=style->get_margin(MARGIN_LEFT);
 							lsize+=rb->get_width();
-							lsize+=style->get_margin(MARGIN_RIGHT);
+							//lsize+=style->get_margin(MARGIN_RIGHT);
 
 						}
 					}
@@ -303,9 +304,9 @@ void Tabs::_notification(int p_what) {
 							Ref<Texture> rb=tabs[i].close_button;
 
 							lsize+=get_constant("hseparation");
-							lsize+=style->get_margin(MARGIN_LEFT);
+							//lsize+=style->get_margin(MARGIN_LEFT);
 							lsize+=rb->get_width();
-							lsize+=style->get_margin(MARGIN_RIGHT);
+							//lsize+=style->get_margin(MARGIN_RIGHT);
 
 						}
 					}
@@ -404,11 +405,11 @@ void Tabs::_notification(int p_what) {
 								style->draw(ci,cb_rect);
 						}
 
-						w+=style->get_margin(MARGIN_LEFT);
+						//w+=style->get_margin(MARGIN_LEFT);
 
 						cb->draw(ci,Point2i( w,cb_rect.pos.y+style->get_margin(MARGIN_TOP) ));
 						w+=cb->get_width();
-						w+=style->get_margin(MARGIN_RIGHT);
+						//w+=style->get_margin(MARGIN_RIGHT);
 						tabs[i].cb_rect=cb_rect;
 					}
 				} break;
@@ -432,11 +433,11 @@ void Tabs::_notification(int p_what) {
 									style->draw(ci,cb_rect);
 							}
 
-							w+=style->get_margin(MARGIN_LEFT);
+							//w+=style->get_margin(MARGIN_LEFT);
 
 							cb->draw(ci,Point2i( w,cb_rect.pos.y+style->get_margin(MARGIN_TOP) ));
 							w+=cb->get_width();
-							w+=style->get_margin(MARGIN_RIGHT);
+							//w+=style->get_margin(MARGIN_RIGHT);
 							tabs[i].cb_rect=cb_rect;
 						}
 					}
@@ -461,11 +462,11 @@ void Tabs::_notification(int p_what) {
 									style->draw(ci,cb_rect);
 							}
 
-							w+=style->get_margin(MARGIN_LEFT);
+							//w+=style->get_margin(MARGIN_LEFT);
 
 							cb->draw(ci,Point2i( w,cb_rect.pos.y+style->get_margin(MARGIN_TOP) ));
 							w+=cb->get_width();
-							w+=style->get_margin(MARGIN_RIGHT);
+							//w+=style->get_margin(MARGIN_RIGHT);
 							tabs[i].cb_rect=cb_rect;
 						}
 					}
diff --git a/scene/gui/text_edit.cpp b/scene/gui/text_edit.cpp
index be6c0d0a8b..78792dc785 100644
--- a/scene/gui/text_edit.cpp
+++ b/scene/gui/text_edit.cpp
@@ -1094,16 +1094,16 @@ void TextEdit::backspace_at_cursor() {
 }
 
 
-bool TextEdit::_get_mouse_pos(const Point2i& p_mouse, int &r_row, int &r_col) const {
+void TextEdit::_get_mouse_pos(const Point2i& p_mouse, int &r_row, int &r_col) const {
 	
-	int row=p_mouse.y;
-	row-=cache.style_normal->get_margin(MARGIN_TOP);
-	row/=get_row_height();
-	
-	if (row<0 || row>=get_visible_rows())
-		return false;
-	
-	row+=cursor.line_ofs;
+	float rows=p_mouse.y;
+	rows-=cache.style_normal->get_margin(MARGIN_TOP);
+	rows/=get_row_height();
+	int row=cursor.line_ofs+rows;
+
+	if (row<0)
+		row=0;
+
 	int col=0;
 	
 	if (row>=text.size()) {
@@ -1119,7 +1119,6 @@ bool TextEdit::_get_mouse_pos(const Point2i& p_mouse, int &r_row, int &r_col) co
 	
 	r_row=row;
 	r_col=col;
-	return true;
 }
 
 void TextEdit::_input_event(const InputEvent& p_input_event) {
@@ -1177,8 +1176,7 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 				if (mb.button_index==BUTTON_LEFT) {
 					
 					int row,col;
-					if (!_get_mouse_pos(Point2i(mb.x,mb.y), row,col))
-						return;
+					_get_mouse_pos(Point2i(mb.x,mb.y), row,col);
 					
 					int prev_col=cursor.column;
 					int prev_line=cursor.line;
@@ -1210,27 +1208,30 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 							update();
 						} else {
 
-							if (cursor.line<selection.from_line || (cursor.line==selection.from_line && cursor.column<=selection.from_column)) {
-								selection.from_column=cursor.column;
-								selection.from_line=cursor.line;
-							} else if (cursor.line>selection.to_line || (cursor.line==selection.to_line && cursor.column>=selection.to_column)) {
-								selection.to_column=cursor.column;
-								selection.to_line=cursor.line;
-
-							} else if (!selection.shiftclick_left) {
+							if (cursor.line<selection.selecting_line || (cursor.line==selection.selecting_line && cursor.column<selection.selecting_column)) {
 
+								if (selection.shiftclick_left) {
+									SWAP(selection.from_column,selection.to_column);
+									SWAP(selection.from_line,selection.to_line);
+									selection.shiftclick_left = !selection.shiftclick_left;
+								}
 								selection.from_column=cursor.column;
 								selection.from_line=cursor.line;
-							} else {
 
+							} else if (cursor.line>selection.selecting_line || (cursor.line==selection.selecting_line && cursor.column>selection.selecting_column)) {
+
+								if (!selection.shiftclick_left) {
+									SWAP(selection.from_column,selection.to_column);
+									SWAP(selection.from_line,selection.to_line);
+									selection.shiftclick_left = !selection.shiftclick_left;
+								}
 								selection.to_column=cursor.column;
 								selection.to_line=cursor.line;
-							}
 
-							if (selection.from_line>selection.to_line || (selection.from_line==selection.to_line && selection.from_column>selection.to_column)) {
-								SWAP(selection.from_column,selection.to_column);
-								SWAP(selection.from_line,selection.to_line);
+							} else {
+								selection.active=false;
 							}
+
 							update();
 						}
 
@@ -1255,6 +1256,7 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 					if (!mb.doubleclick && (OS::get_singleton()->get_ticks_msec()-last_dblclk)<600 && cursor.line==prev_line) {
 						//tripleclick select line
 						select(cursor.line,0,cursor.line,text[cursor.line].length());
+						selection.selecting_column=0;
 						last_dblclk=0;
 						
 					} else if (mb.doubleclick && text[cursor.line].length()) {
@@ -1279,6 +1281,8 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 								end+=1;
 							
 							select(cursor.line,beg,cursor.line,end);
+
+							selection.selecting_column=beg;
 						}
 						
 						last_dblclk = OS::get_singleton()->get_ticks_msec();
@@ -1289,7 +1293,6 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 				}
 			} else {
 				
-				selection.selecting_mode=Selection::MODE_NONE;
 				// notify to show soft keyboard
 				notification(NOTIFICATION_FOCUS_ENTER);
 			}
@@ -1302,10 +1305,9 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 			if (mm.button_mask&BUTTON_MASK_LEFT) {
 				
 				int row,col;
-				if (!_get_mouse_pos(Point2i(mm.x,mm.y), row,col))
-					return;
+				_get_mouse_pos(Point2i(mm.x,mm.y), row,col);
 				
-				if (selection.selecting_mode==Selection::MODE_POINTER) {
+				if (selection.selecting_mode!=Selection::MODE_NONE) {
 					
 					select(selection.selecting_line,selection.selecting_column,row,col);
 					
@@ -1585,7 +1587,7 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 					break;
 			}
 			
-			selection.selecting_test=false;
+			selection.selecting_text=false;
 			
 			bool scancode_handled=true;
 			
@@ -1608,6 +1610,13 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 						else
 							break;
 					}
+					if(auto_indent){
+						// indent once again if previous line will end with ':'
+						// (i.e. colon precedes current cursor position)
+						if(cursor.column>0 && text[cursor.line][cursor.column-1]==':') {
+							ins+="\t";
+						}
+					}
 					
 					_insert_text_at_cursor(ins);
 					_push_current_op();
@@ -1647,8 +1656,60 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 				case KEY_BACKSPACE: {
 					if (readonly)
 						break;
-					backspace_at_cursor();
-					
+
+#ifdef APPLE_STYLE_KEYS
+					if (k.mod.alt) {
+#else
+					if (k.mod.alt) {
+						scancode_handled=false;
+						break;
+					} else if (k.mod.command) {
+#endif
+						int line=cursor.line;
+						int column=cursor.column;
+
+						bool prev_char=false;
+						bool only_whitespace=true;
+
+						while (only_whitespace && line > 0) {
+
+							while (column>0) {
+								CharType c=text[line][column-1];
+
+								if (c != '\t' && c != ' ') {
+									only_whitespace=false;
+									break;
+								}
+
+								column--;
+							}
+
+							if (only_whitespace) {
+								line--;
+								column=text[line].length();
+							}
+						}
+
+						while (column>0) {
+							bool ischar=_is_text_char(text[line][column-1]);
+
+							if (prev_char && !ischar)
+								break;
+
+							prev_char=ischar;
+							column--;
+
+						}
+
+						_remove_text(line, column, cursor.line, cursor.column);
+
+						cursor_set_line(line);
+						cursor_set_column(column);
+
+					} else {
+						backspace_at_cursor();
+					}
+
 				} break;
 				case KEY_LEFT: {
 					
@@ -1789,10 +1850,63 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 					if (cursor.line==text.size()-1 && cursor.column==curline_len)
 						break; //nothing to do
 					
-					int next_line = cursor.column<curline_len?cursor.line:cursor.line+1;
-					int next_column = cursor.column<curline_len?(cursor.column+1):0;
+					int next_line=cursor.column<curline_len?cursor.line:cursor.line+1;
+					int next_column;
+
+#ifdef APPLE_STYLE_KEYS
+					if (k.mod.alt) {
+#else
+					if (k.mod.alt) {
+						scancode_handled=false;
+						break;
+					} else if (k.mod.command) {
+#endif
+						int last_line=text.size()-1;
+
+						int line=cursor.line;
+						int column=cursor.column;
+
+						bool prev_char=false;
+						bool only_whitespace=true;
+
+						while (only_whitespace && line < last_line) {
+
+							while (column<text[line].length()) {
+								CharType c=text[line][column];
+
+								if (c != '\t' && c != ' ') {
+									only_whitespace=false;
+									break;
+								}
+
+								column++;
+							}
+
+							if (only_whitespace) {
+								line++;
+								column=0;
+							}
+						}
+
+						while (column<text[line].length()) {
+
+							bool ischar=_is_text_char(text[line][column]);
+
+							if (prev_char && !ischar)
+								break;
+							prev_char=ischar;
+							column++;
+						}
+
+						next_line=line;
+						next_column=column;
+					} else {
+						next_column=cursor.column<curline_len?(cursor.column+1):0;
+					}
+
 					_remove_text(cursor.line,cursor.column,next_line,next_column);
 					update();
+
 				} break;
 #ifdef APPLE_STYLE_KEYS
 				case KEY_HOME: {
@@ -1903,15 +2017,7 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 						break;
 					}
 					
-					if (text.size()==1 && text[0].length()==0)
-						break;
-					selection.active=true;
-					selection.from_line=0;
-					selection.from_column=0;
-					selection.to_line=text.size()-1;
-					selection.to_column=text[selection.to_line].length();
-					selection.selecting_mode=Selection::MODE_NONE;
-					update();
+					select_all();
 					
 				} break;
 				case KEY_X: {
@@ -2096,12 +2202,6 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 				}
 			}
 			
-			
-			if (!selection.selecting_test) {
-				
-				selection.selecting_mode=Selection::MODE_NONE;
-			}
-			
 			return;
 		} break;
 			
@@ -2113,13 +2213,14 @@ void TextEdit::_input_event(const InputEvent& p_input_event) {
 void TextEdit::_pre_shift_selection() {
 	
 	
-	if (!selection.active || selection.selecting_mode!=Selection::MODE_SHIFT) {
+	if (!selection.active || selection.selecting_mode==Selection::MODE_NONE) { // (re)anchor at the cursor only when no selection is active or no selecting mode is set
 		
 		selection.selecting_line=cursor.line;
 		selection.selecting_column=cursor.column;
 		selection.active=true;
-		selection.selecting_mode=Selection::MODE_SHIFT;
 	}
+
+	selection.selecting_mode=Selection::MODE_SHIFT; // always switch to shift mode, whether or not the anchor was reset above
 }
 
 void TextEdit::_post_shift_selection() {
@@ -2132,7 +2233,7 @@ void TextEdit::_post_shift_selection() {
 	}
 	
 	
-	selection.selecting_test=true;
+	selection.selecting_text=true;
 }
 
 /**** TEXT EDIT CORE API ****/
@@ -2428,7 +2529,7 @@ void TextEdit::adjust_viewport_to_cursor() {
 	
 }
 
-void TextEdit::cursor_set_column(int p_col) {
+void TextEdit::cursor_set_column(int p_col, bool p_adjust_viewport) {
 	
 	if (p_col<0)
 		p_col=0;
@@ -2439,7 +2540,8 @@ void TextEdit::cursor_set_column(int p_col) {
 	
 	cursor.last_fit_x=get_column_x_offset(cursor.column,get_line(cursor.line));
 	
-	adjust_viewport_to_cursor();
+	if (p_adjust_viewport)
+		adjust_viewport_to_cursor();
 	
 	if (!cursor_changed_dirty) {
 		if (is_inside_tree())
@@ -2450,7 +2552,7 @@ void TextEdit::cursor_set_column(int p_col) {
 }
 
 
-void TextEdit::cursor_set_line(int p_row) {
+void TextEdit::cursor_set_line(int p_row, bool p_adjust_viewport) {
 	
 	if (setting_row)
 		return;
@@ -2466,8 +2568,8 @@ void TextEdit::cursor_set_line(int p_row) {
 	cursor.line=p_row;
 	cursor.column=get_char_pos_for( cursor.last_fit_x, get_line( cursor.line) );
 	
-	
-	adjust_viewport_to_cursor();
+	if (p_adjust_viewport)
+		adjust_viewport_to_cursor();
 	
 	setting_row=false;
 	
@@ -2774,6 +2876,10 @@ bool TextEdit::is_syntax_coloring_enabled() const {
 	return syntax_coloring;
 }
 
+void TextEdit::set_auto_indent(bool p_auto_indent) { // toggles the auto_indent flag (false by default, see the constructor)
+	auto_indent = p_auto_indent; // when set, the key handler in _input_event() appends an extra "\t" to the inserted text if the character before the cursor is ':'
+}
+
 void TextEdit::cut() {
 	
 	if (!selection.active)
@@ -2832,9 +2938,14 @@ void TextEdit::select_all() {
 	selection.active=true;
 	selection.from_line=0;
 	selection.from_column=0;
+	selection.selecting_line=0;
+	selection.selecting_column=0;
 	selection.to_line=text.size()-1;
 	selection.to_column=text[selection.to_line].length();
-	selection.selecting_mode=Selection::MODE_NONE;
+	selection.selecting_mode=Selection::MODE_SHIFT;
+	selection.shiftclick_left=true;
+	cursor_set_line( selection.to_line, false );
+	cursor_set_column( selection.to_column, false );
 	update();
 	
 }
@@ -2873,12 +2984,20 @@ void TextEdit::select(int p_from_line,int p_from_column,int p_to_line,int p_to_c
 			
 		} else if (selection.from_column>selection.to_column) {
 			
+			selection.shiftclick_left = false;
 			SWAP( selection.from_column, selection.to_column );
+		} else {
+
+			selection.shiftclick_left = true;
 		}
 	} else if (selection.from_line>selection.to_line) {
 		
+		selection.shiftclick_left = false;
 		SWAP( selection.from_line, selection.to_line );
 		SWAP( selection.from_column, selection.to_column );
+	} else {
+
+		selection.shiftclick_left = true;
 	}
 	
 	
@@ -3502,10 +3621,8 @@ String TextEdit::get_tooltip(const Point2& p_pos) const {
 	if (!tooltip_obj)
 		return Control::get_tooltip(p_pos);
 	int row,col;
-	if (!_get_mouse_pos(p_pos, row,col)) {
-		return Control::get_tooltip(p_pos);
-	}
-	
+	_get_mouse_pos(p_pos, row, col);
+
 	String s = text[row];
 	if (s.length()==0)
 		return Control::get_tooltip(p_pos);
@@ -3684,7 +3801,7 @@ TextEdit::TextEdit()  {
 	selection.selecting_mode=Selection::MODE_NONE;
 	selection.selecting_line=0;
 	selection.selecting_column=0;
-	selection.selecting_test=false;
+	selection.selecting_text=false;
 	selection.active=false;
 	syntax_coloring=false;
 	
@@ -3730,7 +3847,7 @@ TextEdit::TextEdit()  {
 	next_operation_is_complex=false;
 	auto_brace_completion_enabled=false;
 	brace_matching_enabled=false;
-	
+	auto_indent=false;
 }
 
 TextEdit::~TextEdit()
diff --git a/scene/gui/text_edit.h b/scene/gui/text_edit.h
index 9ffe8a5bae..91369309cf 100644
--- a/scene/gui/text_edit.h
+++ b/scene/gui/text_edit.h
@@ -55,7 +55,7 @@ class TextEdit : public Control  {
 
 		Mode selecting_mode;
 		int selecting_line,selecting_column;
-		bool selecting_test;
+		bool selecting_text;
 
 
 		bool active;
@@ -213,6 +213,7 @@ class TextEdit : public Control  {
 	
 	bool auto_brace_completion_enabled;
 	bool brace_matching_enabled;
+	bool auto_indent;
 	bool cut_copy_line;
 
 	uint64_t last_dblclk;
@@ -270,7 +271,7 @@ class TextEdit : public Control  {
 	void _confirm_completion();
 	void _update_completion_candidates();
 
-	bool _get_mouse_pos(const Point2i& p_mouse, int &r_row, int &r_col) const;
+	void _get_mouse_pos(const Point2i& p_mouse, int &r_row, int &r_col) const;
 
 protected:
 
@@ -323,9 +324,10 @@ public:
 		brace_matching_enabled=p_enabled;
 		update();
 	}
+	void set_auto_indent(bool p_auto_indent);
 
-	void cursor_set_column(int p_col);
-	void cursor_set_line(int p_row);
+	void cursor_set_column(int p_col, bool p_adjust_viewport=true);
+	void cursor_set_line(int p_row, bool p_adjust_viewport=true);
 
 	int cursor_get_column() const;
 	int cursor_get_line() const;
diff --git a/scene/gui/texture_progress.cpp b/scene/gui/texture_progress.cpp
index 0d549108fa..c8930add6e 100644
--- a/scene/gui/texture_progress.cpp
+++ b/scene/gui/texture_progress.cpp
@@ -233,11 +233,7 @@ float TextureProgress::get_radial_initial_angle()
 
 void TextureProgress::set_fill_degrees(float p_angle)
 {
-	while(p_angle>360)
-		p_angle-=360;
-	while (p_angle<0)
-		p_angle+=360;
-	rad_max_degrees=p_angle;
+	rad_max_degrees=CLAMP(p_angle,0,360); // out-of-range angles saturate at 0 or 360 rather than wrapping around
 	update();
 }
 
@@ -302,4 +298,5 @@ TextureProgress::TextureProgress()
 {
 	mode=FILL_LEFT_TO_RIGHT;
 	rad_center_off=Point2();
+	rad_max_degrees=360;
 }
diff --git a/scene/gui/tree.cpp b/scene/gui/tree.cpp
index 5df6f2ced9..16a12fe407 100644
--- a/scene/gui/tree.cpp
+++ b/scene/gui/tree.cpp
@@ -962,7 +962,9 @@ int Tree::draw_item(const Point2i& p_pos,const Point2& p_draw_ofs, const Size2&
 
 		Point2i guide_space=Point2i( cache.guide_width , height );
 
-		if (p_item->childs) { //has childs, draw the guide box
+
+
+		if (!hide_folding && p_item->childs) { //has childs, draw the guide box
 
 			Ref<Texture> arrow;
 
@@ -986,7 +988,7 @@ int Tree::draw_item(const Point2i& p_pos,const Point2& p_draw_ofs, const Size2&
 
 		int font_ascent=font->get_ascent();
 
-		int ofs = p_pos.x + cache.item_margin;
+		int ofs = p_pos.x + (hide_folding?cache.hseparation:cache.item_margin);
 		for (int i=0;i<columns.size();i++) {
 
 			int w = get_column_width(i);
@@ -1062,7 +1064,10 @@ int Tree::draw_item(const Point2i& p_pos,const Point2& p_draw_ofs, const Size2&
 
 			if (p_item->cells[i].custom_bg_color) {
 
-				VisualServer::get_singleton()->canvas_item_add_rect(ci,cell_rect,p_item->cells[i].bg_color);
+				Rect2 r=cell_rect;
+				r.pos.x-=cache.hseparation;
+				r.size.x+=cache.hseparation;
+				VisualServer::get_singleton()->canvas_item_add_rect(ci,r,p_item->cells[i].bg_color);
 			}
 
 			Color col=p_item->cells[i].custom_color?p_item->cells[i].color:get_color( p_item->cells[i].selected?"font_color_selected":"font_color");
@@ -1376,7 +1381,7 @@ int Tree::propagate_mouse_event(const Point2i &p_pos,int x_ofs,int y_ofs,bool p_
 	if (!skip && p_pos.y<item_h) {
 		// check event!
 
-		if (p_pos.x >=x_ofs && p_pos.x < (x_ofs+cache.item_margin) ) {
+		if (!hide_folding && (p_pos.x >=x_ofs && p_pos.x < (x_ofs+cache.item_margin) )) {
 
 
 			if (p_item->childs)
@@ -2266,9 +2271,12 @@ bool Tree::edit_selected() {
 
 	TreeItem::Cell &c = s->cells[col];
 
+	if (c.mode==TreeItem::CELL_MODE_CHECK) {
 
-
-	if (c.mode==TreeItem::CELL_MODE_CUSTOM) {
+		s->set_checked(col, !c.checked);
+		item_edited(col,s);
+		return true;
+	} else if (c.mode==TreeItem::CELL_MODE_CUSTOM) {
 
 		edited_item=s;
 		edited_col=col;
@@ -3114,6 +3122,16 @@ bool Tree::can_cursor_exit_tree() const {
 	return cursor_can_exit_tree;
 }
 
+void Tree::set_hide_folding(bool p_hide) { // when true, draw_item() skips the guide box and starts cells at cache.hseparation instead of cache.item_margin, and propagate_mouse_event() skips the item-margin hit test
+	hide_folding=p_hide;
+	update(); // the flag changes how items are drawn, so request an update
+}
+
+bool Tree::is_folding_hidden() const { // returns the flag stored by set_hide_folding(); the constructor initializes it to false
+
+	return hide_folding;
+}
+
 
 void Tree::_bind_methods() {
 
@@ -3155,6 +3173,9 @@ void Tree::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("get_column_title","column"),&Tree::get_column_title);
 	ObjectTypeDB::bind_method(_MD("get_scroll"),&Tree::get_scroll);
 
+	ObjectTypeDB::bind_method(_MD("set_hide_folding","hide"),&Tree::set_hide_folding);
+	ObjectTypeDB::bind_method(_MD("is_folding_hidden"),&Tree::is_folding_hidden);
+
 
 	ADD_SIGNAL( MethodInfo("item_selected"));
 	ADD_SIGNAL( MethodInfo("cell_selected"));
@@ -3242,6 +3263,8 @@ Tree::Tree() {
 	pressing_for_editor=false;
 	range_drag_enabled=false;
 
+	hide_folding=false;
+
 }
 
 
diff --git a/scene/gui/tree.h b/scene/gui/tree.h
index 3fbd7c95d9..8fb9b802a1 100644
--- a/scene/gui/tree.h
+++ b/scene/gui/tree.h
@@ -228,6 +228,7 @@ public:
 	void set_tooltip(int p_column, const String& p_tooltip);
 	String get_tooltip(int p_column) const;
 
+
 	void clear_children();
 
 	void move_to_top();
@@ -410,6 +411,8 @@ friend class TreeItem;
 	bool drag_touching_deaccel;
 	bool click_handled;
 
+	bool hide_folding;
+
 protected:
 	static void _bind_methods();
 	
@@ -467,6 +470,11 @@ public:
 
 	VScrollBar *get_vscroll_bar() { return v_scroll; }
 
+	void set_hide_folding(bool p_hide);
+	bool is_folding_hidden() const;
+
+
+
 	Tree();
 	~Tree();	
 
diff --git a/scene/gui/video_player.cpp b/scene/gui/video_player.cpp
index f50552b32c..d99da5e906 100644
--- a/scene/gui/video_player.cpp
+++ b/scene/gui/video_player.cpp
@@ -27,7 +27,7 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 #include "video_player.h"
-
+#include "os/os.h"
 
 
 int VideoPlayer::InternalStream::get_channel_count() const {
@@ -130,7 +130,7 @@ void VideoPlayer::_notification(int p_notification) {
 			if (!playback->is_playing())
 				return;
 
-			double audio_time = AudioServer::get_singleton()->get_mix_time();
+			double audio_time = OS::get_singleton()->get_ticks_usec()/1000000.0; //AudioServer::get_singleton()->get_mix_time();
 
 			double delta = last_audio_time==0?0:audio_time-last_audio_time;
 			last_audio_time=audio_time;
@@ -196,10 +196,10 @@ void VideoPlayer::set_stream(const Ref<VideoStream> &p_stream) {
 
 	stream=p_stream;
     if (stream.is_valid()) {
-        stream->set_audio_track(audio_track);
-        playback=stream->instance_playback();
+	stream->set_audio_track(audio_track);
+	playback=stream->instance_playback();
     } else {
-        playback=Ref<VideoStreamPlayback>();
+	playback=Ref<VideoStreamPlayback>();
     }
 
 	if (!playback.is_null()) {
@@ -249,6 +249,8 @@ void VideoPlayer::stop() {
 		return;
 
 	playback->stop();
+	AudioServer::get_singleton()->stream_set_active(stream_rid,false);
+	resampler.clear();
 	set_process(false);
 	last_audio_time=0;
 };
@@ -382,13 +384,13 @@ void VideoPlayer::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("set_buffering_msec","msec"),&VideoPlayer::set_buffering_msec);
 	ObjectTypeDB::bind_method(_MD("get_buffering_msec"),&VideoPlayer::get_buffering_msec);
 
+	ADD_PROPERTY( PropertyInfo(Variant::INT, "stream/audio_track",PROPERTY_HINT_RANGE,"0,128,1"), _SCS("set_audio_track"), _SCS("get_audio_track") );
 	ADD_PROPERTY( PropertyInfo(Variant::OBJECT, "stream/stream", PROPERTY_HINT_RESOURCE_TYPE,"VideoStream"), _SCS("set_stream"), _SCS("get_stream") );
 //	ADD_PROPERTY( PropertyInfo(Variant::BOOL, "stream/loop"), _SCS("set_loop"), _SCS("has_loop") );
 	ADD_PROPERTY( PropertyInfo(Variant::REAL, "stream/volume_db", PROPERTY_HINT_RANGE,"-80,24,0.01"), _SCS("set_volume_db"), _SCS("get_volume_db") );
 	ADD_PROPERTY( PropertyInfo(Variant::BOOL, "stream/autoplay"), _SCS("set_autoplay"), _SCS("has_autoplay") );
 	ADD_PROPERTY( PropertyInfo(Variant::BOOL, "stream/paused"), _SCS("set_paused"), _SCS("is_paused") );
-    ADD_PROPERTY( PropertyInfo(Variant::INT, "stream/audio_track",PROPERTY_HINT_RANGE,"0,128,1"), _SCS("set_audio_track"), _SCS("get_audio_track") );
-    ADD_PROPERTY( PropertyInfo( Variant::BOOL, "expand" ), _SCS("set_expand"),_SCS("has_expand") );
+	ADD_PROPERTY( PropertyInfo( Variant::BOOL, "expand" ), _SCS("set_expand"),_SCS("has_expand") );
 }
 
 
diff --git a/scene/io/SCsub b/scene/io/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/io/SCsub
+++ b/scene/io/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/main/SCsub b/scene/main/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/main/SCsub
+++ b/scene/main/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/main/instance_placeholder.cpp b/scene/main/instance_placeholder.cpp
index 370eb1e74a..12f6086bf0 100644
--- a/scene/main/instance_placeholder.cpp
+++ b/scene/main/instance_placeholder.cpp
@@ -22,12 +22,12 @@ void InstancePlaceholder::_get_property_list( List<PropertyInfo> *p_list) const{
 }
 
 
-void InstancePlaceholder::set_path(const String& p_name) {
+void InstancePlaceholder::set_instance_path(const String& p_name) {
 
 	path=p_name;
 }
 
-String InstancePlaceholder::get_path() const {
+String InstancePlaceholder::get_instance_path() const {
 
 	return path;
 }
@@ -66,6 +66,7 @@ void InstancePlaceholder::replace_by_instance(const Ref<PackedScene> &p_custom_s
 void InstancePlaceholder::_bind_methods() {
 
 	ObjectTypeDB::bind_method(_MD("replace_by_instance","custom_scene:PackedScene"),&InstancePlaceholder::replace_by_instance,DEFVAL(Variant()));
+	ObjectTypeDB::bind_method(_MD("get_instance_path"),&InstancePlaceholder::get_instance_path);
 }
 
 InstancePlaceholder::InstancePlaceholder() {
diff --git a/scene/main/instance_placeholder.h b/scene/main/instance_placeholder.h
index e9e76e7a2d..9c47655ce7 100644
--- a/scene/main/instance_placeholder.h
+++ b/scene/main/instance_placeholder.h
@@ -26,8 +26,8 @@ protected:
 
 public:
 
-	void set_path(const String& p_name);
-	String get_path() const;
+	void set_instance_path(const String& p_name);
+	String get_instance_path() const;
 
 	void replace_by_instance(const Ref<PackedScene>& p_custom_scene=Ref<PackedScene>());
 
diff --git a/scene/main/node.cpp b/scene/main/node.cpp
index 631dc8dcc7..a832162994 100644
--- a/scene/main/node.cpp
+++ b/scene/main/node.cpp
@@ -628,11 +628,11 @@ String Node::validate_child_name(const String& p_name) const {
 
 }
 
-void Node::_validate_child_name(Node *p_child) {
+void Node::_validate_child_name(Node *p_child, bool p_force_human_readable) {
 
 	/* Make sure the name is unique */
 
-	if (node_hrcr) {
+	if (node_hrcr || p_force_human_readable) {
 
 		//this approach to autoset node names is human readable but very slow
 		//it's turned on while running in the editor
@@ -700,11 +700,7 @@ void Node::_validate_child_name(Node *p_child) {
 		if (!unique) {
 
 			node_hrcr_count.ref();
-#ifdef DEBUG_ENABLED
-			String name = "@"+String(p_child->get_type_name())+itos(node_hrcr_count.get());
-#else
-			String name = "@"+itos(node_hrcr_count.get());
-#endif
+			String name = "@"+String(p_child->get_name())+"@"+itos(node_hrcr_count.get());
 			p_child->data.name=name;
 		}
 	}
@@ -732,24 +728,27 @@ void Node::_add_child_nocheck(Node* p_child,const StringName& p_name) {
 }
 
 
-void Node::add_child(Node *p_child) {
+void Node::add_child(Node *p_child, bool p_legible_unique_name) {
 
 	ERR_FAIL_NULL(p_child);
 	/* Fail if node has a parent */
-	ERR_EXPLAIN("Can't add child "+p_child->get_name()+" to itself.")
-	ERR_FAIL_COND( p_child==this ); // adding to itself!
+	if (p_child==this) {
+		ERR_EXPLAIN("Can't add child "+p_child->get_name()+" to itself.")
+		ERR_FAIL_COND( p_child==this ); // adding to itself!
+	}
 	ERR_EXPLAIN("Can't add child, already has a parent");
 	ERR_FAIL_COND( p_child->data.parent );
 	ERR_EXPLAIN("Can't add child while a notification is happening");
 	ERR_FAIL_COND( data.blocked > 0 );
 		
 	/* Validate name */
-	_validate_child_name(p_child);
+	_validate_child_name(p_child,p_legible_unique_name);
 
 	_add_child_nocheck(p_child,p_child->data.name);
 	
 }
 
+
 void Node::_propagate_validate_owner() {
 
 	if (data.owner) {
@@ -1984,7 +1983,7 @@ void Node::_bind_methods() {
 
 	ObjectTypeDB::bind_method(_MD("set_name","name"),&Node::set_name);
 	ObjectTypeDB::bind_method(_MD("get_name"),&Node::get_name);
-	ObjectTypeDB::bind_method(_MD("add_child","node:Node"),&Node::add_child);
+	ObjectTypeDB::bind_method(_MD("add_child","node:Node","legible_unique_name"),&Node::add_child,DEFVAL(false));
 	ObjectTypeDB::bind_method(_MD("remove_child","node:Node"),&Node::remove_child);
 	//ObjectTypeDB::bind_method(_MD("remove_and_delete_child","node:Node"),&Node::remove_and_delete_child);
 	ObjectTypeDB::bind_method(_MD("get_child_count"),&Node::get_child_count);
@@ -2039,6 +2038,10 @@ void Node::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("duplicate:Node","use_instancing"),&Node::duplicate,DEFVAL(false));
 	ObjectTypeDB::bind_method(_MD("replace_by","node:Node","keep_data"),&Node::replace_by,DEFVAL(false));
 
+	ObjectTypeDB::bind_method(_MD("set_scene_instance_load_placeholder","load_placeholder"),&Node::set_scene_instance_load_placeholder);
+	ObjectTypeDB::bind_method(_MD("get_scene_instance_load_placeholder"),&Node::get_scene_instance_load_placeholder);
+
+
 	ObjectTypeDB::bind_method(_MD("get_viewport"),&Node::get_viewport);
 
 	ObjectTypeDB::bind_method(_MD("queue_free"),&Node::queue_delete);
diff --git a/scene/main/node.h b/scene/main/node.h
index 87fa4dd6ca..196c4a06eb 100644
--- a/scene/main/node.h
+++ b/scene/main/node.h
@@ -122,7 +122,7 @@ private:
 
 
 
-	void _validate_child_name(Node *p_name);
+	void _validate_child_name(Node *p_name, bool p_force_human_readable=false);
 
 	void _propagate_reverse_notification(int p_notification);	
 	void _propagate_deferred_notification(int p_notification, bool p_reverse);
@@ -187,7 +187,7 @@ public:
 	StringName get_name() const;
 	void set_name(const String& p_name);
 	
-	void add_child(Node *p_child);
+	void add_child(Node *p_child,bool p_legible_unique_name=false);
 	void remove_child(Node *p_child);
 	
 	int get_child_count() const;
diff --git a/scene/register_scene_types.cpp b/scene/register_scene_types.cpp
index fe6b192d78..b3a9ab922e 100644
--- a/scene/register_scene_types.cpp
+++ b/scene/register_scene_types.cpp
@@ -231,6 +231,7 @@ static ResourceFormatLoaderTheme *resource_loader_theme=NULL;
 static ResourceFormatLoaderShader *resource_loader_shader=NULL;
 
 static ResourceFormatSaverText *resource_saver_text=NULL;
+static ResourceFormatLoaderText *resource_loader_text=NULL;
 
 //static SceneStringNames *string_names;
 
@@ -619,6 +620,9 @@ void register_scene_types() {
 	resource_saver_text = memnew( ResourceFormatSaverText );
 	ResourceSaver::add_resource_format_saver(resource_saver_text);
 
+	resource_loader_text = memnew( ResourceFormatLoaderText );
+	ResourceLoader::add_resource_format_loader(resource_loader_text);
+
 }
 
 void unregister_scene_types() {
@@ -640,5 +644,8 @@ void unregister_scene_types() {
 	if (resource_saver_text) {
 		memdelete(resource_saver_text);
 	}
+	if (resource_loader_text) {
+		memdelete(resource_loader_text);
+	}
 	SceneStringNames::free();
 }
diff --git a/scene/resources/SCsub b/scene/resources/SCsub
index eaa282ae1a..bb9766e1ca 100644
--- a/scene/resources/SCsub
+++ b/scene/resources/SCsub
@@ -6,4 +6,3 @@ env.add_source_files(env.scene_sources,"*.c")
 Export('env')
 
 SConscript("default_theme/SCsub");
-
diff --git a/scene/resources/concave_polygon_shape_2d.cpp b/scene/resources/concave_polygon_shape_2d.cpp
index 923e2817ef..01b7531f14 100644
--- a/scene/resources/concave_polygon_shape_2d.cpp
+++ b/scene/resources/concave_polygon_shape_2d.cpp
@@ -34,6 +34,7 @@
 void ConcavePolygonShape2D::set_segments(const DVector<Vector2>& p_segments) {
 
 	Physics2DServer::get_singleton()->shape_set_data(get_rid(),p_segments);
+	emit_changed();
 }
 
 DVector<Vector2> ConcavePolygonShape2D::get_segments() const {
diff --git a/scene/resources/convex_polygon_shape_2d.cpp b/scene/resources/convex_polygon_shape_2d.cpp
index dac39fc846..a1137ba614 100644
--- a/scene/resources/convex_polygon_shape_2d.cpp
+++ b/scene/resources/convex_polygon_shape_2d.cpp
@@ -33,6 +33,7 @@
 void ConvexPolygonShape2D::_update_shape() {
 
 	Physics2DServer::get_singleton()->shape_set_data(get_rid(),points);
+	emit_changed();
 
 }
 
@@ -62,7 +63,7 @@ void ConvexPolygonShape2D::_bind_methods() {
 
 
 
-	ADD_PROPERTY( PropertyInfo(Variant::VECTOR2,"points"),_SCS("set_points"),_SCS("get_points") );
+	ADD_PROPERTY( PropertyInfo(Variant::VECTOR2_ARRAY,"points"),_SCS("set_points"),_SCS("get_points") );
 
 }
 
diff --git a/scene/resources/default_theme/SCsub b/scene/resources/default_theme/SCsub
index 055d2f2474..bbe59b3054 100644
--- a/scene/resources/default_theme/SCsub
+++ b/scene/resources/default_theme/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.scene_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/scene/resources/default_theme/button_focus.png b/scene/resources/default_theme/button_focus.png
new file mode 100644
index 0000000000..52af26483e
--- /dev/null
+++ b/scene/resources/default_theme/button_focus.png
diff --git a/scene/resources/default_theme/checked.png b/scene/resources/default_theme/checked.png
index a41b33cccf..a2240c227f 100644
--- a/scene/resources/default_theme/checked.png
+++ b/scene/resources/default_theme/checked.png
diff --git a/scene/resources/default_theme/default_theme.cpp b/scene/resources/default_theme/default_theme.cpp
index 7e36366e74..25407a5b84 100644
--- a/scene/resources/default_theme/default_theme.cpp
+++ b/scene/resources/default_theme/default_theme.cpp
@@ -188,6 +188,7 @@ void make_default_theme() {
 	// Font Colors
 
 	Color control_font_color = Color::html("e0e0e0");
+	Color control_font_color_lower = Color::html("a0a0a0");
 	Color control_font_color_low = Color::html("b0b0b0");
 	Color control_font_color_hover = Color::html("f0f0f0");
 	Color control_font_color_disabled = Color(0.9,0.9,0.9,0.2);
@@ -212,8 +213,8 @@ void make_default_theme() {
 
 	// Button
 
-	Ref<StyleBox> sb_button_normal = sb_expand( make_stylebox( button_normal_png,4,4,4,4,6,2,6,2),2,2,2,2);
-	Ref<StyleBox> sb_button_pressed = sb_expand( make_stylebox( button_pressed_png,4,4,4,4,6,2,6,2),2,2,2,2);
+	Ref<StyleBox> sb_button_normal = sb_expand( make_stylebox( button_normal_png,4,4,4,4,6,3,6,3),2,2,2,2);
+	Ref<StyleBox> sb_button_pressed = sb_expand( make_stylebox( button_pressed_png,4,4,4,4,6,3,6,3),2,2,2,2);
 	Ref<StyleBox> sb_button_hover = sb_expand( make_stylebox( button_hover_png,4,4,4,4,6,2,6,2),2,2,2,2);
 	Ref<StyleBox> sb_button_disabled = sb_expand( make_stylebox( button_disabled_png,4,4,4,4,6,2,6,2),2,2,2,2);
 	Ref<StyleBox> sb_button_focus = sb_expand( make_stylebox( button_focus_png,4,4,4,4,6,2,6,2),2,2,2,2);
@@ -273,14 +274,14 @@ void make_default_theme() {
 	t->set_color("font_color_hover","ToolButton", control_font_color_hover );
 	t->set_color("font_color_disabled","ToolButton", Color(0.9,0.95,1,0.3) );
 
-	t->set_constant("hseparation","ToolButton", 0 );
+	t->set_constant("hseparation","ToolButton", 3 );
 
 
 
 	// OptionButton
 
-	Ref<StyleBox> sb_optbutton_normal = sb_expand( make_stylebox( option_button_normal_png,4,4,21,4,6,2,21,2),2,2,2,2);
-	Ref<StyleBox> sb_optbutton_pressed = sb_expand( make_stylebox( option_button_pressed_png,4,4,21,4,6,2,21,2),2,2,2,2);
+	Ref<StyleBox> sb_optbutton_normal = sb_expand( make_stylebox( option_button_normal_png,4,4,21,4,6,3,21,3),2,2,2,2);
+	Ref<StyleBox> sb_optbutton_pressed = sb_expand( make_stylebox( option_button_pressed_png,4,4,21,4,6,3,21,3),2,2,2,2);
 	Ref<StyleBox> sb_optbutton_hover = sb_expand( make_stylebox( option_button_hover_png,4,4,21,4,6,2,21,2),2,2,2,2);
 	Ref<StyleBox> sb_optbutton_disabled = sb_expand( make_stylebox( option_button_disabled_png,4,4,21,4,6,2,21,2),2,2,2,2);
 	Ref<StyleBox> sb_optbutton_focus = sb_expand( make_stylebox( button_focus_png,4,4,4,4,6,2,6,2),2,2,2,2);
@@ -311,6 +312,7 @@ void make_default_theme() {
 	t->set_stylebox("pressed","MenuButton", sb_button_pressed );
 	t->set_stylebox("hover","MenuButton", sb_button_pressed );
 	t->set_stylebox("disabled","MenuButton", make_empty_stylebox(0,0,0,0) );
+	t->set_stylebox("focus","MenuButton", sb_button_focus );
 
 	t->set_font("font","MenuButton", default_font );
 
@@ -321,6 +323,10 @@ void make_default_theme() {
 
 	t->set_constant("hseparation","MenuButton", 3 );
 
+	// ButtonGroup
+
+	t->set_stylebox("panel","ButtonGroup", memnew( StyleBoxEmpty ));
+
 	// CheckBox
 
 	Ref<StyleBox> cbx_empty = memnew( StyleBoxEmpty );
@@ -460,10 +466,10 @@ void make_default_theme() {
 
 	// HScrollBar
 
-	t->set_stylebox("scroll","HScrollBar", make_stylebox( scroll_bg_png,3,3,3,3,0,0,0,0) );
-	t->set_stylebox("scroll_focus","HScrollBar", make_stylebox( scroll_bg_png,3,3,3,3,0,0,0,0) );
-	t->set_stylebox("grabber","HScrollBar", make_stylebox( scroll_grabber_png,3,3,3,3,2,2,2,2) );
-	t->set_stylebox("grabber_hilite","HScrollBar", make_stylebox( scroll_grabber_hl_png,3,3,3,3,2,2,2,2) );
+	t->set_stylebox("scroll","HScrollBar", make_stylebox( scroll_bg_png,5,5,5,5,0,0,0,0) );
+	t->set_stylebox("scroll_focus","HScrollBar", make_stylebox( scroll_bg_png,5,5,5,5,0,0,0,0) );
+	t->set_stylebox("grabber","HScrollBar", make_stylebox( scroll_grabber_png,5,5,5,5,2,2,2,2) );
+	t->set_stylebox("grabber_hilite","HScrollBar", make_stylebox( scroll_grabber_hl_png,5,5,5,5,2,2,2,2) );
 
 	t->set_icon("increment","HScrollBar",empty_icon);
 	t->set_icon("increment_hilite","HScrollBar",empty_icon);
@@ -474,10 +480,10 @@ void make_default_theme() {
 
 	// VScrollBar
 
-	t->set_stylebox("scroll","VScrollBar", make_stylebox( scroll_bg_png,3,3,3,3,0,0,0,0) );
-	t->set_stylebox("scroll_focus","VScrollBar", make_stylebox( scroll_bg_png,3,3,3,3,0,0,0,0) );
-	t->set_stylebox("grabber","VScrollBar", make_stylebox( scroll_grabber_png,3,3,3,3,2,2,2,2) );
-	t->set_stylebox("grabber_hilite","VScrollBar", make_stylebox( scroll_grabber_hl_png,3,3,3,3,2,2,2,2) );
+	t->set_stylebox("scroll","VScrollBar", make_stylebox( scroll_bg_png,5,5,5,5,0,0,0,0) );
+	t->set_stylebox("scroll_focus","VScrollBar", make_stylebox( scroll_bg_png,5,5,5,5,0,0,0,0) );
+	t->set_stylebox("grabber","VScrollBar", make_stylebox( scroll_grabber_png,5,5,5,5,2,2,2,2) );
+	t->set_stylebox("grabber_hilite","VScrollBar", make_stylebox( scroll_grabber_hl_png,5,5,5,5,2,2,2,2) );
 
 	t->set_icon("increment","VScrollBar",empty_icon);
 	t->set_icon("increment_hilite","VScrollBar",empty_icon);
@@ -649,7 +655,7 @@ void make_default_theme() {
 	t->set_constant("icon_margin","ItemList",4);
 	t->set_constant("line_separation","ItemList",2);
 	t->set_font("font","ItemList", default_font );
-	t->set_color("font_color","ItemList", control_font_color_low );
+	t->set_color("font_color","ItemList", control_font_color_lower );
 	t->set_color("font_color_selected","ItemList", control_font_color_pressed );
 	t->set_color("guide_color","ItemList", Color(0,0,0,0.1) );
 	t->set_stylebox("selected","ItemList", item_selected_oof );
@@ -697,8 +703,8 @@ void make_default_theme() {
 
 	// Tabs
 
-	t->set_stylebox("tab_fg","Tabs", sb_expand( make_stylebox( tab_current_png,4,4,4,1,16,4,16,4),2,2,2,2) );
-	t->set_stylebox("tab_bg","Tabs", sb_expand( make_stylebox( tab_behind_png,5,5,5,1,16,6,16,4),3,3,3,3) );
+	t->set_stylebox("tab_fg","Tabs", sb_expand( make_stylebox( tab_current_png,4,3,4,1,16,3,16,2),2,2,2,2) );
+	t->set_stylebox("tab_bg","Tabs", sb_expand( make_stylebox( tab_behind_png,5,4,5,1,16,5,16,2),3,3,3,3) );
 	t->set_stylebox("panel","Tabs",tc_sb );
 	t->set_stylebox("button_pressed","Tabs", make_stylebox( button_pressed_png,4,4,4,4) );
 	t->set_stylebox("button","Tabs", make_stylebox( button_normal_png,4,4,4,4) );
diff --git a/scene/resources/default_theme/frame_focus.png b/scene/resources/default_theme/frame_focus.png
new file mode 100644
index 0000000000..0df195d83e
--- /dev/null
+++ b/scene/resources/default_theme/frame_focus.png
diff --git a/scene/resources/default_theme/hslider_bg.png b/scene/resources/default_theme/hslider_bg.png
index 963e4c8456..701c1d43b5 100644
--- a/scene/resources/default_theme/hslider_bg.png
+++ b/scene/resources/default_theme/hslider_bg.png
diff --git a/scene/resources/default_theme/hslider_grabber.png b/scene/resources/default_theme/hslider_grabber.png
index b72ec4d8f4..343d247bc5 100644
--- a/scene/resources/default_theme/hslider_grabber.png
+++ b/scene/resources/default_theme/hslider_grabber.png
diff --git a/scene/resources/default_theme/hslider_grabber_hl.png b/scene/resources/default_theme/hslider_grabber_hl.png
index 0dc5f2b615..f0ba7dd212 100644
--- a/scene/resources/default_theme/hslider_grabber_hl.png
+++ b/scene/resources/default_theme/hslider_grabber_hl.png
diff --git a/scene/resources/default_theme/scroll_bg.png b/scene/resources/default_theme/scroll_bg.png
index 53797886cd..e1e419d663 100644
--- a/scene/resources/default_theme/scroll_bg.png
+++ b/scene/resources/default_theme/scroll_bg.png
diff --git a/scene/resources/default_theme/scroll_grabber.png b/scene/resources/default_theme/scroll_grabber.png
index 16beda1514..3a193e5448 100644
--- a/scene/resources/default_theme/scroll_grabber.png
+++ b/scene/resources/default_theme/scroll_grabber.png
diff --git a/scene/resources/default_theme/scroll_grabber_hl.png b/scene/resources/default_theme/scroll_grabber_hl.png
index acfb7c835b..82c94c03c8 100644
--- a/scene/resources/default_theme/scroll_grabber_hl.png
+++ b/scene/resources/default_theme/scroll_grabber_hl.png
diff --git a/scene/resources/default_theme/theme_data.h b/scene/resources/default_theme/theme_data.h
index dcfaa6e3c0..c338126d3a 100644
--- a/scene/resources/default_theme/theme_data.h
+++ b/scene/resources/default_theme/theme_data.h
@@ -50,7 +50,7 @@ static const unsigned char button_pressed_png[]={
 
 
 static const unsigned char checked_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x0,0x0,0x0,0x0,0x0,0xf9,0x43,0xbb,0x7f,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x12,0x0,0x36,0x36,0x55,0x46,0x2e,0x76,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0xbd,0x49,0x44,0x41,0x54,0x38,0x8d,0x8d,0x93,0x4f,0x6b,0x13,0x51,0x14,0xc5,0x7f,0x93,0x99,0x84,0xa1,0xd2,0x8c,0x90,0x49,0xba,0xea,0xb6,0x1f,0xa0,0x8,0x76,0xa3,0xad,0x6d,0x53,0x4a,0x96,0x22,0x4a,0xb,0x2e,0x55,0xfc,0x6,0x36,0x74,0x2b,0x45,0xba,0xaf,0x71,0x61,0x71,0x95,0x42,0x37,0x52,0xdc,0x34,0x85,0x92,0xa2,0xb8,0xb7,0x4,0x4d,0xd5,0xb8,0xd2,0x85,0x98,0xcc,0x22,0x23,0x99,0xc9,0xcc,0xa4,0x6f,0xba,0xc8,0x4c,0x9b,0x92,0x97,0xd2,0xb,0x8f,0xb,0xef,0xbd,0x73,0xce,0xfd,0xab,0xd0,0x37,0x5,0x48,0x0,0x5a,0xe4,0xaf,0x32,0x1,0xf4,0x22,0x1f,0x2a,0x11,0x38,0x9,0x64,0x80,0x49,0x20,0x7d,0x5,0x89,0x0,0x6c,0xe0,0x37,0x60,0x1,0x41,0xac,0x98,0xd1,0x54,0x6d,0x6e,0x61,0x3e,0x5f,0x4e,0x26,0x93,0x52,0x64,0x36,0x97,0x65,0xad,0xf8,0x2,0xd7,0x75,0x99,0x99,0xb9,0xfd,0xd8,0x75,0xdd,0x43,0xe0,0x9f,0xa,0xa4,0x80,0xa9,0xa5,0xfc,0xf2,0xbe,0xe7,0x79,0xd8,0xb6,0x8d,0xe3,0x38,0x43,0xe7,0xd9,0xd3,0x27,0x8c,0xa7,0xc7,0x29,0x6d,0xbd,0x21,0x97,0x9d,0xb8,0xff,0xfd,0xc7,0xc9,0x2e,0xd0,0x4c,0x44,0x11,0x18,0x9a,0xa6,0xd1,0xed,0x76,0xa5,0xea,0x85,0x42,0x81,0xe9,0x5b,0xd3,0xec,0xbd,0xdf,0xa3,0x56,0xab,0x11,0x86,0x21,0x80,0x1,0x24,0xe2,0x5c,0x95,0x41,0x80,0xaa,0xaa,0x97,0x8,0x1e,0x3e,0x7a,0x40,0xe3,0xe7,0x2f,0xaa,0xd5,0x23,0x82,0x20,0x40,0x8,0x41,0x24,0xac,0x48,0x8b,0xb5,0xb2,0xba,0x42,0xa9,0xf4,0x1a,0xc3,0x30,0xb8,0x3b,0x77,0x87,0xb1,0x1b,0x63,0x54,0x2a,0x7,0x74,0x3a,0x9d,0xa1,0xbf,0x9a,0x8c,0x60,0xa7,0xbc,0xc3,0xe2,0xe2,0x3
c,0x1b,0x1b,0x2f,0xe9,0x7a,0x1e,0xdf,0xbe,0xd6,0x39,0xfe,0x72,0x2c,0x4d,0x4f,0x1a,0x81,0x10,0x82,0x62,0x71,0x9d,0xf4,0x4d,0x3,0xd3,0xcc,0x50,0xd9,0x3f,0xc0,0xb2,0xac,0xeb,0x13,0x84,0x61,0x88,0xd5,0x6a,0xb1,0xf9,0x6a,0x13,0xdb,0xfe,0x4f,0xbd,0x5e,0x97,0x82,0x47,0x12,0x0,0x78,0x9e,0x8f,0xe3,0xb8,0x6c,0xbf,0xdd,0xa6,0xdd,0x6e,0x8f,0x24,0x88,0x6b,0x10,0xca,0x1e,0x1b,0x8d,0x6,0xa6,0x99,0x89,0xdb,0x76,0xa1,0xda,0x6f,0xde,0x79,0x2b,0x4,0xd0,0xee,0xf5,0x7a,0xe8,0xba,0x3e,0x94,0x4a,0xb3,0xd9,0xba,0x74,0xa7,0xeb,0x3a,0xbe,0xef,0x43,0x7f,0xa4,0x4f,0x15,0x40,0x5,0x26,0x52,0xa9,0x54,0xfe,0xde,0xec,0xc2,0xbb,0x51,0xa3,0x1c,0x5b,0x10,0x4,0x7c,0xfa,0xfc,0xf1,0xb9,0xe3,0x74,0x3e,0x0,0x7f,0x7,0x97,0xc9,0xe4,0xfa,0xcb,0xf4,0x7,0x68,0x1,0x7e,0x3c,0x81,0x83,0xeb,0xac,0xca,0xb1,0xe7,0x76,0xca,0xc0,0x3a,0x9f,0x1,0x62,0x9,0xad,0x4a,0x1e,0xbc,0xe7,0x4d,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x16,0x30,0x0,0x6d,0xeb,0x4,0xa7,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0xc1,0x49,0x44,0x41,0x54,0x38,0xcb,0xad,0x93,0x41,0x6b,0x13,0x51,0x14,0x85,0xbf,0x77,0x33,0xe3,0x80,0x5d,0x58,0x82,0x13,0x5b,0x28,0xa6,0xdb,0xda,0x3f,0xd0,0x76,0x67,0x17,0x5d,0xa4,0x2d,0x23,0xa,0x62,0xc0,0x5f,0xa0,0x69,0x37,0x82,0xd0,0x5d,0xb0,0x8b,0x52,0xa1,0x20,0x14,0x9,0x48,0x5,0x3,0x52,0x29,0x36,0x48,0x28,0x2e,0x14,0x8a,0xab,0x26,0xf5,0x7,0x84,0xb1,0xbb,0x11,0xa,0xb6,0x49,0x9,0xd9,0xa6,0x49,0xdf,0x73,0xe1,0x4c,0x48,0x71,0xd2,0x8d,0x5e,0x78,0xf0,0xe0,0x9e,0x7b,0xee,0xb9,0xf7,0x9d,0x7,0xff,0x18,0xa,0x60,0x71,0x3e,0xeb,0x0,0xe3,0xc0,0x4,0x90,0x2,0x24,0xe,0x3c,0x34,0x74,0x5d,0x1e,0x65,0xef,0xdf,0x7a,0xbb,0xf5,0x7e,0x1b,0x8,0xf6,0x3e,0x7f,0x68,0x5b,0x61,0x6e,0xbc,0x71,0xf6,0x6b,0xe3,0xe4,0xf4,0x78,0x4e,0x6b,0x6d,0x2b,0xa5,0x62,0xbb,0xe5,0x72,0x4f,0xf0,0xee,0x65,0xb8,0xe6,0x58,0x8f,0xb,0xaf,0xdf,0x65,0x80,0xa3,0x88,0x60,0xe2,0xe4,0xf4,0x78,0xe,0xb0,0x2d,0xcb,0x26,0x8e,0x20,0x9d,0x4e,0xb3,0xb4,0xf4,0x14,0x63,0xc,0xc5,0x62,0xf1,0x76,0xa8,0xf6,0x28,0x92,0x9a,0xd2,0x5a,0xdb,0x22,0x9,0x12,0x89,0x4,0x22,0xc2,0xe4,0xe4,0x9d,0xde,0x5d,0x44,0x58,0x5d,0xcd,0xe3,0x38,0xe,0xbb,0xbb,0x25,0xaa,0xd5,0x43,0x2b,0x1c,0xb5,0x37,0xab,0x28,0xa5,0x7a,0x9d,0x37,0x37,0x5f,0x51,0x2e,0x97,0x98,0x9e,0x9e,0x2,0xc0,0xf3,0x16,0x99,0x99,0x99,0xa2,0xd9,0x6c,0xb2,0xbe,0xbe,0x11,0xe1,0x84,0x41,0xcb,0xaa,0xd5,0x6a,0x0,0x64,0xb3,0xf,0x19,0x1e,0xbe,0xc1,0xca,0xca,0x73,0x0,0xd6,0xd6,0x5e,0xd2,0x6a,0xb5,0x2e,0x61,0x
ad,0x38,0x82,0x52,0xe9,0x13,0xcb,0xcb,0x39,0x66,0x67,0xef,0x72,0x7e,0xde,0x21,0x99,0x4c,0x52,0xa9,0x1c,0x52,0x2e,0xef,0xfd,0x85,0x8d,0x55,0xd0,0x68,0x9c,0xb1,0xbf,0xff,0xd,0xdb,0xb6,0x59,0x58,0xc8,0xd0,0x6e,0xb7,0xc9,0xe7,0x5f,0xc4,0xbe,0x8c,0xc,0x32,0xc8,0xce,0xce,0xc7,0x3f,0x46,0x51,0x8a,0x42,0xe1,0xd,0x41,0xf0,0x33,0x16,0x67,0xd,0x22,0x38,0x38,0xa8,0xe0,0x79,0xf,0x70,0xdd,0x9b,0x54,0xab,0xdf,0x7,0x3a,0x31,0x22,0xd0,0xc6,0x18,0x8c,0x31,0xbd,0x84,0x31,0x6,0xdf,0xff,0x81,0xef,0x5f,0x2e,0xe8,0xc3,0xe9,0x7e,0x82,0xba,0x88,0x74,0xb4,0xbe,0xb0,0x23,0xd9,0x71,0x61,0x8c,0x41,0xeb,0xb,0x44,0xa4,0xb,0xd4,0xfb,0x77,0xe0,0x8f,0xa4,0xc6,0xbe,0x2,0xdd,0x6e,0xb7,0xc3,0x55,0x7,0x54,0x67,0x24,0x35,0xf6,0x5,0xf0,0xfb,0x15,0x4,0xae,0x3b,0xfa,0xcc,0x75,0x47,0xb7,0xae,0xfa,0x4c,0xa1,0xec,0x7a,0x58,0x1c,0xf0,0x3f,0xe2,0x37,0x4f,0x82,0xa8,0x62,0xbb,0xcc,0x8d,0x11,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
@@ -150,17 +150,17 @@ static const unsigned char hseparator_png[]={
 
 
 static const unsigned char hslider_bg_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xdd,0x0,0xdd,0x0,0xdd,0xf5,0x15,0x8,0x9d,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0xf,0xc,0x8,0x9f,0xb9,0xf5,0x45,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0x4d,0x49,0x44,0x41,0x54,0x38,0x8d,0x63,0x60,0x18,0x5,0x14,0x3,0x46,0x18,0x43,0x42,0x42,0x62,0x1a,0x3,0x3,0x43,0x26,0x91,0xfa,0xa6,0xbf,0x78,0xf1,0x22,0xb,0xdd,0x80,0xff,0xec,0xec,0x9c,0x44,0xe9,0xfe,0xf9,0xf3,0x3b,0xc3,0x8b,0x17,0x2f,0x18,0x19,0x18,0x18,0x18,0x98,0x88,0x76,0x2b,0xe,0x30,0xf0,0x6,0xb0,0x20,0xb1,0xa7,0xff,0xfc,0xf9,0x9d,0xe8,0x40,0xa4,0xd4,0xe2,0x51,0x80,0x4,0x0,0x2b,0x51,0x10,0x8d,0x9f,0x1f,0x30,0xd7,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x16,0x2c,0x16,0x7f,0x48,0xec,0xab,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x47,0x49,0x44,0x41,0x54,0x38,0xcb,0xc5,0x92,0x31,0x8e,0x52,0x61,0x14,0x85,0xbf,0xff,0x5e,0x22,0xfc,0xbc,0x79,0x3a,0xcd,0xd3,0x4,0x42,0x62,0x61,0x78,0x15,0x95,0xcb,0x98,0xb8,0x0,0x13,0x66,0x1,0x66,0xda,0xd9,0x89,0xb1,0xb6,0x30,0xc1,0x5,0x18,0x97,0xe1,0x2,0x4,0xb,0xd,0x19,0xa,0x5f,0xcc,0x20,0x4f,0x78,0x23,0x70,0xff,0xdf,0x2,0xd0,0x40,0x43,0x22,0xc5,0x9c,0xe4,0x54,0xe7,0x7c,0xb7,0xb9,0x7,0xee,0x5b,0xe,0xe0,0xc5,0xc5,0xcb,0xff,0x82,0x3f,0x7c,0x7c,0x8f,0xdb,0xc2,0x35,0x20,0x3,0x72,0xa0,0x3,0x78,0x40,0xe,0xfa,0x1,0xa8,0x80,0x31,0xf0,0x19,0x28,0x80,0x75,0x6d,0xb,0x3f,0x2b,0xcb,0x9f,0x57,0xdf,0x8b,0x9b,0xcb,0x45,0x35,0x3f,0xb,0x21,0xaa,0x73,0xfb,0x74,0x8c,0x20,0xe2,0xac,0xe9,0x93,0x5f,0x8f,0xb3,0xf6,0xbb,0x34,0x7d,0xf4,0x6,0xf8,0xa2,0x79,0xb7,0xf7,0x64,0x56,0x4e,0xaf,0xbf,0x7e,0x1b,0xbd,0x5a,0xaf,0xd7,0x5e,0x44,0x45,0x55,0x10,0xd1,0x3d,0xab,0xa,0xce,0x89,0x2c,0x57,0xcb,0xc6,0xed,0xf4,0xc7,0x73,0xef,0x13,0xa9,0xd7,0x1b,0x9f,0x4,0xc8,0x8b,0x62,0xd2,0x77,0xce,0xa9,0xaa,0xa2,0xba,0x83,0xe4,0xc0,0x9b,0x4c,0x55,0x71,0xce,0x69,0x51,0x4c,0xfa,0x40,0x5e,0x3,0x3a,0x8b,0x6a,0x9e,0xee,0xa0,0x63,0x12,0x11,0x62,0x8c,0x2c,0xaa,0x79,0xa,0x74,0xe4,0xd4,0x37,0xa,0x30,0x6e,0xfa,0xa4,0xc,0x21,0x10,0x42,0x38,0xa,0xec,0x7a,0x4d,0x9f,0x94,0xc0,0x58,0x80,0x61,0x96,0xb5,0x6,0x31,0x46,0x33,0x33,0xcc,0x8c,0x10,0xec,0x6f,0xf1,0x9f,0x37,0x99,0x99,0x11,0x63,0xb4,0x2c,0x6b,0xd,0x80,0xa1,0xe6,0xdd,0x5e,0x55,0xaf
,0x37,0x46,0xde,0x27,0xba,0x5a,0xde,0x75,0x97,0xab,0xdf,0xf,0xcc,0x82,0x8b,0x71,0xff,0x80,0x59,0x0,0x62,0x68,0xfa,0x64,0xd6,0x6e,0x3d,0x7d,0xfb,0x30,0x3d,0x7f,0xd,0x8c,0x4f,0x1e,0xd2,0xc9,0x53,0xbe,0x7f,0xfd,0x1,0xde,0x4b,0xa1,0x14,0xaf,0xc,0xa2,0x3a,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
 static const unsigned char hslider_grabber_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x92,0x0,0x92,0x0,0x99,0x25,0xc1,0x88,0x71,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x12,0x0,0x2,0x21,0x6d,0xbf,0x58,0x46,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x4b,0x49,0x44,0x41,0x54,0x38,0x8d,0xa5,0x93,0x31,0x6b,0xc2,0x40,0x14,0xc7,0xff,0x77,0xbd,0x34,0x26,0x97,0x5a,0x2c,0x41,0xa,0x9d,0x6b,0x8,0x86,0x2e,0xe,0xfd,0x4,0x1d,0xb2,0xf8,0x29,0x3a,0x15,0xec,0x87,0xa9,0x63,0xfb,0x3d,0x9c,0x3b,0x76,0xd0,0x82,0x8,0xe1,0x14,0xdc,0x82,0x74,0xc8,0x50,0xc4,0xdc,0x5d,0x32,0xd8,0xe5,0x22,0xa1,0x60,0x95,0xfa,0x1f,0x1f,0xef,0xf7,0xe3,0xdd,0xe3,0x1d,0x70,0x62,0xc8,0xef,0x42,0x10,0x4,0x37,0x0,0x6,0x0,0x62,0x0,0x1d,0x53,0x9e,0x3,0x18,0x1,0x18,0xa,0x21,0xd2,0xbd,0x82,0x20,0x8,0x1e,0x1,0xbc,0x84,0x61,0xe4,0xb8,0x2e,0x87,0x65,0x9d,0x3,0x0,0xca,0xb2,0x40,0x9e,0x6f,0x90,0x24,0x33,0x9,0xe0,0x59,0x8,0xf1,0x56,0x31,0x67,0x75,0xb8,0xd5,0xba,0x7a,0xed,0x76,0xef,0x2c,0xcf,0x6b,0x82,0x31,0x6,0x42,0x8,0x8,0x21,0x60,0x8c,0xc1,0x71,0x5c,0xb4,0xdb,0xd7,0x96,0x52,0xb2,0xcf,0x39,0x4f,0xb3,0x2c,0x9b,0xec,0x26,0x30,0x63,0x2f,0x7a,0xbd,0x7b,0xc7,0xb6,0x1b,0x7f,0xbe,0x59,0x6b,0x85,0xf1,0xf8,0x43,0x2,0xb8,0x15,0x42,0xa4,0xd4,0xd4,0x7,0x61,0x18,0x1d,0x84,0x1,0xc0,0xb6,0x1b,0x8,0xc3,0xc8,0x31,0x7b,0x42,0x25,0x88,0x5d,0x97,0x1f,0x84,0xab,0x98,0xde,0xb8,0x2e,0xe8,0x54,0xb,0x3b,0x26,0xa6,0xb7,0x53,0x17,0xfc,0x3b,0x95,0x60,0x5e,0x96,0xc5,0xd1,0x90,0xe9,0x9d,0xd7,0x5,0xa3,0x3c,0xdf,0x1c,0x2d,0x30,0xbd,0xa3,0xba,0x60,0x98,0x24,0x33,0xa9,0xb5,0x3a,0x8,0x6b,0xad,0xaa,0x83,0x1a,0x2,0xe6,0x90,0xb2,0x2c,0x5b,0xfb,0xbe,0xff,0xa5,0x94,0xec,0x37,0x9b,0x97,0x60,0x8c,0xed,0x85,0x97,0xcb,0x
5,0x94,0x92,0x4f,0x42,0x88,0xf7,0x9d,0xc0,0x48,0x26,0x9c,0xf3,0x74,0xb5,0x4a,0x1f,0x3c,0xef,0xc2,0xa2,0x94,0x82,0x52,0x8a,0xed,0x76,0x8b,0xa2,0xd0,0x58,0xaf,0xbf,0x31,0x9d,0x7e,0x4a,0x3,0xef,0x4e,0xf9,0xe4,0xcf,0x74,0x72,0x7e,0x0,0xd9,0x87,0x82,0x9b,0x21,0x12,0xa2,0x6e,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0x1a,0x26,0xd2,0xcb,0xf3,0x2b,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0xae,0x49,0x44,0x41,0x54,0x38,0xcb,0xa5,0x93,0x3f,0x8f,0x12,0x41,0x18,0xc6,0x7f,0x3b,0xbb,0xcb,0xc2,0x62,0xb2,0x85,0xc5,0xd1,0x10,0x8b,0xa5,0x93,0x5c,0x61,0xc9,0x77,0xb0,0xa2,0xb1,0xb1,0xdd,0xca,0x4f,0xe0,0xa7,0xb0,0x20,0x39,0x42,0x63,0x41,0x63,0x48,0xae,0x54,0x1a,0xb,0xa,0xb,0x8b,0x23,0x4,0xa3,0xd,0x39,0x42,0x22,0x24,0x86,0x9c,0xde,0xf1,0x3f,0xcc,0x32,0xbb,0x63,0xb3,0x87,0x66,0xef,0xc0,0x4b,0x7c,0xab,0xc9,0x33,0x33,0xbf,0xbc,0xef,0xf3,0xcc,0xc0,0x7f,0x96,0x71,0x8f,0x66,0x2,0x2e,0xe0,0x1,0xf9,0x44,0x5b,0x3,0x73,0x60,0x3,0x44,0xc7,0x0,0x26,0xf0,0x38,0x8,0x82,0x17,0xbe,0xef,0x57,0x3d,0xcf,0x3b,0x5,0x98,0xcf,0xe7,0x5f,0x86,0xc3,0xe1,0x79,0xa3,0xd1,0x78,0x7,0xfc,0xfa,0x1b,0x62,0xa6,0x0,0x8f,0x82,0x20,0x78,0x59,0x2e,0x97,0x5f,0x1b,0x86,0xf1,0x74,0xb5,0x5a,0x65,0xb7,0xdb,0x6d,0x36,0x93,0xc9,0x3c,0x29,0x14,0xa,0xcf,0x8a,0xc5,0xe2,0x4d,0xb7,0xdb,0xfd,0x6,0x84,0xb7,0x17,0x44,0xa,0xe0,0xf9,0xbe,0x5f,0x55,0x4a,0x9d,0x6c,0x36,0x1b,0x94,0x52,0x28,0xa5,0x48,0xd6,0x27,0xbe,0xef,0x57,0x93,0xd1,0x38,0x4,0xc8,0x7b,0x9e,0x77,0x2a,0xa5,0x44,0x6b,0xbd,0x17,0xb5,0xd6,0x48,0x29,0x49,0x46,0xca,0x1f,0x3,0x18,0x71,0x1c,0x1b,0x87,0x1c,0x8f,0xa2,0xc8,0x48,0xfb,0x96,0x6,0x84,0x52,0xca,0x2b,0xd3,0x34,0xef,0xc6,0x65,0x18,0x48,0x29,0xaf,0x1,0x75,0xc,0xb0,0x5e,0x2c,0x16,0x5f,0x6d,0xdb,0xbe,0x3,0xb0,0x6d,0x1b,0xad,0xf5,0x8f,0x24,0xca,0x83,0x80,0x65,0xbf,0xdf,0xff,0xe0,0x38,0x4e,0x24,0xc4,0x9f,0x2d,0x21,0x4,
0xb9,0x5c,0x4e,0x77,0x3a,0x9d,0xb7,0xc0,0xe2,0x18,0x40,0xf6,0x7a,0xbd,0xb,0x29,0xe5,0xc4,0xb2,0xac,0xbd,0x68,0x59,0x16,0xbb,0xdd,0xee,0x7b,0xbb,0xdd,0xfe,0xf4,0xaf,0xe,0xe2,0xd1,0x68,0x34,0x9e,0x4e,0xa7,0x1f,0x5d,0xd7,0x45,0x8,0x81,0x10,0x2,0xd7,0x75,0x19,0x8f,0xc7,0xef,0x95,0x52,0x57,0x40,0x7c,0xc,0xa0,0x81,0x79,0xad,0x56,0x7b,0x13,0x45,0xd1,0xa5,0xe3,0x38,0x38,0x8e,0x43,0x1c,0xc7,0xc3,0x7a,0xbd,0x7e,0x96,0x6e,0xff,0xbe,0x97,0x8,0x10,0x87,0x61,0xb8,0x9e,0xcd,0x66,0x97,0x95,0x4a,0xe5,0x79,0x36,0x9b,0x8d,0x5b,0xad,0xd6,0xab,0xc1,0x60,0x70,0x1,0x6c,0x1f,0x2,0x0,0x50,0x93,0xc9,0xe4,0x67,0xa9,0x54,0xba,0x59,0x2e,0x97,0x9f,0x9b,0xcd,0xe6,0x39,0x30,0x7b,0xe8,0x6f,0xdc,0x7b,0x7,0x38,0xc9,0x99,0x6d,0x3a,0xff,0xdb,0xfa,0xd,0x29,0xd4,0xb4,0x4b,0x76,0xdc,0xe7,0x79,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
 static const unsigned char hslider_grabber_hl_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x92,0x0,0x92,0x0,0x99,0x25,0xc1,0x88,0x71,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x12,0x0,0x2,0x1d,0x42,0xd0,0x24,0xc1,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x30,0x49,0x44,0x41,0x54,0x38,0x8d,0xa5,0x93,0xb1,0x6a,0xc2,0x50,0x14,0x86,0xbf,0x63,0xe2,0x90,0xd0,0x2e,0xc1,0xad,0x8b,0xad,0x60,0x9e,0xc0,0xbe,0x41,0x7,0x33,0x38,0x7,0x1d,0xba,0x74,0x2a,0xe8,0xc3,0xe8,0x68,0xc1,0x49,0xc8,0xec,0x10,0x9f,0xa1,0x8,0xee,0x71,0xcf,0x52,0x4d,0x70,0x73,0x8a,0xb7,0x83,0x37,0x72,0x11,0xb4,0xa1,0xfe,0xe3,0xe5,0x7c,0xdf,0xbd,0x1c,0xfe,0xb,0x77,0x46,0x2e,0xf,0x7c,0xdf,0x7f,0x2,0x86,0x40,0x17,0x68,0xeb,0xe3,0xd,0xb0,0x4,0x26,0x49,0x92,0xa4,0x57,0x5,0xbe,0xef,0x7f,0x0,0xe3,0x30,0x1c,0x38,0x9e,0xd7,0x40,0xa4,0x6,0x80,0x52,0x47,0xf2,0x7c,0x47,0x14,0xcd,0xf,0xc0,0x28,0x49,0x92,0xaf,0x92,0xb1,0x4c,0xb8,0xd9,0x7c,0x9e,0xf6,0xfb,0xef,0x75,0xd7,0x7d,0xd4,0xb0,0x0,0x82,0x48,0xd,0xd7,0x7d,0xa0,0xd3,0x79,0xad,0x6f,0xb7,0x3f,0x3d,0xdb,0xb6,0xd2,0x2c,0xcb,0xd6,0x0,0x35,0xe3,0xd9,0xe3,0x20,0xe8,0x21,0x62,0x71,0x2d,0x22,0x16,0x41,0xd0,0x3,0x18,0x6b,0xe6,0x24,0x0,0x86,0x61,0x38,0x70,0x6e,0xc1,0xa6,0x24,0xc,0x7,0x8e,0xde,0xd3,0x59,0xd0,0xf5,0xbc,0xc6,0x9f,0x70,0x19,0x3d,0xdb,0x35,0x5,0xed,0x72,0x61,0x55,0xa2,0x67,0xdb,0xa6,0xe0,0xdf,0x29,0x5,0x1b,0xa5,0x8e,0x95,0x21,0x3d,0xbb,0x31,0x5,0xcb,0x3c,0xdf,0x55,0x16,0xe8,0xd9,0xa5,0x29,0x98,0x44,0xd1,0xfc,0xa0,0x54,0x51,0xe1,0xf6,0xa2,0x2c,0xd4,0xe4,0x2c,0xd0,0xf5,0x1c,0xc5,0xf1,0x82,0x5b,0x12,0xa5,0xa,0xe2,0x78,0x1,0xa7,0x36,0xa6,0x60,0x34,0x31,0xcb,0xb2,0xb5,0x6d,0x5b,0xe9,0x6a,0xf5,0xfd,0xd6,0x6a,0xbd,0xd4,0x1d,0xc7,0x41,0
x44,0x34,0x78,0x24,0xcf,0xb7,0xcc,0x66,0xd3,0xc3,0x7e,0xbf,0xff,0x34,0xab,0x7c,0xf7,0x67,0xba,0x3b,0xbf,0x4d,0x78,0x75,0x34,0x1f,0x21,0x5d,0xa6,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0x17,0x2d,0xf0,0xb7,0x54,0xee,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0xcb,0x49,0x44,0x41,0x54,0x38,0xcb,0xa5,0x93,0xcd,0x4a,0x1b,0x51,0x1c,0xc5,0xcf,0x9d,0x3b,0x33,0x4e,0x3a,0x43,0x7,0x9b,0x16,0x93,0x82,0x14,0x8b,0x50,0xa8,0x61,0x28,0xf5,0x1,0xba,0xb5,0xb,0x41,0xc8,0x46,0x4,0xdf,0xa0,0x4f,0x50,0x70,0x95,0x95,0xb8,0x77,0x5b,0x10,0x84,0x6e,0xa2,0xab,0x4a,0x17,0x42,0x5a,0x74,0xe1,0xc2,0x82,0xa8,0xb5,0x14,0x12,0xa,0x2a,0xda,0xd4,0xc4,0x4c,0x32,0x33,0x72,0xe7,0xeb,0x5e,0x37,0x93,0x5a,0x26,0x31,0x16,0x3c,0xcb,0xc3,0xbd,0x3f,0xce,0xff,0xb,0xb8,0xa7,0x48,0x1f,0x8f,0x2,0x78,0x0,0xc0,0x4,0xa0,0x27,0x9e,0x7,0xa0,0xd,0xe0,0xa,0x40,0x3c,0x8,0x40,0x1,0x64,0xa7,0x4a,0xa5,0xd9,0x5c,0xc1,0x2a,0x2a,0x46,0xde,0x2,0x80,0xd0,0x3d,0xdf,0xff,0x7d,0xb8,0x5f,0xfe,0xbc,0xb0,0xf0,0x11,0x40,0xf3,0x5f,0x8,0x4d,0x1,0x8c,0xa9,0x52,0x69,0x3e,0xff,0xea,0xcd,0x7b,0xb7,0x6d,0x4e,0xd8,0x67,0xbe,0xe6,0x36,0x23,0x8d,0x13,0xf3,0x59,0x76,0x6c,0xf4,0x75,0xee,0xe5,0xf3,0x56,0xb5,0x52,0xf9,0xe,0x20,0xe8,0x7e,0x90,0x52,0x0,0x33,0x57,0xb0,0x8a,0x4e,0x4b,0x1d,0x71,0x1b,0xc,0x21,0x8b,0x11,0xb2,0x18,0x6e,0x83,0xc1,0x69,0xa9,0x23,0xb9,0x82,0x55,0x4c,0x4a,0xc3,0x6d,0x0,0x5d,0x31,0xf2,0x16,0xeb,0x4,0x10,0x5c,0xfc,0x35,0x5,0x17,0x60,0x9d,0x0,0x49,0x49,0xfa,0x20,0x0,0xe1,0x11,0x27,0x42,0xf4,0x76,0x56,0x70,0x80,0x47,0x31,0x49,0xf7,0x2d,0xd,0x8,0x78,0xe4,0x5e,0xc8,0x8a,0xd4,0x3,0x90,0x64,0x82,0x38,0xf0,0x2e,0x1,0x44,0x83,0x0,0x5e,0xe0,0xd4,0xf,0x55,0x5d,0xee,0x1,0xa8,0x19,0x19,0x94,0x6,0xe7,0xc9
,0x28,0x6f,0x5,0x38,0xbf,0xb6,0xb6,0x37,0x32,0xf,0x95,0x58,0xa2,0x37,0x49,0x25,0x4a,0xa0,0xf,0xab,0xe2,0x60,0xbd,0xfc,0x1,0x40,0x67,0x10,0xc0,0xaf,0x7d,0xfd,0xb2,0x2b,0x11,0xfb,0x54,0xd1,0x6e,0x52,0x28,0x9a,0xc,0x99,0x76,0x8e,0xbf,0xad,0xac,0x6c,0xdf,0x95,0x80,0xd7,0x8f,0x7e,0x9c,0x78,0xf5,0xda,0xa6,0xf1,0x78,0x8,0x12,0x25,0x90,0x28,0x81,0xf1,0x44,0x83,0x7d,0x72,0xf4,0x29,0xe,0xc3,0xb,0x0,0xfc,0xae,0x55,0x56,0x34,0xd3,0x7c,0x31,0xb7,0xba,0xb6,0x6e,0xff,0xc9,0x8c,0xb,0x1,0x3c,0x7a,0xea,0xd7,0x56,0x67,0x67,0x66,0x58,0xbb,0xfd,0x13,0x40,0x98,0x5e,0xdd,0xb4,0x78,0xe4,0xfb,0xde,0xd5,0x65,0xbd,0x5a,0x98,0x7e,0x3b,0x6d,0x64,0x15,0xbe,0xb3,0xbc,0xf4,0xee,0x6c,0x6f,0x6f,0x17,0x0,0xeb,0x77,0x38,0xfd,0x14,0x35,0xab,0xb5,0xc6,0xe8,0xe4,0x44,0x2b,0x70,0x1a,0x3b,0x95,0xc5,0xc5,0x32,0x0,0xfb,0x7f,0xaf,0xb1,0x2b,0x19,0xc0,0x50,0xf2,0x86,0xa5,0xe7,0xdf,0xd5,0x35,0xea,0x59,0xb2,0xa3,0x9f,0xba,0x1f,0x74,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
@@ -310,7 +310,7 @@ static const unsigned char reference_border_png[]={
 
 
 static const unsigned char scroll_bg_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x8,0x0,0x0,0x0,0x8,0x8,0x6,0x0,0x0,0x0,0xc4,0xf,0xbe,0x8b,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x26,0x0,0x26,0x0,0x26,0x59,0xf,0xde,0x74,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0x17,0x37,0x2c,0x8d,0x3d,0xc,0x64,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0x3f,0x49,0x44,0x41,0x54,0x18,0x95,0xad,0x8e,0x31,0xa,0x0,0x20,0xc,0x3,0xaf,0xda,0x47,0xf4,0xe5,0xf6,0xb3,0x4a,0x5d,0x1c,0x54,0x50,0x17,0xf,0x42,0x96,0x24,0x44,0xcc,0x8c,0x1b,0x69,0x78,0x1,0x62,0x53,0x1,0x90,0xb1,0x10,0xb5,0xb6,0xa5,0xa9,0x9a,0x1,0x24,0xf1,0xe0,0x5f,0xc0,0x55,0x33,0xb3,0x0,0x9f,0x4f,0x1e,0xe9,0xf,0x1d,0xb,0x68,0x95,0x6b,0x4f,0xeb,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0xc,0x0,0x0,0x0,0xc,0x8,0x6,0x0,0x0,0x0,0x56,0x75,0x5c,0xe7,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0x25,0x28,0x6d,0xad,0xf4,0x10,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x3c,0x49,0x44,0x41,0x54,0x28,0xcf,0x95,0x92,0x31,0x6e,0x13,0x61,0x10,0x46,0xdf,0xcc,0x44,0xd8,0xff,0x2e,0xb,0x34,0xb,0x92,0x23,0x4b,0x14,0xc8,0x5b,0xa5,0xe2,0x18,0x88,0x3,0x20,0x25,0x7,0x40,0xb4,0xb9,0x49,0x44,0x4d,0x81,0x14,0xe,0x80,0x38,0x6,0x7,0x20,0xa1,0x0,0x59,0xa4,0x60,0x85,0x8,0x59,0xbc,0xb,0xb6,0xff,0x19,0x8a,0xc5,0xd,0x28,0x45,0x9e,0xf4,0x15,0xdf,0xa7,0x27,0x4d,0x33,0x2,0xf0,0xf4,0xc9,0xb3,0x3d,0xa0,0x6,0x1a,0x60,0xe,0x24,0x46,0x6,0x60,0x9,0x7c,0x0,0xda,0xb7,0xef,0xde,0x6c,0xe5,0xaf,0xfc,0xa8,0xeb,0x7e,0xbc,0xf8,0xda,0x7e,0x39,0xea,0x87,0xd5,0x6d,0xf7,0x30,0x0,0x55,0xc9,0x45,0x2a,0x7f,0xde,0xaf,0xf7,0x5f,0x57,0xd5,0xdd,0x97,0xc0,0x47,0x6b,0x16,0x7,0xf,0xae,0xba,0xcb,0xe3,0x4f,0x9f,0xcf,0x9f,0x6f,0xb7,0xdb,0xa4,0x6a,0x6a,0xa6,0x98,0x29,0x22,0xaa,0xeb,0xcd,0x7a,0xfa,0xfd,0xf2,0xdb,0xe3,0x94,0x4a,0x9d,0x4c,0xa6,0xef,0x15,0x68,0xda,0xf6,0xe2,0x50,0x44,0xcc,0xcc,0x30,0x33,0x54,0xc7,0xec,0xba,0x88,0x58,0xdb,0x5e,0x1c,0x2,0xcd,0x1e,0x30,0xef,0x87,0x55,0x35,0x4a,0xca,0xbf,0xa8,0x2a,0x11,0x41,0x3f,0xac,0x2a,0x60,0xae,0x40,0xe1,0x1e,0x2a,0xc2,0xb5,0x88,0x80,0x7b,0x28,0x50,0x28,0x37,0x44,0x81,0x5e,0x55,0x3c,0xe2,0x7a,0x29,0x2,0x54,0xc5,0x81,0x5e,0x81,0x65,0x91,0xca,0xce,0xdd,0x71,0xf7,0xff,0xe4,0xdd,0x5e,0xa4,0xb2,0x3,0x96,0xa,0x9c,0xd5,0xf5,0xec,0x34,0x22,0x72,0xce,0x99,0x9c,0x33,0xee,0x63,0x76,0x3d,0x22,0x72,0x5d,0xcf,0x4e,0x81,0x33,0x6b,0x16,0x7,0xc3,0x64,0x32,0x3d,0x4f,0xa
9,0xb4,0xcd,0xfa,0xd7,0x62,0xbd,0xf9,0x7d,0x2b,0x67,0x97,0xf1,0x5a,0x78,0x91,0xca,0xab,0xfd,0xd9,0xc3,0x57,0x77,0xaa,0x7b,0x27,0xc0,0x52,0x6e,0xfa,0x1a,0x7f,0x0,0x2,0xd3,0x92,0x1e,0xd2,0x75,0x7c,0x7f,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
@@ -355,12 +355,12 @@ static const unsigned char scroll_button_up_hl_png[]={
 
 
 static const unsigned char scroll_grabber_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x8,0x0,0x0,0x0,0x8,0x8,0x6,0x0,0x0,0x0,0xc4,0xf,0xbe,0x8b,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x26,0x0,0x26,0x0,0x26,0x59,0xf,0xde,0x74,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0x17,0x25,0x29,0x85,0xa3,0x88,0x38,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0x72,0x49,0x44,0x41,0x54,0x18,0x95,0x7d,0xcd,0xb1,0x9,0xc3,0x30,0x0,0x44,0xd1,0x17,0x21,0x91,0x42,0x10,0xdc,0x4,0x34,0x80,0x77,0xc8,0x4,0x6e,0x32,0xb0,0x1b,0xf,0xe2,0x26,0x4d,0x6a,0x37,0xc2,0x5d,0x40,0x45,0x1a,0x1b,0x4c,0x20,0xfe,0xe5,0xbf,0x3b,0xee,0x52,0x4a,0x71,0x46,0x8c,0x31,0x1a,0x86,0x67,0x87,0x19,0x7b,0x7b,0x41,0x3f,0x4d,0xe3,0x1a,0x36,0x31,0xa3,0x3b,0xc,0x6f,0x78,0x41,0x6c,0xad,0x39,0x2c,0x77,0xae,0xb8,0xb7,0xd6,0x84,0x94,0xd2,0xdf,0xff,0x94,0x92,0x90,0x73,0x86,0xf7,0x4f,0xf6,0xc1,0x92,0x73,0x16,0x6a,0xad,0xf0,0xd8,0xe4,0xce,0x8a,0x7e,0xcb,0xce,0xf9,0x2,0x99,0xd9,0x19,0x5e,0xac,0x65,0x2e,0x22,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0xc,0x0,0x0,0x0,0xc,0x8,0x6,0x0,0x0,0x0,0x56,0x75,0x5c,0xe7,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x16,0x26,0x17,0xf2,0xa0,0x34,0xb7,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0xd4,0x49,0x44,0x41,0x54,0x28,0xcf,0xbd,0x8f,0x31,0x4a,0x3,0x61,0x10,0x85,0xdf,0xfb,0x67,0xb1,0x91,0x3d,0x80,0x6d,0xca,0x20,0x39,0xc1,0x7a,0xe,0x59,0x2c,0xc4,0x3b,0x8,0xb9,0x43,0xf0,0xe,0x62,0x11,0x82,0xa0,0x65,0x8a,0xd4,0xe6,0x4,0x31,0x24,0x67,0x48,0xbd,0xe5,0xbf,0x33,0x2f,0x8d,0xbf,0xae,0x6c,0xad,0xf,0x6,0x66,0x98,0xf7,0x31,0xf3,0x80,0xbf,0x16,0x87,0xc3,0x5d,0xfb,0x70,0xd,0xe0,0x49,0x52,0x3,0xe0,0x92,0xe4,0x6,0xc0,0xe3,0x72,0xf5,0xbc,0x1f,0x1,0xed,0xed,0xfd,0x8c,0xe4,0xb6,0xef,0xfb,0x3a,0x14,0x0,0x80,0xc4,0x84,0xaa,0xaa,0x3a,0x49,0xcd,0xea,0xf5,0x65,0x7,0x0,0xe9,0x9b,0x24,0x17,0x39,0xe7,0xda,0xdd,0xa1,0x10,0x14,0x82,0xbb,0x23,0xe7,0x5c,0x93,0x5c,0x14,0x5f,0x55,0x1a,0x8f,0x68,0x3c,0x2,0x90,0x7e,0xfd,0xec,0x11,0xb0,0x88,0x9b,0x11,0x20,0x9,0x92,0x46,0x40,0xd9,0x15,0xa5,0x41,0x98,0x2d,0x7,0x60,0xa9,0xaf,0x90,0x1f,0x23,0x20,0x22,0xe6,0x66,0xd6,0x91,0xfc,0xb9,0x46,0xc2,0xcc,0xba,0x88,0x98,0x17,0x9f,0x95,0xe6,0x70,0xfc,0x3c,0x4d,0xa7,0xb3,0xb5,0x99,0x4d,0x52,0x4a,0x57,0x66,0x76,0x61,0x66,0x1b,0x1,0xed,0xdb,0xfb,0x72,0x87,0x7f,0xd3,0x19,0xa9,0x1d,0x69,0xa0,0x52,0x76,0xa0,0x72,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
 static const unsigned char scroll_grabber_hl_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x8,0x0,0x0,0x0,0x8,0x8,0x6,0x0,0x0,0x0,0xc4,0xf,0xbe,0x8b,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x26,0x0,0x26,0x0,0x26,0x59,0xf,0xde,0x74,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0x17,0x25,0x15,0xaa,0xcc,0xf4,0xbf,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0x86,0x49,0x44,0x41,0x54,0x18,0x95,0x7d,0x8e,0xb1,0xa,0x83,0x30,0x18,0x84,0xbf,0x36,0x81,0x6e,0xe5,0xf,0xad,0x6e,0xe,0x11,0x97,0x2e,0xfa,0xae,0x6e,0x7d,0x29,0xa1,0xce,0xa5,0xae,0x5,0x7,0x83,0x53,0xb,0xf9,0xa1,0x4b,0x94,0xe,0xe2,0x8d,0x77,0xdf,0x71,0x77,0x68,0xdb,0x3b,0x7b,0xb2,0x0,0xc3,0xf0,0xba,0x86,0x30,0x75,0x40,0x91,0xfc,0x51,0xc4,0x55,0xde,0x97,0xb3,0x5,0x8,0x61,0xea,0xea,0xba,0xb9,0x18,0x63,0x1,0x50,0x8d,0xe7,0xbe,0x7f,0x3c,0x81,0xdc,0xa6,0x46,0xb1,0x84,0x0,0xc6,0xd8,0x13,0x90,0x1,0x1c,0x77,0xf,0xfc,0x1,0xa3,0x6a,0xfc,0x2e,0xa6,0x6a,0xfc,0x0,0xef,0xf5,0xa4,0x88,0xab,0xd2,0x66,0x96,0x98,0x20,0xe2,0x6e,0x2b,0xe0,0x7d,0x39,0x3,0xf9,0xd6,0xc4,0xf,0x70,0x6e,0x25,0xf5,0x5c,0xbc,0xd7,0xd3,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0xc,0x0,0x0,0x0,0xc,0x8,0x6,0x0,0x0,0x0,0x56,0x75,0x5c,0xe7,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x16,0xe,0x1f,0xa1,0x26,0x12,0x2f,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x3,0x49,0x44,0x41,0x54,0x28,0xcf,0xbd,0x90,0x3f,0x4b,0xc3,0x50,0x14,0xc5,0x4f,0xf2,0xf2,0xca,0x33,0x12,0xf3,0x10,0x44,0x3a,0x74,0x10,0x91,0xae,0x75,0x13,0xe7,0xea,0x17,0xe8,0xa7,0x10,0x67,0x41,0xec,0x1e,0x4,0x67,0xf1,0x5b,0xf4,0x3,0x88,0xb3,0xb8,0xd9,0xb5,0x88,0xb8,0x28,0x5,0xb5,0x35,0xf4,0xc5,0x90,0xe6,0xbe,0x3f,0x4e,0x29,0xd,0x75,0xd5,0x3,0x7,0xee,0xb9,0xf0,0xe3,0x1e,0x2e,0xf0,0xd7,0xf2,0x96,0xc3,0xcd,0xf0,0x65,0x8f,0xc8,0x5c,0xce,0xd2,0xa2,0x6b,0x1d,0x36,0xa4,0x14,0x3,0xde,0x60,0xe7,0x27,0x9d,0x9d,0xa7,0x15,0xe0,0x7a,0xf8,0xdc,0x9e,0x7e,0xe4,0xf,0xe3,0xb1,0x92,0x45,0xa1,0xe1,0xe0,0xb0,0x26,0x38,0x9a,0xcd,0x28,0xdd,0xdc,0xa,0xf,0x4e,0x3b,0xbb,0x23,0x0,0x8,0x2a,0x20,0x2f,0xca,0xe4,0xed,0xf5,0x4b,0x7e,0xab,0xf9,0xe2,0xa2,0x2a,0x4a,0x58,0xa3,0xa5,0x88,0x82,0x4,0x40,0xaf,0x6,0x4c,0x27,0x59,0x37,0x53,0x39,0x34,0xd9,0x5a,0xe7,0x4c,0x19,0x4c,0x3e,0xd5,0x71,0x95,0x17,0x40,0x49,0x1a,0x46,0x13,0xac,0x36,0x35,0xc0,0x78,0xc,0xb4,0xb4,0xf3,0xab,0x41,0x48,0x71,0xc7,0x98,0x83,0xa1,0x79,0xcd,0x8c,0x39,0x8,0x29,0x6e,0x57,0x0,0xcb,0xfd,0x7e,0xdc,0x8a,0x53,0xbe,0xce,0xe1,0x39,0x82,0xe7,0x8,0x41,0x18,0x20,0x6e,0xc5,0xa9,0xe5,0x7e,0xff,0xd7,0xb7,0x9e,0xdd,0x3f,0xb6,0x29,0xa7,0x24,0x7b,0x9f,0x1d,0xc1,0xd9,0x28,0xda,0x8e,0x7,0x41,0xd8,0xb8,0xb8,0x3a,0xdc,0x1f,0xe1,0xdf,0xf4,0x3,0x35,0xb3,0x71,0xac,0x4,0x64,0x3e,0xbb,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
@@ -415,12 +415,12 @@ static const unsigned char tab_menu_hl_png[]={
 
 
 static const unsigned char toggle_off_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x40,0x0,0x0,0x0,0x20,0x8,0x6,0x0,0x0,0x0,0xa2,0x9d,0x7e,0x84,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x26,0x0,0x26,0x0,0x26,0x59,0xf,0xde,0x74,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0x17,0x2,0x16,0xe9,0x0,0x17,0x60,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x2,0x45,0x49,0x44,0x41,0x54,0x68,0x81,0xed,0x99,0x5f,0x4f,0xd3,0x50,0x18,0x87,0x9f,0xb3,0x96,0xb5,0xcc,0x3a,0xa6,0xac,0x6d,0xa,0x91,0x84,0x5b,0x8d,0x7e,0x21,0x60,0x2c,0x5c,0x1a,0x63,0xe2,0x27,0x31,0x31,0xc6,0x4b,0x32,0xa7,0x7e,0x20,0x84,0x6b,0x6f,0xc,0x90,0x6d,0x32,0xca,0x6c,0xb0,0x24,0x75,0xc7,0x8b,0xba,0xb9,0x35,0xdd,0x84,0xec,0xe0,0x26,0xed,0x73,0xd7,0xf7,0xed,0xce,0x7b,0x7e,0xbf,0xbe,0x67,0x7f,0xde,0x41,0x4e,0x4e,0xa6,0x11,0x89,0xeb,0x12,0x50,0x1,0x2c,0x40,0x4b,0xc9,0xff,0xaf,0x48,0xe0,0x27,0x10,0x0,0x3e,0x70,0x39,0x48,0xe8,0x89,0x1b,0x2b,0xc0,0x33,0xe0,0x11,0x60,0x72,0xb7,0xc,0x8,0x81,0xaf,0xc0,0x67,0xa6,0x18,0x60,0x1,0x1b,0x4f,0x1e,0x3f,0x7d,0x6d,0x57,0xed,0x65,0x40,0x48,0x64,0xfc,0x72,0x95,0x8,0x10,0x8,0x74,0x5d,0x67,0x69,0x49,0x47,0xd3,0x92,0xdb,0x50,0x4b,0x14,0x45,0xf2,0xf8,0xe4,0xf8,0xc7,0xe1,0xd1,0xc1,0x2b,0xe0,0xcb,0x68,0x2e,0x59,0x59,0x7,0xc,0xbb,0x6a,0x97,0xce,0xba,0x67,0x4,0x41,0x80,0xec,0xf7,0x91,0x8a,0x1d,0x10,0x8,0x44,0xa1,0x80,0x51,0x2c,0x52,0x2a,0xdd,0xa3,0x58,0x2c,0x2a,0x5d,0x3f,0x89,0x69,0x9a,0x62,0x7d,0x6d,0xbd,0x74,0x78,0x74,0x60,0x90,0xd0,0x9c,0x66,0xbd,0x0,0x8,0x82,0xef,0x54,0x2a,0xf,0x70,0x6c,0x7,0xa1,0xf8,0x24,0x48,0x24,0xed,0x4e,0x1b,0xdf,0x3f,0xc7,0x30,0x4c,0xa5,0x6b,0xa7,0x11,0x86,0x21,0x96,0x65,0x41,0xca,0x91,0x4e,0xed,0x3d,0x29,0x25,0xb2,0x2f,0x71,0x1d,0x97,0x30,0xc,0x39,0x6d,0x9d,0x2a,0xdd,0x90,0xe7,0x7a,0xb8,0x8e,0xcb,0x79,0xb7,0xab,0xbc,0xbb,0
x6e,0x4a,0x61,0x52,0x62,0x70,0xf6,0x55,0x8b,0x87,0xdf,0x6b,0x4a,0xe6,0x2e,0x1e,0xa6,0x18,0x90,0x15,0x32,0x6f,0xc0,0xed,0x7e,0xfe,0xcc,0x80,0x65,0x59,0xd4,0xf7,0x6a,0x63,0xb1,0xc6,0x7e,0x93,0x20,0x8,0x78,0xf1,0xf2,0xf9,0x58,0xfc,0xed,0x9b,0x77,0xa9,0xb1,0xeb,0xb0,0xb0,0x6,0xc,0xc4,0x7f,0x78,0xff,0x9,0x80,0x9d,0xdd,0x2d,0xea,0x7b,0xb5,0xa1,0xb0,0x34,0x81,0xd7,0x15,0x3d,0xca,0xc2,0x1a,0x0,0xd0,0x6c,0x7c,0xa4,0xd7,0xeb,0x1,0xb1,0x11,0x3b,0xbb,0x5b,0xc3,0xdc,0xe8,0x13,0x1f,0x8,0x4f,0x8b,0xfd,0x8d,0x85,0x36,0x60,0x1a,0xaa,0x3a,0x60,0xa1,0xdf,0x4,0x6b,0xf5,0x6d,0xca,0xe5,0x32,0xe5,0x72,0x79,0xec,0xe9,0xab,0x24,0x69,0x80,0x9,0xac,0xdc,0x4a,0xa5,0x1b,0xd2,0xd8,0x6f,0x2,0xb1,0x9,0xb5,0xfa,0xf6,0x58,0x6c,0x6,0x56,0x88,0x35,0xe,0x49,0x1e,0x81,0x10,0xb8,0x98,0xb5,0x8a,0xa,0x82,0x20,0x98,0xd8,0xd2,0x33,0xb4,0xff,0x5,0xb1,0xc6,0x21,0xb,0x7d,0x4,0xfe,0x5,0xb9,0x1,0xf3,0xde,0xc0,0xbc,0x99,0x68,0x80,0x88,0xa7,0x16,0x78,0xae,0xa7,0xbc,0xa8,0xe7,0x7a,0xc3,0xa1,0xc8,0xbc,0x49,0xfd,0x1e,0x20,0x84,0x40,0x14,0x4,0xad,0x76,0xb,0xc7,0x76,0xd8,0xdc,0xd8,0x54,0x5a,0x54,0x22,0x69,0xb5,0x5b,0x88,0x82,0x98,0xbb,0x9,0x69,0x6,0x48,0x0,0xcb,0xba,0x8f,0xef,0xfb,0xb7,0xf2,0x9b,0xfd,0xcf,0x44,0xc8,0x40,0xd3,0x34,0xa5,0x6b,0xa7,0x61,0x9a,0x26,0x51,0x14,0x41,0xca,0x70,0x2f,0x69,0x40,0x4,0x5c,0x75,0xbe,0x75,0x2e,0xed,0xaa,0xbd,0xbc,0xfa,0x70,0xf5,0x4e,0xcd,0x4,0x81,0x2b,0x62,0x8d,0xa3,0x5b,0x19,0x63,0x8d,0x6c,0x4c,0x85,0x4f,0x6,0x89,0xcc,0xff,0x2f,0x90,0x93,0x93,0x93,0x6d,0x7e,0x1,0x6b,0xe,0xc1,0xdb,0xd6,0xe0,0xc4,0xba,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x40,0x0,0x0,0x0,0x20,0x8,0x6,0x0,0x0,0x0,0xa2,0x9d,0x7e,0x84,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0x16,0x28,0x99,0xc6,0x91,0x20,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x5,0xb5,0x49,0x44,0x41,0x54,0x68,0xde,0xed,0x99,0xdb,0x6f,0x54,0x55,0x14,0xc6,0x7f,0xfb,0xdc,0xe6,0x72,0x66,0x3a,0x9d,0x76,0x5a,0xa0,0x3,0xe5,0xd2,0xc4,0xb4,0x21,0x26,0x46,0x2,0x31,0xbe,0x11,0xc0,0x17,0x62,0x14,0xe2,0xe5,0xc1,0xc4,0x77,0xc2,0xe5,0xf,0x30,0x31,0xd1,0x18,0x8,0xbc,0x5a,0x2,0xaf,0x3e,0x90,0x80,0xa8,0x28,0x1a,0x5f,0x4,0x9,0x8f,0x62,0x42,0x88,0x1a,0x9c,0x82,0x5c,0x14,0x68,0x11,0x5a,0x3a,0xed,0xdc,0x3b,0xe7,0xb2,0x7d,0x70,0x77,0x98,0xe,0x67,0x86,0x96,0xe,0x97,0x4,0x56,0xb2,0x33,0x33,0xeb,0x5c,0xe6,0xac,0x6f,0x7f,0x7b,0x9d,0xb5,0xbe,0xd,0x2f,0xec,0xf9,0x36,0xb1,0xc0,0x73,0x45,0xdd,0x35,0xe2,0x19,0x89,0x41,0xd6,0x7d,0xca,0xba,0xdf,0x6d,0x3,0x40,0x53,0xc3,0xac,0x1b,0xba,0xf2,0x89,0x67,0x20,0x78,0x1f,0xf0,0x0,0xa7,0x6e,0xf8,0x6a,0x2c,0xa,0x0,0xa1,0x2,0xb5,0x80,0x18,0xd0,0xa9,0x46,0x7,0x10,0x51,0x40,0x3c,0x4d,0x10,0x66,0x83,0x77,0x80,0x32,0x90,0x3,0xa6,0xd4,0x28,0x0,0x55,0x5,0x8c,0x7c,0x14,0x0,0x34,0xc0,0x50,0x81,0xf7,0x0,0xcb,0x53,0xa9,0x25,0xef,0xc6,0x62,0x1d,0x6f,0x1a,0x86,0xde,0x3,0xc2,0x0,0x84,0x10,0x4f,0x97,0x0,0x52,0x4a,0x5,0x84,0x74,0x5d,0xd7,0x1b,0x2f,0x14,0x72,0x3f,0x4c,0x4c,0xdc,0xf9,0xa,0xb8,0x5,0x8c,0x2b,0x20,0xdc,0x56,0x6c,0x10,0x4d,0x82,0xb7,0x80,0x4,0xd0,0xdf,0xd9,0xd9,0xbd,0xad,0xab,0x2b,0xb5,0x5b,0xd3,0x44,0xec,0x7e,0xbc,0x62,0x1e,0x18,0xca,0x26,0x3e,0xa1,0x9e,0xa7,0xfd,0xe4,0x91,0x52,0xe2,0xfb,0x7e,0x61,0x72,0x72,0x62,0x78,0x6a,0xea,0xde,0xb7,0xc0,0xd,0
x60,0x5a,0xb1,0xc1,0x9f,0xf,0x0,0x42,0x5,0x9f,0x4,0xd6,0x2c,0x5b,0xb6,0xfc,0x23,0xdb,0x8e,0x6d,0x15,0x2,0xc2,0xc2,0x60,0x95,0x69,0xd3,0x6b,0x84,0xb1,0x35,0x3,0xbd,0xc5,0xec,0x3b,0xd2,0x67,0xd4,0x29,0x73,0xa5,0x9a,0xa7,0x22,0xfd,0x0,0x50,0x7c,0xf5,0x57,0xda,0x63,0x63,0x46,0xb1,0x98,0xff,0xf1,0xf6,0xed,0x5b,0xfb,0x80,0x6b,0x40,0x56,0x81,0x20,0x5b,0x1,0x30,0xbb,0xe6,0x13,0xc0,0x40,0x5f,0xdf,0x8a,0xcf,0xa2,0x51,0xfb,0xd,0x21,0x4,0x6b,0xcc,0x18,0x2b,0x2d,0x9b,0xdb,0x4e,0x85,0x51,0xa7,0x44,0x41,0xba,0x81,0xf3,0x2b,0x80,0x30,0x1a,0x1d,0xba,0x49,0xaf,0x11,0xa6,0xd7,0x8,0x71,0xbd,0x5a,0xe4,0x6f,0xa7,0x18,0x0,0xbf,0x7c,0xac,0xe9,0x43,0x4a,0x49,0xa9,0x54,0xf8,0x69,0x6c,0xec,0xe6,0xc7,0xc0,0x55,0xc5,0x84,0x7,0x72,0x82,0xde,0x40,0xfd,0x30,0xb0,0x2c,0x99,0x4c,0x7d,0x90,0x48,0x74,0x7e,0x28,0x84,0x60,0x6d,0x28,0x41,0x4a,0xf,0xf1,0x47,0x65,0x9a,0x5b,0x6e,0x99,0xea,0x43,0x92,0xab,0x8b,0xa4,0x28,0x3d,0xee,0x7a,0x33,0xe4,0x3c,0x87,0x95,0x96,0x4d,0x87,0x6e,0x72,0xcf,0x9b,0x69,0x0,0xed,0xf1,0xe6,0xf,0x21,0x4,0xa6,0x69,0xd,0x0,0xd3,0xe5,0x72,0xe9,0x12,0x50,0x54,0xf9,0x40,0x36,0xae,0xf7,0xfa,0xef,0x31,0x60,0x79,0x57,0x57,0xf7,0x2e,0x21,0xa0,0xdf,0xb4,0x49,0xe8,0x16,0x17,0xca,0x59,0xa6,0x7c,0x67,0xc1,0xf,0x71,0xcf,0xaf,0xf2,0x5b,0x39,0x4b,0x42,0xb7,0x58,0x61,0x46,0x9f,0xf8,0xeb,0x42,0x8,0x41,0x32,0xd9,0xbd,0x13,0x58,0xa1,0x62,0xd3,0x1b,0x91,0xd7,0xea,0xa6,0xc3,0x4,0x12,0x3d,0x3d,0x4b,0xdf,0xd7,0x34,0x11,0xb1,0x84,0xc1,0x6a,0xd3,0x26,0x53,0xc9,0x51,0x9e,0xdf,0x2b,0x35,0xd0,0x4a,0xf8,0x8c,0x54,0x72,0xac,0x31,0x63,0x58,0x42,0x7b,0xe2,0x6f,0xa,0x4d,0xd3,0x23,0x3d,0x3d,0x4b,0xdf,0x53,0x4b,0xdb,0x8,0xca,0xf8,0xf5,0x0,0x74,0xda,0x76,0x7c,0x2b,0x40,0xda,0x88,0x30,0xe9,0x55,0xc9,0xfa,0xd5,0x45,0x3f,0xc4,0xa4,0x5f,0xe5,0x9e,0x37,0x43,0x9f,0x11,0x69,0x79,0x5e,0x3a,0xdd,0xc7,0xfe,0xfd,0x7b,0xb9,0x7c,0xf9,0x22,0xe7,0xcf,0xff,0xc2,0xf6,0xed,0x6f,0x3,0xb0,0x61,0xc3,0x7a,0x2e,0x5f,0xbe,0x38,0x67,0x4,0xf9,0xe2,0xf1,0x78,0xe0,0x7d,0x6d,0x3b,0xb6,0x55,0x25,0x76,0xab,0x91,0x1,0x46,0x3,0x0,0x1d,0x86,0xa1,0xa7,0
x40,0xb0,0xc4,0x8,0xf3,0x8f,0x53,0x6c,0xdb,0x4c,0x8c,0x7b,0x33,0xac,0x34,0x6d,0xae,0xb7,0xb8,0xe7,0x81,0x3,0xfb,0xc8,0xe5,0xf2,0x6c,0xdc,0xb8,0x85,0xa1,0xa1,0x41,0xe,0x1d,0x1a,0xe6,0xdc,0xb9,0x5f,0x6b,0xc7,0xd7,0xad,0x7b,0x8d,0x7c,0x3e,0x5f,0x3,0xa5,0xd1,0xd7,0xcc,0xc,0xc3,0xe8,0x51,0x5,0x9c,0xd1,0xc,0x80,0xd9,0x84,0x18,0x9e,0xf5,0xc5,0x34,0x83,0x69,0xcf,0x69,0x1b,0x0,0xd3,0x9e,0x43,0x2c,0x64,0xb4,0x9c,0xfd,0xd,0x1b,0xd6,0xd7,0x2,0x1a,0x1d,0x1d,0xe3,0xf4,0xe9,0x33,0x6c,0xde,0xbc,0x89,0x4c,0x66,0x84,0x4c,0x66,0xe4,0x81,0x6b,0x82,0x7c,0x4d,0xb2,0x81,0xa1,0xaa,0xd7,0xa6,0x39,0xa0,0xfe,0x35,0x28,0x0,0x4c,0xa1,0x51,0x96,0x5e,0xdb,0x0,0x28,0x4b,0xf,0xb3,0x45,0xe,0x48,0xa7,0xd3,0x64,0x32,0x23,0x73,0x66,0x33,0x93,0xc9,0x30,0x34,0x34,0x8,0x40,0x3e,0x9f,0xe7,0xf0,0xe1,0x61,0x8e,0x1c,0xf9,0xa2,0x46,0xf5,0x20,0x5f,0xb3,0x64,0xd8,0xac,0xf2,0x32,0x2,0x6a,0xeb,0x5a,0x31,0x13,0x11,0x3a,0xa5,0x36,0x81,0x10,0x11,0x3a,0x8e,0x6c,0x9e,0x4c,0x33,0x99,0x11,0x86,0x86,0x6,0x89,0xc7,0xe3,0x35,0x10,0xd2,0xe9,0xf4,0x1c,0x40,0x76,0xec,0xd8,0xfd,0x0,0xdd,0x83,0x7c,0x4d,0x4a,0x66,0x3f,0xa8,0x10,0xaa,0x9f,0x12,0xef,0xff,0xa6,0x42,0xba,0x20,0x29,0xf8,0x2e,0x9,0xdd,0x6c,0x1b,0x3,0x12,0xba,0x49,0xc1,0x77,0x9b,0x1e,0xcf,0xe7,0xf3,0x64,0x32,0x23,0xb5,0xc4,0x17,0x8f,0xc7,0xd9,0xb2,0x65,0xd3,0x2,0x68,0xde,0x12,0x2,0x57,0x35,0x4c,0x5e,0xb3,0x3a,0x40,0xaa,0xae,0x2a,0xef,0xba,0xfe,0x38,0xc0,0x1d,0xb7,0x42,0x4a,0xf,0xb5,0xd,0x80,0x1e,0x3d,0xc4,0x1d,0xb7,0xd2,0xf2,0x9c,0xbd,0x7b,0xf7,0xb3,0x67,0xcf,0x4e,0x4e,0x9e,0xfc,0x86,0xb3,0x67,0x4f,0x71,0xea,0xd4,0xcf,0x9c,0x38,0xf1,0xdd,0xa2,0xff,0xdb,0x75,0xdd,0x71,0xd5,0x2d,0xba,0xf,0xab,0x4,0x2d,0xd3,0xb4,0x7a,0xc3,0xe1,0xf0,0xab,0x25,0xe9,0xf3,0x92,0x15,0x27,0xe7,0xb9,0x54,0x16,0xb9,0xc,0xba,0x34,0x8b,0xd5,0x56,0x8c,0x3f,0xab,0x39,0xbc,0x16,0xdd,0xe9,0xe8,0xe8,0x18,0x47,0x8f,0x7e,0x9,0xc0,0xf0,0xf0,0x21,0x8e,0x1d,0x3b,0x3e,0x87,0x21,0xd7,0xae,0x5d,0xa7,0x5a,0xad,0xb6,0xf4,0x5,0xb3,0x6b,0xfa,0x78,0xa9,0x54,0x38,0x3,0x4c,0x2a,0x10,0x2,0xeb,0x51,0x3,0xe8,0x6,0x5e,0x1e
,0x18,0x18,0xfc,0x5e,0xd3,0x44,0xa4,0xdf,0xb4,0x59,0x6e,0x46,0xb9,0x50,0x9a,0x7c,0xe4,0x62,0x28,0x8a,0xc6,0x2b,0xd1,0x2e,0x46,0x9d,0x12,0x37,0x9c,0xd2,0xc2,0xe4,0x9a,0x36,0x98,0xef,0x7b,0xe5,0xab,0x57,0x2f,0xbd,0x5,0xfc,0x5e,0x7,0x80,0xc,0x62,0x40,0xad,0x43,0x11,0x2,0x37,0x12,0x89,0xbe,0x9e,0xf3,0x1d,0x3a,0x34,0x93,0x55,0x96,0x4d,0xde,0x77,0x1b,0x3a,0xbb,0x87,0x5b,0xb7,0x66,0xb1,0x36,0x9c,0x20,0xe7,0x3b,0x5c,0xad,0x16,0x9e,0x78,0xf0,0x52,0x4a,0xb2,0xd9,0x89,0xcf,0xcb,0xe5,0xd2,0x59,0x60,0x22,0xa8,0x23,0x6c,0x4,0xc0,0x7,0xbc,0x72,0xb9,0x74,0x3b,0x1c,0x8e,0xac,0x34,0x4d,0x73,0x60,0xdc,0x9b,0xc1,0x14,0x1a,0x83,0xe1,0xe,0xc2,0x42,0x67,0xc6,0xf7,0x70,0x9a,0xb0,0x41,0x0,0x11,0x34,0xba,0x75,0x8b,0x35,0x56,0x8c,0x81,0x50,0x8c,0x51,0xb7,0xc2,0x5f,0xd5,0xfc,0x22,0x8a,0xe9,0xc5,0x75,0x83,0x77,0xef,0xfe,0x7b,0x50,0x9,0x24,0xc5,0x20,0x4d,0xa0,0x95,0x1e,0x30,0xd0,0xd7,0xd7,0xff,0x49,0x34,0x1a,0xdd,0x2c,0x4,0x84,0x84,0xce,0x6a,0x33,0xb6,0x48,0x3d,0xe0,0x49,0x6,0x5f,0x3c,0x3d,0x36,0x76,0xe3,0x53,0xe0,0xca,0x7c,0xf5,0x80,0x46,0x45,0xa8,0x13,0xe8,0x4f,0x26,0xbb,0xb7,0x25,0x93,0xdd,0xbb,0x74,0x5d,0x8b,0xdd,0xef,0xe1,0x9b,0x9,0xc3,0xf,0x23,0xf9,0xec,0xf5,0x8d,0xf7,0x69,0x9f,0x79,0x9e,0x57,0xc8,0x66,0x27,0xe,0x66,0xb3,0x35,0x45,0x68,0x6a,0x21,0x8a,0x50,0x3d,0x8,0xa6,0x6a,0x21,0x53,0xc0,0x8a,0x54,0x6a,0xc9,0x3b,0x4a,0x13,0xec,0x5,0x61,0x3c,0x6d,0x3d,0x70,0x6e,0x91,0x23,0x5d,0xd7,0xf5,0xee,0x2a,0x4d,0xf0,0x6b,0xe0,0xa6,0x5a,0xf3,0x85,0x3a,0x95,0xf8,0x91,0x55,0xe1,0x10,0x60,0x2b,0x46,0x24,0x3,0x54,0xe1,0xa7,0x69,0x8d,0xaa,0x70,0x56,0xcd,0x78,0x11,0x98,0x59,0x8c,0x2a,0x5c,0x7f,0x7c,0x16,0x8,0x43,0x2d,0xd,0xb3,0xae,0x67,0x78,0x16,0xf6,0x5,0x24,0xf7,0xf7,0x5,0xaa,0xea,0x35,0xe7,0x31,0xcf,0x4d,0x92,0x47,0xdd,0x19,0x7a,0x56,0x76,0x85,0x1a,0x81,0x58,0xf0,0xce,0xd0,0xb,0x7b,0x61,0xcf,0xb9,0xfd,0x7,0xde,0xc4,0x73,0xf5,0xe8,0x6c,0xed,0xda,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
 static const unsigned char toggle_on_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x40,0x0,0x0,0x0,0x20,0x8,0x6,0x0,0x0,0x0,0xa2,0x9d,0x7e,0x84,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x26,0x0,0x26,0x0,0x26,0x59,0xf,0xde,0x74,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0x17,0x2,0x12,0xee,0x6d,0xd3,0x79,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x2,0xa7,0x49,0x44,0x41,0x54,0x68,0x81,0xed,0x99,0x4d,0x4f,0x13,0x51,0x14,0x86,0x9f,0x99,0x4e,0x6c,0x4b,0xc6,0x52,0xc5,0xb6,0x49,0x1b,0x5d,0xb1,0xf4,0x63,0x69,0x28,0x89,0xfc,0x7,0x63,0x42,0x8,0xff,0x40,0x11,0xc4,0x8d,0x51,0x2,0xb,0x5,0x56,0x86,0x8f,0xb8,0x51,0xff,0x0,0x69,0x20,0x24,0x6,0x24,0xfc,0x1,0x68,0x58,0x82,0x11,0x77,0x6e,0xc,0x24,0x2d,0x6,0x3a,0x49,0xad,0xd3,0x32,0xe5,0xba,0x18,0xdb,0x94,0x71,0x2,0xb1,0xb9,0x63,0x23,0x9d,0x67,0xd5,0xde,0x73,0xe6,0xcc,0x7b,0xde,0xe9,0x9d,0xcc,0x9c,0x82,0x8f,0x4f,0x5b,0xa3,0x38,0xbe,0x77,0x0,0x51,0x40,0x7,0x2,0x2e,0xf1,0xff,0x15,0x1,0x54,0x81,0x22,0x50,0x0,0x4a,0xb5,0x80,0xe6,0x48,0x8c,0x2,0xb7,0x81,0xeb,0x40,0x88,0x8b,0x65,0x80,0x9,0x7c,0x3,0x76,0x38,0xc3,0x0,0x1d,0xb8,0x71,0xeb,0xe6,0x9d,0xd9,0x54,0x32,0x15,0xd6,0x34,0xcd,0x53,0x3,0xaa,0x55,0x8b,0xe3,0x63,0xb,0xcb,0xb2,0x10,0x8,0x5b,0xa6,0x4c,0x14,0x50,0xec,0x6b,0x28,0xe,0xbe,0x1f,0xfc,0xfc,0xbc,0xfb,0x69,0x14,0xf8,0xda,0x98,0xe2,0x34,0x40,0x3,0x82,0xa9,0x64,0xaa,0xc3,0x34,0x4d,0x4c,0xd3,0x94,0xac,0xe8,0x34,0x95,0x4a,0x85,0x52,0xe9,0x7,0xe5,0x4a,0x5,0x71,0x72,0x62,0x9b,0x20,0x11,0x5,0x5,0x45,0x55,0xd1,0x75,0x5d,0x89,0x5d,0x8b,0x75,0x0,0x41,0x1c,0x3d,0x3b,0xd,0x0,0x50,0x34,0x4d,0xf3,0xbc,0x79,0x80,0x6a,0xb5,0x4a,0xb9,0x52,0x26,0x1a,0xbd,0x42,0x3c,0x16,0xaf,0x5d,0x2d,0x69,0x8,0x4,0xf9,0x83,0x3c,0x85,0xc2,0x11,0x5d,0x57,0xbb,0xc0,0x65,0x4b,0xbb,0x19,0xf0,0xcf,0x10,0x8,0xc4,0x89,0x20,0x11,0x
4f,0x60,0x9a,0x26,0xf7,0x1f,0xf4,0x4b,0xad,0xbf,0xbc,0x94,0x21,0x11,0x4f,0x70,0x74,0x78,0x88,0x10,0xee,0xbf,0x2e,0x55,0xea,0x19,0x9b,0xa0,0xb6,0xf7,0x65,0x37,0xf,0xbf,0x6b,0xa,0xce,0xdc,0x5a,0x2d,0x37,0xa0,0xd5,0xf8,0x6,0x34,0x73,0x90,0xae,0xeb,0x8c,0x4f,0x8c,0xb1,0xb6,0xbe,0xca,0xda,0xfa,0x2a,0xe3,0x13,0x63,0xe8,0xba,0xe,0x40,0x76,0x6b,0x83,0xe7,0x2f,0x9e,0xd5,0x73,0xb3,0x5b,0x1b,0x72,0x94,0x7a,0x44,0x53,0x6,0x8c,0x3e,0x1d,0x1,0x60,0xa0,0x7f,0x90,0x81,0xfe,0x41,0x54,0x35,0x50,0x5f,0x3,0x48,0xf7,0xa6,0xe9,0xee,0xee,0x96,0xa3,0xd0,0x63,0x9a,0x32,0x20,0xdd,0x9b,0x66,0x7e,0xee,0xd,0x86,0x61,0x60,0x18,0x6,0x73,0xb3,0xf3,0xf4,0xa4,0x7b,0xea,0xf1,0x77,0x6f,0xdf,0xf3,0x64,0x74,0x58,0x9a,0x48,0x2f,0xf1,0xe4,0x1e,0xb0,0xba,0xf2,0x91,0x50,0x38,0x4c,0x5f,0xdf,0x3d,0x2f,0xca,0x4b,0xa5,0x29,0x3,0x36,0x37,0x36,0x79,0x3c,0x3c,0x44,0x24,0x12,0x21,0x12,0x89,0x30,0x3c,0x32,0x44,0x76,0x33,0x7b,0x2a,0x67,0xe6,0xf5,0xc,0x8f,0x86,0x1e,0x4a,0x11,0xe9,0x25,0x4e,0x3,0x42,0x40,0xe7,0x79,0x7,0xcd,0xce,0xcc,0xa3,0xaa,0xa,0x99,0xc5,0x5,0x32,0x8b,0xb,0xf5,0xb5,0x46,0x76,0x77,0xbf,0xb0,0xbd,0xbd,0x23,0x4b,0xa7,0x2c,0x3a,0xb1,0x7b,0xac,0xe3,0x7c,0x12,0x34,0x1,0xe3,0xbc,0x2a,0xc5,0x62,0x91,0x57,0x2f,0xa7,0x5c,0x63,0x3d,0x77,0x7b,0xeb,0x9f,0xa7,0x26,0xa7,0x99,0x9a,0x9c,0xfe,0x6b,0x95,0x1e,0x62,0x60,0xf7,0x58,0xc7,0x7f,0xe,0x68,0xb5,0x80,0x56,0xe3,0x1b,0xd0,0x6a,0x1,0x8a,0x3d,0xb5,0x60,0x79,0x29,0x23,0xbd,0xf6,0xf2,0x52,0xa6,0x71,0x28,0xe2,0x4a,0x4b,0x5f,0x87,0xed,0x81,0x85,0x42,0x2e,0x9f,0x23,0x1e,0x8b,0xb3,0xb6,0xf2,0x41,0x6a,0xfd,0x60,0x30,0x48,0x2e,0x9f,0x43,0x51,0x15,0x14,0xc5,0xdd,0x4,0x37,0x3,0x84,0x65,0x59,0x84,0x42,0x21,0xcf,0x87,0x22,0x81,0x40,0x80,0xe0,0xa5,0x20,0x85,0x42,0xc1,0x7e,0x67,0xf7,0x6c,0x22,0x74,0xb9,0xb6,0xf4,0xc7,0x9,0x9c,0x6,0x58,0x40,0x79,0x6f,0x7f,0xaf,0x94,0x4a,0xa6,0xc2,0xba,0xae,0x5f,0xa8,0x99,0x20,0x50,0xc6,0xee,0xb1,0x31,0xe5,0x14,0x49,0xda,0x63,0x2a,0xbc,0x5f,0xb,0xb4,0xfd,0xff,0x2,0x3e,0x3e,0x3e,0xed,0xcd,0x2f,0xbd,0x80,0xe4,0x2f,0x1f,0x1c,0x6a,0x6c,0x0,0x0,0x0,0x0,0
x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x40,0x0,0x0,0x0,0x20,0x8,0x6,0x0,0x0,0x0,0xa2,0x9d,0x7e,0x84,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0xc,0x32,0xd4,0x89,0x92,0x81,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x5,0xba,0x49,0x44,0x41,0x54,0x68,0xde,0xed,0x99,0x5d,0x6c,0x93,0x55,0x18,0xc7,0x7f,0xe7,0xbc,0xdd,0xdb,0x8e,0xf6,0xed,0xba,0x76,0x8d,0x8e,0x7d,0xf0,0x31,0x15,0x88,0x9a,0x11,0x95,0x85,0x18,0x24,0x12,0x21,0xde,0x1a,0xae,0x4d,0x8c,0xd1,0xe0,0x85,0x89,0x37,0x5e,0x68,0xe4,0xca,0xb,0xcc,0x48,0xbc,0x23,0x7a,0x41,0xd4,0xa0,0x21,0x41,0x4d,0x20,0x70,0x69,0xc0,0x8f,0xe0,0x47,0xc,0x86,0x44,0x8c,0xda,0x39,0x1,0x27,0x6c,0x40,0xe8,0xe7,0xf6,0xb6,0x5b,0xfb,0xb6,0xef,0x7b,0xbc,0xe8,0x59,0xa9,0xa5,0xd4,0x32,0x96,0x6d,0x6,0x4e,0x72,0xd2,0xf4,0xeb,0xbc,0xf9,0xff,0x9f,0xff,0x79,0xce,0xf3,0xfc,0xf,0xdc,0x1b,0x77,0xf7,0x10,0xb7,0xf9,0x5b,0x51,0xf7,0x1f,0xb1,0x42,0x30,0xa8,0xba,0x57,0x55,0xf7,0x7e,0xd1,0x8,0x90,0x7a,0x76,0xd4,0x4d,0x43,0x7f,0x26,0x56,0x0,0x78,0xf,0x70,0x81,0x72,0xdd,0xf4,0xf4,0xbc,0x23,0x2,0x84,0x6,0x6a,0x2,0x21,0x20,0xa2,0x67,0x18,0xe8,0xd4,0x44,0x2c,0x27,0x9,0xf3,0xe0,0xcb,0xc0,0x1c,0x30,0x3,0xe4,0xf4,0xcc,0x3,0x8e,0x26,0x46,0x2d,0x84,0x0,0x9,0xf8,0x34,0xf0,0x38,0xd0,0x6f,0x59,0x5d,0x4f,0x59,0x56,0xd7,0x2e,0xd3,0x34,0xd7,0x4b,0x69,0x84,0xa5,0x94,0x9d,0x42,0x88,0x65,0x55,0x80,0x52,0x4a,0x79,0x9e,0x37,0xe7,0x79,0xee,0x8c,0xe3,0x38,0x17,0x6d,0x7b,0xfa,0xa4,0x6d,0x4f,0x7f,0xb,0x4c,0x2,0x49,0x4d,0x44,0xa5,0x95,0x1a,0xc4,0x2d,0xc0,0x9b,0x40,0x17,0x30,0xb8,0x6a,0x55,0x68,0x7b,0x2c,0x16,0xdf,0xe3,0xf7,0x7,0x1e,0x5a,0x66,0xbc,0xed,0x10,0x42,0xa9,0x54,0x1c,0x4f,0xa7,0x93,0x7,0x67,0x67,0xf3,0x
a7,0x81,0x4b,0xc0,0xb4,0x56,0x83,0xd7,0xe,0x1,0x42,0x83,0xef,0x6,0xd6,0x77,0x77,0xf7,0x3c,0x1f,0x8d,0xf6,0xbc,0x24,0xa5,0x34,0x97,0x1c,0x8d,0x3f,0x84,0x1c,0x18,0x46,0xf6,0xac,0x45,0x74,0x46,0x40,0xfa,0x6e,0xd,0xbc,0x52,0x42,0x5d,0xfb,0x3,0x77,0xe2,0x27,0x28,0xe5,0xf1,0x3c,0xcf,0xc9,0x64,0x52,0x1f,0x66,0xb3,0xa9,0xc3,0xc0,0x45,0x20,0xab,0x49,0x50,0xad,0x8,0x98,0xdf,0xf3,0x5d,0xc0,0x50,0x34,0x1a,0x7f,0x39,0x1a,0xed,0x79,0x79,0xc9,0x65,0x2e,0xd,0x64,0xff,0x30,0xb2,0xff,0x51,0xbc,0xeb,0x17,0xf0,0xae,0x8e,0xc1,0x6c,0x16,0x54,0x93,0x0,0xa,0x1,0xfe,0x10,0x58,0x3d,0xc8,0xd8,0x1a,0x64,0x6c,0x1d,0xde,0xe4,0xcf,0x78,0x97,0xcf,0xa1,0xdc,0x8a,0xca,0x64,0x52,0x1f,0x64,0x32,0xc9,0xf,0x80,0xb,0x5a,0x9,0x37,0xe5,0x4,0xa3,0x41,0xfa,0x1,0xa0,0x37,0x18,0xb4,0x9e,0x8d,0xc7,0xef,0x7b,0x5d,0x4a,0x69,0x2c,0x39,0xf8,0x7,0xb6,0x21,0xa2,0x3,0xb8,0x63,0x5f,0xa3,0xae,0xfe,0xe,0xe5,0xd9,0x16,0x79,0x4c,0x41,0xa5,0x4,0xb3,0x59,0x54,0x6a,0x2,0xcf,0xbe,0x8e,0xec,0x1f,0x46,0x84,0x62,0x90,0x9b,0x14,0x1,0xbf,0x7f,0xd8,0x71,0x4a,0x7f,0x95,0xcb,0xce,0x65,0xa0,0xa0,0xf3,0xc1,0x2d,0x9,0x30,0x74,0x96,0x7f,0xb0,0xb7,0xb7,0xff,0xed,0x8e,0x8e,0x8e,0xd8,0xd2,0x56,0x24,0x2,0xb9,0xfa,0x61,0x44,0xcf,0x1a,0xdc,0x5f,0xbf,0x80,0x42,0xfa,0xf6,0xd7,0x28,0xda,0xa8,0xec,0x24,0x72,0x60,0x18,0x21,0x4,0xe4,0x53,0xd2,0x34,0xcd,0xa1,0xe9,0xe9,0xec,0x69,0xbd,0xd,0x4a,0x8d,0x4,0xc8,0x3a,0xf9,0x77,0x0,0x5d,0xe1,0x70,0x64,0xbb,0xdf,0x1f,0x58,0xb7,0xe4,0x7b,0xde,0x17,0x40,0xe,0x6c,0xc6,0xfd,0xf3,0x7b,0x28,0xce,0x2c,0x7c,0x9d,0xe2,0xc,0xee,0xf9,0x1f,0x90,0x83,0x8f,0x41,0x47,0x27,0x7e,0x7f,0x60,0x5d,0x38,0x1c,0x79,0x4a,0x6f,0x6d,0x5f,0xb3,0x8c,0x5f,0x4f,0x40,0x24,0x14,0xa,0xef,0x6c,0xf7,0x59,0x7d,0x7d,0xab,0x19,0x1d,0xdd,0xc7,0xf8,0xf8,0x6f,0x9c,0x3d,0xfb,0x23,0xbb,0x77,0x3f,0x7,0xc0,0xc8,0xc8,0x16,0xc6,0xc7,0x7f,0x63,0xd3,0xa6,0x8d,0xb5,0xf7,0x87,0xf,0x1f,0x6a,0xad,0xfe,0xde,0x8d,0x78,0xb9,0x29,0x98,0xbe,0x72,0xe7,0x64,0xe6,0xa6,0xf0,0xb2,0x93,0xc8,0xfb,0x37,0x0,0x10,0xa,0x59,0x3b,0x75,0x62,0x37,0x1b,0x13,0x7f,0x23,0x1,0x61,0xd3,0x
34,0xdb,0x8e,0xfe,0xfe,0xfd,0xef,0x10,0xe,0x87,0xd9,0xb1,0x63,0x17,0x6f,0xbc,0xf1,0x16,0xa3,0xa3,0xfb,0xe8,0xeb,0x5b,0x5d,0xfb,0x7e,0xef,0xde,0x37,0xdb,0xdf,0x1,0xb1,0xb5,0xa8,0xcc,0xa5,0xc5,0x3b,0x12,0x53,0x7f,0x23,0x62,0x6b,0x1,0x30,0x4d,0xff,0x90,0x2e,0xe0,0x7c,0x8d,0x4,0xf8,0x1a,0x72,0x40,0x40,0x4a,0x23,0xdc,0x6e,0xf4,0x47,0x46,0xb6,0xf0,0xf8,0xe3,0x5b,0xb1,0x6d,0x9b,0xa9,0xa9,0x2b,0x9c,0x3a,0xf5,0x15,0x3b,0x77,0x3e,0x43,0x22,0x31,0x46,0x22,0x31,0xc6,0xa6,0x4d,0x1b,0x19,0x19,0xd9,0xd2,0x1e,0x1,0xc1,0x28,0xca,0x4e,0x2e,0x1e,0x1,0xf9,0x24,0x22,0x18,0xad,0x2,0x33,0x7c,0x11,0x5d,0xbd,0x1a,0xb7,0x52,0x40,0xed,0x18,0x94,0x52,0x76,0xb6,0x47,0x40,0x1f,0x89,0xc4,0x18,0xb6,0x6d,0xd7,0x3e,0x4b,0x24,0x12,0x35,0xd9,0xdb,0xb6,0xcd,0xa1,0x43,0x9f,0xb4,0xad,0x2,0xd1,0x11,0x80,0x62,0x7e,0xf1,0x72,0x4a,0x31,0x5f,0x5d,0x13,0xd0,0x75,0x4c,0xd3,0xb2,0x5d,0x36,0xd6,0xd6,0x4a,0xa9,0x72,0x3b,0xeb,0xcf,0x47,0xd8,0xb2,0xac,0x7f,0x91,0x52,0x4f,0xc8,0x81,0x3,0xef,0x13,0xe,0x87,0x6b,0xb9,0xa1,0x65,0xc4,0xca,0x45,0x8,0x84,0x16,0x8f,0x80,0x40,0xa8,0xba,0x26,0xe0,0x79,0xde,0x7c,0x25,0xa8,0x5a,0x11,0xe0,0x2,0x73,0xae,0x5b,0xc9,0xb5,0xb3,0xbe,0x6d,0xdb,0x24,0x12,0x63,0x35,0x70,0x96,0x65,0xb1,0x6b,0x57,0x55,0xfe,0xf5,0xe3,0xc0,0x81,0xf7,0xda,0x23,0xa0,0x90,0x41,0x58,0xf1,0xc5,0x3b,0x55,0x43,0x71,0x54,0x21,0x53,0x5,0x56,0xc5,0x34,0xd7,0xac,0x10,0x92,0x75,0xd1,0x2f,0x3,0xb6,0xe3,0x38,0x17,0xda,0x7d,0xc8,0xbe,0x7d,0xa3,0xbc,0xf6,0xda,0xab,0x9c,0x38,0x71,0x94,0x6f,0xbe,0x39,0xc9,0xc9,0x93,0x5f,0x72,0xec,0xd8,0xf1,0x7f,0xfd,0xe6,0xd8,0xb1,0xe3,0x37,0x91,0xd2,0x94,0x80,0xf4,0x4,0x22,0x3a,0xb8,0x78,0x4,0xf4,0xac,0x41,0xa5,0x27,0x0,0x70,0x9c,0xd2,0x5,0xdd,0x2d,0x56,0xfe,0xab,0x12,0x34,0x85,0x10,0x56,0x28,0x64,0x6d,0x6f,0xe7,0x21,0x53,0x53,0x57,0x38,0x72,0xe4,0xb3,0x9a,0xdc,0x3f,0xfd,0xf4,0xf3,0x9b,0x14,0x2,0x70,0xee,0xdc,0x2f,0xa4,0x52,0xa9,0x96,0x44,0xa8,0xd9,0x69,0x8c,0xf5,0x5b,0xf1,0xec,0x14,0x94,0xec,0x3b,0x43,0x1f,0xe9,0xc3,0x18,0xdc,0x8c,0x3b,0x7e,0x1a,0xdc,0x32,0xd9,0x6c,0xfa,0xe3,0x52,0xa9,0x78,0
x6,0xc8,0x68,0x12,0x9a,0xf6,0x2,0x3e,0x20,0x6,0x3c,0x3a,0x38,0xb8,0xfe,0xe0,0x92,0x17,0x43,0x42,0x20,0x57,0x3f,0x82,0xe8,0xdd,0x58,0xad,0x4,0x17,0x5a,0xc,0x5,0xc2,0x18,0x8f,0x3c,0x8b,0xba,0x36,0x86,0x37,0xf5,0x2b,0xa5,0xe2,0xdc,0x5f,0x97,0x2e,0x5d,0x7c,0x5,0xf8,0xa5,0x8e,0x0,0xd5,0x4c,0x1,0xf3,0x5b,0x41,0x54,0x2a,0x95,0x7c,0x30,0x68,0x3d,0x2d,0x84,0x90,0x4b,0xda,0xce,0x16,0xd2,0x88,0x60,0xc,0x39,0x30,0x8c,0x2a,0x64,0xa1,0x94,0xbf,0xfd,0xc8,0x6f,0xd8,0x81,0xb2,0xaf,0xe3,0xfd,0x7d,0x16,0xcf,0xad,0x54,0x92,0xc9,0x6b,0xef,0x96,0xcb,0xce,0x19,0x20,0xd5,0xac,0x23,0x6c,0x24,0xc0,0x3,0xdc,0x72,0xd9,0x99,0x16,0x42,0xc8,0xce,0xce,0x55,0x8f,0x2d,0x69,0x37,0xa8,0x14,0x2a,0x3b,0x89,0xf0,0x99,0x18,0x43,0x4f,0x82,0xdf,0x42,0x95,0x66,0xab,0xd,0xcf,0x4d,0xb6,0xa4,0x0,0x21,0x21,0x60,0x41,0x77,0x1f,0x72,0x70,0x33,0xc6,0x9a,0x27,0xaa,0x91,0x9f,0x38,0x83,0x72,0x2b,0x2a,0x9b,0x4d,0x7f,0x34,0x33,0x93,0x3d,0xaa,0xd,0x92,0x42,0x33,0x4f,0xa0,0x95,0x1f,0x30,0x14,0x8b,0xc5,0x5f,0x8c,0x44,0x62,0x2f,0x48,0xd9,0xa2,0x19,0x5f,0x99,0x7e,0x40,0x25,0x97,0x4b,0x7f,0x9c,0x4e,0x27,0xf,0x1,0xe7,0xdb,0xf5,0x3,0x1a,0x1d,0xa1,0x8,0x30,0x18,0xc,0x86,0xb6,0x47,0xa3,0xf1,0x3d,0x81,0x40,0xe7,0x83,0xff,0x7,0x9b,0xbb,0x58,0x9c,0xfb,0x33,0x93,0x49,0x1e,0x2c,0x14,0x6a,0x8e,0x50,0xee,0x76,0x1c,0xa1,0x7a,0x12,0x3a,0xb4,0x27,0xd8,0x3,0xc,0x58,0x56,0xd7,0x36,0xed,0x9,0x3e,0x60,0x18,0xbe,0xc8,0xb2,0xb8,0x44,0x4d,0x86,0xe7,0x79,0x8e,0xeb,0x56,0x72,0x8e,0xe3,0x9c,0xd7,0x9e,0xe0,0x77,0xc0,0x65,0xbd,0xe7,0xf3,0x75,0x2e,0xf1,0x82,0x5d,0x61,0x3f,0x10,0xd4,0x8a,0xe8,0x6e,0xe2,0xa,0x2f,0x2b,0xfe,0x6,0x57,0x38,0xab,0x23,0x5e,0xd0,0xbd,0xff,0x82,0x5d,0x61,0x1a,0xb2,0x8e,0xa1,0x8f,0x49,0x93,0x1b,0xf7,0x2,0x82,0x95,0x71,0x2f,0xa0,0xb8,0x71,0x2f,0xe0,0xe8,0x63,0xce,0xa5,0xcd,0x4b,0x92,0x85,0xde,0xc,0xad,0x34,0x7b,0x58,0xb1,0xc0,0x9b,0xa1,0x7b,0xe3,0xde,0xb8,0xcb,0xc7,0x3f,0xd2,0xdb,0x54,0x98,0x63,0x18,0x87,0x7d,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
@@ -465,7 +465,7 @@ static const unsigned char tree_title_pressed_png[]={
 
 
 static const unsigned char unchecked_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x0,0x0,0x0,0x0,0x0,0xf9,0x43,0xbb,0x7f,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x11,0x14,0x5,0x3b,0xd6,0x6,0x93,0xb9,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0xf2,0x49,0x44,0x41,0x54,0x38,0x8d,0xed,0x93,0xb1,0x4a,0xc4,0x50,0x10,0x45,0xcf,0x4b,0x26,0x71,0x2a,0xb7,0x58,0xc1,0xca,0x6f,0xb0,0xb4,0x74,0x75,0x59,0x11,0xbf,0xc1,0x1f,0xf0,0xaf,0xac,0xfc,0x1,0x61,0x11,0x45,0x85,0x5d,0xb6,0xb0,0xf4,0x1b,0xb4,0x10,0xd1,0x42,0xc1,0x65,0x30,0xb3,0x59,0x8b,0x24,0x10,0x10,0xf2,0x82,0xb5,0xb7,0x99,0xea,0xdc,0x79,0xf3,0xb8,0x37,0x50,0x29,0x0,0x9,0x20,0xf5,0xec,0x52,0x9,0x78,0x3d,0xd7,0xa1,0x86,0x33,0x60,0x8,0xec,0x0,0x9b,0x1d,0x26,0x25,0xf0,0x9,0x3c,0x1,0xef,0x40,0xd1,0x6c,0x1c,0x4a,0x2a,0xa3,0xf1,0xe1,0xe4,0x22,0xcb,0xb2,0xce,0xf5,0x45,0x51,0x70,0x7b,0x7f,0x73,0xea,0xee,0x77,0xc0,0x6b,0x0,0x36,0x80,0xdd,0xe3,0xa3,0x93,0x7,0x33,0xc3,0xcc,0x3a,0xd,0x54,0x15,0x55,0xe5,0xea,0x7a,0xba,0x7,0x3c,0x26,0xf5,0xb,0x6,0x22,0x12,0x85,0x1,0xcc,0xc,0x11,0x1,0x18,0x0,0x49,0x73,0x6b,0x88,0x92,0xbf,0x95,0x0,0x21,0xf6,0xe3,0xbd,0x5c,0xfe,0xd,0x2a,0xad,0xff,0xc0,0x96,0x8d,0x41,0x9,0x7c,0xb8,0x3b,0xaa,0x1a,0xa5,0x54,0x15,0x77,0x87,0x2a,0xd2,0xab,0x0,0xa4,0xc0,0x76,0x9e,0xe7,0x93,0x83,0xfd,0xf1,0x79,0x9f,0x28,0xcf,0x17,0xb3,0xb3,0xe5,0xf2,0xeb,0x12,0x78,0x69,0x97,0x69,0x8b,0xfe,0x65,0x7a,0x6,0xde,0x80,0xef,0x26,0x81,0xed,0x3a,0xa7,0x91,0x2b,0x56,0xb4,0xea,0xfc,0x3,0x6e,0x28,0x47,0x29,0x38,0xc5,0x49,0x7f,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x16,0x2f,0x23,0x2,0xd6,0x7b,0x4b,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0xe9,0x49,0x44,0x41,0x54,0x38,0xcb,0xed,0x93,0x4d,0x4e,0xc3,0x30,0x10,0x85,0x3f,0x4f,0x6c,0x71,0x1,0x1c,0x12,0x29,0x8b,0x6e,0x73,0x91,0x6e,0x10,0x7,0xe0,0x1c,0x3d,0x7,0x37,0x60,0xcf,0x1,0x10,0x8b,0xf6,0x22,0x15,0xbb,0x2e,0x2a,0xf5,0x7,0x2e,0x80,0xec,0x8e,0xd9,0x38,0x69,0x91,0x48,0x5a,0x4,0x4b,0x9e,0x34,0x2b,0xbf,0x19,0xbf,0x37,0x3f,0xf0,0x4b,0x18,0x80,0xbb,0xdb,0xfb,0x2b,0x60,0x2,0xb4,0x40,0x9,0xc8,0x0,0x5f,0x81,0x3d,0xb0,0x4,0x56,0xcf,0x2f,0x4f,0x1f,0x36,0x3f,0x4c,0xde,0xde,0x37,0xf,0xdb,0xdd,0x7a,0xaa,0xaa,0xce,0x18,0xf3,0x6d,0x76,0x4a,0x9,0x11,0x9,0xd5,0x4d,0xb3,0xf0,0xd7,0xf5,0xc,0x78,0xed,0x7e,0x6a,0xb7,0xbb,0xf5,0x14,0x70,0xd6,0x3a,0xc6,0x2,0x70,0x99,0xdb,0x2,0x74,0xa,0x4a,0x55,0x75,0xd6,0x3a,0x8a,0xa2,0x38,0xeb,0x3b,0xc6,0xe0,0xb2,0xd5,0xde,0xab,0x18,0x63,0x18,0x92,0xfe,0xa5,0x69,0x47,0x9e,0x30,0xd2,0xac,0x8b,0xf1,0x5f,0xe0,0x58,0x40,0x53,0x4a,0xa4,0x94,0xce,0x26,0x9c,0xf0,0xf4,0x74,0xf,0xf6,0x22,0x12,0x54,0xf,0xae,0x1b,0xd5,0x50,0xb2,0xea,0x1,0x11,0x89,0x79,0xa5,0x7b,0x5,0xcb,0xaa,0x6c,0x16,0x40,0x8c,0x31,0x30,0x16,0x60,0x42,0x55,0x36,0xf3,0x7c,0xf,0xbd,0x82,0x95,0xf7,0xf5,0xcc,0xfb,0xfa,0xf1,0x27,0xc7,0xc4,0x5f,0xe0,0x13,0xe5,0xc4,0x63,0x4f,0x20,0x8a,0x2e,0x80,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
@@ -480,17 +480,17 @@ static const unsigned char vseparator_png[]={
 
 
 static const unsigned char vslider_bg_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xdd,0x0,0xdd,0x0,0xdd,0xf5,0x15,0x8,0x9d,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x14,0xf,0xc,0x18,0x82,0xe,0xe5,0x21,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x0,0x4d,0x49,0x44,0x41,0x54,0x38,0x8d,0x63,0x60,0x18,0x68,0xc0,0x88,0x4d,0x50,0x42,0x42,0x62,0x1a,0x3,0x3,0x43,0x26,0x9a,0xf0,0xf4,0x17,0x2f,0x5e,0x64,0x11,0x6b,0xc0,0x7f,0x76,0x76,0x4e,0x14,0xb1,0x9f,0x3f,0xbf,0x33,0xbc,0x78,0xf1,0x2,0x43,0x3d,0x13,0xd1,0x6e,0xc5,0x1,0x46,0xd,0x18,0x35,0x0,0x9f,0x1,0xd3,0x7f,0xfe,0xfc,0xce,0x80,0x8c,0x19,0x18,0x18,0xa6,0x53,0x6a,0x19,0x6d,0x0,0x0,0x59,0x9c,0x18,0xe9,0x50,0xa4,0x59,0x7a,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x16,0x2c,0x30,0xad,0x45,0x69,0x56,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x87,0x49,0x44,0x41,0x54,0x38,0xcb,0xcd,0x93,0xcf,0x4e,0x13,0x51,0x14,0xc6,0x7f,0xf7,0xdc,0xc6,0x3b,0xd3,0xb1,0xc0,0x66,0x48,0x5b,0x42,0xea,0xc2,0xb4,0x2b,0x9f,0x81,0x7,0x30,0x3c,0x80,0x12,0x57,0x24,0xc6,0xc4,0xc8,0xd3,0xe0,0x12,0x37,0x68,0x7c,0x0,0xe3,0x3,0xf0,0xe,0x6e,0xb0,0x2c,0x68,0x50,0x20,0xce,0x6,0x3b,0x74,0xa6,0x3,0xce,0x3d,0x6c,0x4a,0x9d,0xd2,0x62,0xa2,0x2b,0xbf,0xe4,0x2c,0xee,0x9f,0xef,0x97,0x9c,0x2f,0xe7,0x18,0x2a,0xda,0x7c,0xfa,0xc,0xa0,0x6,0xac,0x2,0x5d,0x60,0x7d,0xf2,0x74,0x2,0x1c,0x2,0x9,0xf0,0xeb,0xd3,0xe7,0x8f,0x53,0x8f,0x59,0x60,0x7e,0x3c,0x4c,0x2f,0x76,0x92,0xe4,0xf4,0x79,0x96,0x8f,0x1a,0x0,0xf5,0x30,0x4a,0xe3,0xb8,0xfd,0x7e,0xa9,0xb1,0xb2,0xb,0x1c,0x55,0x21,0xc2,0xac,0xe2,0x61,0x7a,0xf1,0xe6,0x78,0xd0,0x7f,0x99,0xe5,0xd9,0xb2,0x88,0x15,0x11,0x2b,0x59,0x9e,0x2d,0x1f,0xf,0xfa,0xaf,0xd2,0xf4,0xe7,0x6b,0x20,0xae,0x1a,0xee,0x2,0x7a,0x49,0x72,0xba,0x65,0x8c,0xb1,0xd6,0x5a,0xaa,0x65,0x8c,0xb1,0x3f,0x92,0xef,0x2f,0x80,0x5e,0xd5,0x50,0xbb,0x3,0x58,0xcf,0xf2,0x51,0x43,0xc4,0x22,0xf2,0x9b,0x2d,0x22,0xa8,0x2a,0x59,0x3e,0x7a,0x58,0xc9,0x65,0x16,0x70,0x76,0x3e,0x0,0xd8,0xf6,0x5e,0xc5,0x5a,0xe6,0x64,0xc,0x78,0xaf,0xf6,0xec,0x7c,0xb0,0xd,0xec,0xcf,0x1,0x8a,0x62,0xc,0xb0,0x61,0xcc,0x4c,0xb6,0x55,0x4,0xc6,0x40,0x51,0x8c,0x37,0xfe,0x94,0xc1,0x5f,0xeb,0x3f,0x2,0x38,0x17,0xe0,0x5c,0x70,0xa0,0xa,0xa0,0xb,0xbe,0x2a,0xaa,0xe0,0x5c,0x70,0xe0,0x5c,0x30,0xf,0x68,0x35,0x3b,0xb4,0x9
a,0x9d,0x3d,0x11,0xe3,0x75,0x81,0x5f,0x15,0x44,0x4c,0xd9,0x6a,0x76,0xf6,0x5a,0xcd,0xce,0xbd,0x2d,0x9c,0xd4,0xc3,0x28,0xf5,0xde,0xe3,0xbd,0x9f,0x5e,0xde,0x9e,0xeb,0x61,0x74,0x39,0xd9,0x8b,0x7b,0x1,0x87,0x71,0xdc,0xfe,0xa0,0xaa,0x65,0x59,0x96,0x54,0x4b,0x55,0xcb,0xd5,0x78,0x6d,0x7f,0xb2,0x54,0x53,0x4d,0x47,0xa6,0xd7,0x7d,0x2,0x30,0x76,0x2e,0x38,0xa,0xc3,0xa8,0x76,0x7d,0x35,0xee,0x5e,0x5d,0x17,0xf,0x54,0x55,0xeb,0x61,0x34,0x5c,0x6b,0x3f,0x7a,0xb7,0xd4,0x58,0x79,0xb,0x7c,0x3,0xfc,0xd7,0xfe,0x17,0xe6,0x26,0xe6,0x5f,0xd6,0xf9,0x6,0xaa,0x73,0x9f,0xf0,0x6d,0xf0,0x57,0x1b,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
 static const unsigned char vslider_grabber_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x92,0x0,0x92,0x0,0x99,0x25,0xc1,0x88,0x71,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x12,0x0,0x2,0x1f,0xac,0xde,0x45,0xed,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0xa4,0x49,0x44,0x41,0x54,0x38,0x8d,0x9d,0x93,0xb1,0x6e,0x1a,0x41,0x14,0x45,0xcf,0xcc,0xce,0x9a,0x85,0x25,0x44,0x58,0x8a,0x15,0xc5,0x49,0x93,0x2,0xb2,0x2,0x51,0xf2,0x7,0x2e,0x68,0xf2,0x15,0xae,0x22,0xc1,0x77,0xa4,0xe,0x65,0x5c,0xe4,0x2f,0xf8,0xb,0x47,0xb6,0x2c,0x21,0xb0,0x44,0xb7,0x22,0x52,0x9a,0x0,0x82,0x9d,0xd9,0x1d,0x76,0xd3,0xec,0x22,0x40,0xb1,0x8d,0x72,0xa5,0x29,0xe6,0xcd,0xbd,0xf7,0xbd,0x79,0x33,0x4f,0x70,0x84,0x66,0xb3,0x79,0x1c,0x3a,0xc0,0x64,0x32,0x39,0xd8,0xcb,0x7f,0x88,0x2f,0x81,0xaf,0xc0,0x1d,0x10,0xe5,0xeb,0x2e,0x8f,0x5d,0x1e,0x27,0x10,0xdd,0x6e,0x17,0x80,0xc5,0x62,0x1,0x70,0xd,0x7c,0xb,0x82,0x76,0xb9,0x52,0xf1,0x71,0xdd,0x33,0x0,0x92,0x24,0x66,0xb3,0x59,0x33,0x1e,0x3f,0x44,0xc0,0xc0,0x5a,0x7b,0xb3,0x5e,0xaf,0x49,0xd3,0x14,0xe7,0xd3,0x87,0xe,0xbf,0xff,0xfc,0x2,0xb8,0xae,0xd7,0xcf,0xbf,0xb7,0x5a,0x1d,0xb7,0x5a,0xad,0xa1,0x94,0x42,0x8,0x81,0x10,0x2,0xa5,0x14,0xe5,0x72,0x85,0x8b,0x8b,0xb7,0xae,0xd6,0xd1,0xe7,0x38,0x36,0xa1,0xef,0xfb,0xb7,0x5a,0x6b,0x9c,0x33,0xdf,0x29,0xca,0x1e,0xb5,0x5a,0x1d,0xb7,0x54,0xf2,0x9e,0xbc,0xbf,0x52,0x8a,0x5a,0xed,0x35,0xf3,0x79,0x78,0x95,0x65,0xd9,0xf,0x6b,0xed,0xaa,0xe8,0x41,0x3f,0x8,0xda,0xe5,0xe7,0xc4,0x5,0x4a,0x25,0x8f,0x20,0x68,0x97,0x81,0xbe,0xeb,0xba,0xbb,0x26,0xf6,0x2a,0x15,0xff,0x45,0x71,0x81,0x9c,0xdb,0x53,0x4a,0x21,0xb3,0x2c,0x3,0x68,0x14,0xd,0x3b,0x5,0x39,0xb7,0x21,0xa5,0x3c,0x7c,0xc6,0xff,0x81,0x14,0x42,0x0,0x4c,0x93,0x24,0x3
e,0x59,0x94,0x73,0xa7,0x69,0x9a,0xee,0x2a,0x18,0x6d,0x36,0xeb,0x93,0xd,0x72,0xee,0xc8,0x5a,0xbb,0x33,0x18,0x8e,0xc7,0xf,0x91,0x31,0xfa,0x45,0xb1,0x31,0xba,0xf8,0x50,0xc3,0x24,0x49,0x90,0xef,0xeb,0x1f,0x1,0x42,0x60,0x30,0x9b,0x3d,0xf2,0x9c,0x89,0x31,0x9a,0xd9,0xec,0x11,0x60,0xe0,0x38,0x4e,0xb8,0xdd,0x6e,0x71,0xde,0xbc,0x3b,0xc7,0xf3,0x3c,0x8c,0x31,0xb7,0x5a,0x47,0xe1,0x7c,0x1e,0x5e,0x55,0xab,0xaf,0x5c,0x29,0x25,0x52,0x4a,0xb2,0x2c,0x23,0x8e,0xd,0xab,0xd5,0x82,0xfb,0xfb,0x9f,0x91,0xd6,0xd1,0x17,0x6b,0xed,0xcd,0x72,0xb9,0x24,0x4d,0x53,0xc4,0x7e,0x86,0xbd,0x61,0xea,0x3,0x3d,0xa0,0x91,0x1f,0x4d,0x81,0x11,0x30,0x4,0xc2,0xfd,0x89,0x3c,0x30,0xd8,0x33,0x79,0x12,0xc7,0xe3,0xfc,0x17,0x9c,0xcc,0xa8,0xb2,0xd4,0xe8,0x7,0x23,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0x1b,0x38,0x31,0xdf,0xff,0x9,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x7d,0x49,0x44,0x41,0x54,0x38,0xcb,0xdd,0x92,0x3d,0x6b,0x53,0x61,0x14,0x80,0x9f,0xf7,0x43,0xdf,0xfb,0x11,0x6f,0x9b,0x86,0x1a,0x84,0x3a,0x84,0x98,0x25,0x29,0xd,0x11,0x87,0x40,0x3b,0x64,0x70,0x89,0x6b,0x97,0x6c,0xa5,0x7b,0x6,0xc9,0x3f,0xe8,0xe2,0xe4,0x2f,0xb8,0x8b,0x6b,0x17,0x9d,0x1d,0xb2,0x25,0x43,0x29,0x4,0x41,0x42,0x21,0x43,0x20,0x1d,0x3b,0x8,0x49,0x24,0x2f,0x5c,0xc3,0xbd,0xaf,0x4b,0xb,0xb6,0xb6,0xe,0x6e,0xfa,0x2c,0x7,0xe,0x9c,0xf,0xce,0x73,0xe0,0x9f,0x47,0x3c,0x90,0x93,0x80,0x1,0x9e,0x0,0x21,0xf0,0x18,0x70,0xc0,0xa,0x58,0x0,0x16,0x48,0xef,0x6b,0xf0,0x8,0xd8,0x28,0x95,0x4a,0xcf,0x1b,0x8d,0xc6,0xab,0x7a,0xbd,0xfe,0x26,0x8a,0xa2,0x5d,0x63,0xcc,0xb6,0x94,0xd2,0x2d,0x16,0x8b,0xaf,0xd3,0xe9,0xf4,0x63,0x1c,0xc7,0xa7,0xc0,0x37,0x20,0x15,0xbf,0x4c,0xd,0x72,0xb9,0x5c,0xa9,0xdb,0xed,0xbe,0x2d,0x16,0x8b,0xaf,0x8d,0x31,0x3b,0x49,0x92,0xa8,0xf5,0x7a,0x4d,0x9a,0xa6,0x0,0x18,0x63,0xd0,0x5a,0x5f,0x8d,0xc7,0xe3,0x77,0x71,0x1c,0x7f,0x0,0xbe,0x6b,0x40,0x3,0x9b,0xcd,0x66,0x73,0xbf,0xd3,0xe9,0xbc,0x57,0x4a,0xbd,0xb0,0xd6,0x62,0xad,0x25,0xcb,0xb2,0x5b,0xeb,0xa5,0x69,0x4a,0x10,0x4,0xc5,0x72,0xb9,0x7c,0x8,0x7c,0xba,0x69,0x60,0x7a,0xbd,0xde,0x51,0xb5,0x5a,0x3d,0x99,0xcf,0xe7,0xe1,0x6a,0xb5,0xfa,0xad,0xf0,0x6,0xe7,0x1c,0x49,0x92,0x90,0xcf,0xe7,0xf7,0xae,0x6f,0x83,0x4,0x84,0xe7,0x79,0x1,0xa0,0xfe,0xc6,0x82,0x2,0xb2,0xc1,0x60,0x70,0xb1,0x5c,0x2e,0xbf,0xd4,0x6a,0xb5,0x97,0x9e,0xe7,0x6d,0x65,0x59,0x86,0
x73,0xe,0xe7,0xdc,0x6d,0x3d,0x42,0xe0,0xfb,0x3e,0xd6,0xda,0xf3,0x7e,0xbf,0x7f,0xa,0x2c,0x15,0x90,0x1,0x76,0x36,0x9b,0x5d,0xe,0x87,0xc3,0xcf,0x95,0x4a,0x85,0x42,0xa1,0xf0,0x34,0xc,0xc3,0xd,0x21,0x84,0x0,0x90,0x52,0x22,0xa5,0xc4,0xf7,0x7d,0xb4,0xd6,0x57,0x93,0xc9,0x24,0x1e,0x8d,0x46,0x67,0xc0,0x8f,0xfb,0x34,0x46,0x5a,0xeb,0xed,0x76,0xbb,0x7d,0xd0,0x6a,0xb5,0x8e,0x85,0x10,0xcf,0x8c,0x31,0x5b,0x4a,0xa9,0x3f,0x6a,0xbc,0x8b,0x4,0x2,0x20,0xba,0x8e,0xfa,0xa1,0x47,0xfa,0xf,0xf8,0x9,0xc2,0x2d,0x88,0xfe,0x7,0xd8,0xc3,0x3a,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
 static const unsigned char vslider_grabber_hl_png[]={
-0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0x92,0x0,0x92,0x0,0x99,0x25,0xc1,0x88,0x71,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xde,0x9,0x12,0x0,0x2,0x21,0x6d,0xbf,0x58,0x46,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x30,0x49,0x44,0x41,0x54,0x38,0x8d,0xa5,0x93,0xb1,0x6a,0xc2,0x50,0x14,0x86,0xbf,0x63,0xe2,0x90,0xd0,0x2e,0xc1,0xad,0x8b,0xad,0x60,0x9e,0xc0,0xbe,0x41,0x7,0x33,0x38,0x7,0x1d,0xba,0x74,0x2a,0xe8,0xc3,0xe8,0x68,0xc1,0x49,0xc8,0xec,0x10,0x9f,0xa1,0x8,0xee,0x71,0xcf,0x52,0x4d,0x70,0x73,0x8a,0xb7,0x83,0x37,0x72,0x11,0xb4,0xa1,0xfe,0xe3,0xe5,0x7c,0xdf,0xbd,0x1c,0xfe,0xb,0x77,0x46,0x2e,0xf,0x7c,0xdf,0x7f,0x2,0x86,0x40,0x17,0x68,0xeb,0xe3,0xd,0xb0,0x4,0x26,0x49,0x92,0xa4,0x57,0x5,0xbe,0xef,0x7f,0x0,0xe3,0x30,0x1c,0x38,0x9e,0xd7,0x40,0xa4,0x6,0x80,0x52,0x47,0xf2,0x7c,0x47,0x14,0xcd,0xf,0xc0,0x28,0x49,0x92,0xaf,0x92,0xb1,0x4c,0xb8,0xd9,0x7c,0x9e,0xf6,0xfb,0xef,0x75,0xd7,0x7d,0xd4,0xb0,0x0,0x82,0x48,0xd,0xd7,0x7d,0xa0,0xd3,0x79,0xad,0x6f,0xb7,0x3f,0x3d,0xdb,0xb6,0xd2,0x2c,0xcb,0xd6,0x0,0x35,0xe3,0xd9,0xe3,0x20,0xe8,0x21,0x62,0x71,0x2d,0x22,0x16,0x41,0xd0,0x3,0x18,0x6b,0xe6,0x24,0x0,0x86,0x61,0x38,0x70,0x6e,0xc1,0xa6,0x24,0xc,0x7,0x8e,0xde,0xd3,0x59,0xd0,0xf5,0xbc,0xc6,0x9f,0x70,0x19,0x3d,0xdb,0x35,0x5,0xed,0x72,0x61,0x55,0xa2,0x67,0xdb,0xa6,0xe0,0xdf,0x29,0x5,0x1b,0xa5,0x8e,0x95,0x21,0x3d,0xbb,0x31,0x5,0xcb,0x3c,0xdf,0x55,0x16,0xe8,0xd9,0xa5,0x29,0x98,0x44,0xd1,0xfc,0xa0,0x54,0x51,0xe1,0xf6,0xa2,0x2c,0xd4,0xe4,0x2c,0xd0,0xf5,0x1c,0xc5,0xf1,0x82,0x5b,0x12,0xa5,0xa,0xe2,0x78,0x1,0xa7,0x36,0xa6,0x60,0x34,0x31,0xcb,0xb2,0xb5,0x6d,0x5b,0xe9,0x6a,0xf5,0xfd,0xd6,0x6a,0xbd,0xd4,0x1d,0xc7,0x41,0
x44,0x34,0x78,0x24,0xcf,0xb7,0xcc,0x66,0xd3,0xc3,0x7e,0xbf,0xff,0x34,0xab,0x7c,0xf7,0x67,0xba,0x3b,0xbf,0x4d,0x78,0x75,0x34,0x1f,0x21,0x5d,0xa6,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
+0x89,0x50,0x4e,0x47,0xd,0xa,0x1a,0xa,0x0,0x0,0x0,0xd,0x49,0x48,0x44,0x52,0x0,0x0,0x0,0x10,0x0,0x0,0x0,0x10,0x8,0x6,0x0,0x0,0x0,0x1f,0xf3,0xff,0x61,0x0,0x0,0x0,0x6,0x62,0x4b,0x47,0x44,0x0,0xff,0x0,0xff,0x0,0xff,0xa0,0xbd,0xa7,0x93,0x0,0x0,0x0,0x9,0x70,0x48,0x59,0x73,0x0,0x0,0xb,0x13,0x0,0x0,0xb,0x13,0x1,0x0,0x9a,0x9c,0x18,0x0,0x0,0x0,0x7,0x74,0x49,0x4d,0x45,0x7,0xdf,0xb,0xd,0x15,0x1c,0x11,0x3c,0x2c,0xf1,0xa2,0x0,0x0,0x0,0x19,0x74,0x45,0x58,0x74,0x43,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0x0,0x43,0x72,0x65,0x61,0x74,0x65,0x64,0x20,0x77,0x69,0x74,0x68,0x20,0x47,0x49,0x4d,0x50,0x57,0x81,0xe,0x17,0x0,0x0,0x1,0x93,0x49,0x44,0x41,0x54,0x38,0xcb,0xdd,0x92,0x3f,0x4c,0x13,0x71,0x14,0xc7,0x3f,0xef,0x77,0x7f,0x5a,0xae,0xde,0x1d,0x57,0x3,0x47,0x13,0x19,0x1a,0x48,0x14,0x2,0xc,0xe2,0x62,0x80,0xc8,0x40,0x22,0x31,0x71,0x20,0xec,0x18,0x77,0x7,0x27,0xb6,0xe,0x2e,0x2e,0x32,0xe1,0xc6,0xc2,0x5a,0x7,0x9,0x21,0xe,0xdd,0x88,0x9b,0x61,0x91,0x34,0x6a,0x17,0x63,0xe2,0x60,0x28,0x10,0x4c,0x28,0x96,0x96,0xbb,0xde,0xfd,0x5c,0x20,0x51,0xfe,0x98,0x38,0xea,0x67,0x79,0xc9,0x4b,0xde,0xf7,0xbd,0x7c,0xbf,0xf,0xfe,0x79,0xe4,0x8a,0x9e,0x2,0x32,0x80,0xb,0xe4,0x0,0x1b,0xd0,0x40,0x13,0x38,0x4,0x8e,0x81,0xe4,0x32,0x1,0xb,0xf0,0xc3,0xe1,0xa1,0xfe,0x81,0x7b,0xd3,0x77,0x8a,0x53,0x93,0xf,0x6c,0x37,0x1c,0x51,0xe6,0xb5,0x1e,0x65,0x2a,0x1d,0xff,0xd8,0xa9,0xd6,0x3f,0x54,0x5f,0x57,0x4a,0xa5,0x32,0x70,0x0,0x24,0xf2,0xcb,0x56,0x27,0xeb,0xfb,0xc5,0x87,0x4b,0x2f,0x9e,0xe6,0xc2,0x81,0x99,0x54,0x77,0xdf,0x68,0x35,0x62,0x23,0x6a,0x76,0xe8,0xc4,0x29,0x22,0x90,0xf5,0x6c,0xdc,0x20,0xda,0xdd,0xd9,0x7e,0xfb,0xbc,0x52,0x2a,0xad,0x2,0x47,0x26,0x60,0x2,0xdd,0x37,0x67,0xef,0x4f,0x4c,0x3c,0x59,0x5c,0x6a,0xb7,0xbc,0xc1,0xef,0xdf,0x4e,0x88,0xdb,0x47,0xa4,0x89,0xfe,0xed,0xbc,0x4e,0xd4,0x6,0xb2,0x61,0xdf,0xc8,0xd8,0x3c,0xb0,0x76,0x26,0x90,0x99,0x7b,0xb9,0xfc,0x28,0x1c,0x9d,0x7a,0xb6,0xf7,0xa5,0x95,0x6b,0x1d,0x36,0x2f,0xc,0x9e,0xa1,0x53,0x4d,0xbb,0x11,0x11,0x14,0xa,0x63,0xa7,0xde,0xa0,0x0,0xb1,0x1c,0xc7,0x41,0x30,0xb4,0xfe,0xfb,0x14,0xc,0x20,0xfd,
0xb8,0xb1,0xf1,0xe9,0xa4,0xb1,0xff,0x7e,0x70,0x7a,0xfc,0xb6,0xd9,0xe5,0xe6,0xd3,0x58,0xa3,0x53,0xcd,0x79,0x41,0x51,0x82,0x13,0x64,0x30,0xa5,0xbe,0xb5,0xfd,0xaa,0x5c,0x6,0x1a,0x6,0x90,0x2,0xc7,0x7b,0xb5,0xda,0xd7,0xda,0x9b,0xf5,0x4a,0xf1,0xee,0x2d,0xf2,0xfd,0xd7,0x7b,0xbb,0x7c,0xcf,0x17,0x11,0x41,0x40,0x44,0x30,0x2c,0x85,0x13,0x64,0x70,0x83,0x68,0xb7,0x5e,0xdd,0x5a,0xf9,0xbc,0xb9,0xf9,0xe,0x88,0x2e,0x8b,0xd1,0x33,0x2c,0xab,0x67,0x7c,0x61,0x61,0x72,0x74,0x6e,0xfe,0x71,0x92,0xd8,0x5,0xc3,0xce,0xe5,0x95,0x69,0xfc,0x31,0xc6,0xf3,0x28,0xc0,0x1,0xbc,0xd3,0x6a,0x5e,0xf5,0x48,0xff,0x1,0x3f,0x1,0xa3,0x8a,0x90,0x14,0xe9,0x66,0x95,0x43,0x0,0x0,0x0,0x0,0x49,0x45,0x4e,0x44,0xae,0x42,0x60,0x82
 };
 
 
diff --git a/scene/resources/default_theme/toggle_off.png b/scene/resources/default_theme/toggle_off.png
index 3e92aa0ece..aa1c96e5a0 100644
--- a/scene/resources/default_theme/toggle_off.png
+++ b/scene/resources/default_theme/toggle_off.png
diff --git a/scene/resources/default_theme/toggle_on.png b/scene/resources/default_theme/toggle_on.png
index a49c234f51..0a69d36ae8 100644
--- a/scene/resources/default_theme/toggle_on.png
+++ b/scene/resources/default_theme/toggle_on.png
diff --git a/scene/resources/default_theme/unchecked.png b/scene/resources/default_theme/unchecked.png
index 39a70e6003..f8710d03df 100644
--- a/scene/resources/default_theme/unchecked.png
+++ b/scene/resources/default_theme/unchecked.png
diff --git a/scene/resources/default_theme/vslider_bg.png b/scene/resources/default_theme/vslider_bg.png
index 5472bb366f..d58d4b1659 100644
--- a/scene/resources/default_theme/vslider_bg.png
+++ b/scene/resources/default_theme/vslider_bg.png
diff --git a/scene/resources/default_theme/vslider_grabber.png b/scene/resources/default_theme/vslider_grabber.png
index 988c25d9dd..50ef5680da 100644
--- a/scene/resources/default_theme/vslider_grabber.png
+++ b/scene/resources/default_theme/vslider_grabber.png
diff --git a/scene/resources/default_theme/vslider_grabber_hl.png b/scene/resources/default_theme/vslider_grabber_hl.png
index f319df3319..93eba4b174 100644
--- a/scene/resources/default_theme/vslider_grabber_hl.png
+++ b/scene/resources/default_theme/vslider_grabber_hl.png
diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp
index bbb2a386f3..55bb4e9073 100644
--- a/scene/resources/material.cpp
+++ b/scene/resources/material.cpp
@@ -406,7 +406,6 @@ void FixedMaterial::_bind_methods() {
 	BIND_CONSTANT( PARAM_SHADE_PARAM );
 	BIND_CONSTANT( PARAM_MAX );
 
-
 	BIND_CONSTANT( TEXCOORD_SPHERE );
 	BIND_CONSTANT( TEXCOORD_UV );
 	BIND_CONSTANT( TEXCOORD_UV_TRANSFORM );
@@ -417,6 +416,11 @@ void FixedMaterial::_bind_methods() {
 	BIND_CONSTANT( FLAG_USE_POINT_SIZE );
 	BIND_CONSTANT( FLAG_DISCARD_ALPHA );
 
+	BIND_CONSTANT( LIGHT_SHADER_LAMBERT );
+	BIND_CONSTANT( LIGHT_SHADER_WRAP );
+	BIND_CONSTANT( LIGHT_SHADER_VELVET );
+	BIND_CONSTANT( LIGHT_SHADER_TOON );
+
 }
 
 
diff --git a/scene/resources/packed_scene.cpp b/scene/resources/packed_scene.cpp
index fdf1692495..863f2be699 100644
--- a/scene/resources/packed_scene.cpp
+++ b/scene/resources/packed_scene.cpp
@@ -124,7 +124,7 @@ Node *SceneState::instance(bool p_gen_edit_state) const {
 					ERR_FAIL_COND_V(!node,NULL);
 				} else {
 					InstancePlaceholder *ip = memnew( InstancePlaceholder );
-					ip->set_path(path);
+					ip->set_instance_path(path);
 					node=ip;
 				}
 				node->set_scene_instance_load_placeholder(true);
@@ -1280,15 +1280,18 @@ StringName SceneState::get_node_name(int p_idx) const {
 
 Ref<PackedScene> SceneState::get_node_instance(int p_idx) const {
 	ERR_FAIL_INDEX_V(p_idx,nodes.size(),Ref<PackedScene>());
+
 	if (nodes[p_idx].instance>=0) {
 		return variants[nodes[p_idx].instance];
-	} else if (nodes[p_idx].parent<=0 || nodes[p_idx].parent==NO_PARENT_SAVED) {
+	} else if (nodes[p_idx].parent<0 || nodes[p_idx].parent==NO_PARENT_SAVED) {
 
 		if (base_scene_idx>=0) {
 			return variants[base_scene_idx];
 		}
 	}
 
+
+
 	return Ref<PackedScene>();
 
 
@@ -1438,6 +1441,84 @@ Array SceneState::get_connection_binds(int p_idx) const {
 Vector<NodePath> SceneState::get_editable_instances() const {
 	return editable_instances;
 }
+//build API: the add_* methods below append entries to the scene state tables
+
+int SceneState::add_name(const StringName& p_name) {
+	// Appends p_name to `names` and returns the index of the new entry.
+	names.push_back(p_name);
+	return names.size()-1;
+}
+
+int SceneState::add_value(const Variant& p_value) {
+	// Appends p_value to `variants` and returns the index of the new entry.
+	variants.push_back(p_value);
+	return variants.size()-1;
+}
+
+int SceneState::add_node_path(const NodePath& p_path){
+	// Appends p_path to `node_paths`; the returned id is the new index OR'ed with FLAG_ID_IS_PATH (presumably so it can be told apart from a plain node index -- confirm).
+	node_paths.push_back(p_path);
+	return  (node_paths.size()-1)|FLAG_ID_IS_PATH;
+}
+int SceneState::add_node(int p_parent,int p_owner,int p_type,int p_name, int p_instance){
+	// Builds a NodeData from the given ids (stored as passed, no range checks here), appends it to `nodes` and returns its index.
+	NodeData nd;
+	nd.parent=p_parent;
+	nd.owner=p_owner;
+	nd.type=p_type;
+	nd.name=p_name;
+	nd.instance=p_instance;
+
+	nodes.push_back(nd);
+
+	return nodes.size()-1;
+}
+void SceneState::add_node_property(int p_node,int p_name,int p_value){
+	// Attaches a (name,value) property to node p_node; p_node, p_name and p_value must index `nodes`, `names` and `variants` respectively.
+	ERR_FAIL_INDEX(p_node,nodes.size());
+	ERR_FAIL_INDEX(p_name,names.size());
+	ERR_FAIL_INDEX(p_value,variants.size());
+
+	NodeData::Property prop;
+	prop.name=p_name;
+	prop.value=p_value;
+	nodes[p_node].properties.push_back(prop);
+}
+void SceneState::add_node_group(int p_node,int p_group){
+	// Appends p_group (an index into `names`) to the group list of node p_node; both indices are range-checked first.
+	ERR_FAIL_INDEX(p_node,nodes.size());
+	ERR_FAIL_INDEX(p_group,names.size());
+	nodes[p_node].groups.push_back(p_group);
+
+}
+void SceneState::set_base_scene(int p_idx){
+	// Stores p_idx in base_scene_idx after checking that it indexes `variants`.
+	ERR_FAIL_INDEX(p_idx,variants.size());
+	base_scene_idx=p_idx;
+}
+void SceneState::add_connection(int p_from,int p_to, int p_signal, int p_method, int p_flags,const Vector<int>& p_binds){
+	// Appends a ConnectionData entry to `connections`. p_signal and p_method must index `names`; every element of p_binds must index `variants`.
+	ERR_FAIL_INDEX(p_signal,names.size());
+	ERR_FAIL_INDEX(p_method,names.size());
+	// p_from, p_to and p_flags are stored as given; no range check is applied to them here.
+	for(int i=0;i<p_binds.size();i++) {
+		ERR_FAIL_INDEX(p_binds[i],variants.size());
+	}
+	ConnectionData c;
+	c.from=p_from;
+	c.to=p_to;
+	c.signal=p_signal;
+	c.method=p_method;
+	c.flags=p_flags;
+	c.binds=p_binds;
+	connections.push_back(c);
+
+}
+void SceneState::add_editable_instance(const NodePath& p_path){
+	// Appends p_path to `editable_instances` (the list returned by get_editable_instances()).
+	editable_instances.push_back(p_path);
+}
+
 
 
 SceneState::SceneState() {
diff --git a/scene/resources/packed_scene.h b/scene/resources/packed_scene.h
index 3956d2abe4..f3ec0afb6d 100644
--- a/scene/resources/packed_scene.h
+++ b/scene/resources/packed_scene.h
@@ -126,7 +126,7 @@ public:
 	Node *instance(bool p_gen_edit_state=false) const;
 
 
-	//build-unbuild API
+	//unbuild API
 
 	int get_node_count() const;
 	StringName get_node_type(int p_idx) const;
@@ -150,6 +150,19 @@ public:
 
 	Vector<NodePath> get_editable_instances() const;
 
+	//build API
+
+	int add_name(const StringName& p_name);
+	int add_value(const Variant& p_value);
+	int add_node_path(const NodePath& p_path);
+	int add_node(int p_parent,int p_owner,int p_type,int p_name, int p_instance);
+	void add_node_property(int p_node,int p_name,int p_value);
+	void add_node_group(int p_node,int p_group);
+	void set_base_scene(int p_idx);
+	void add_connection(int p_from,int p_to, int p_signal, int p_method, int p_flags,const Vector<int>& p_binds);
+	void add_editable_instance(const NodePath& p_path);
+
+
 	SceneState();
 };
 
diff --git a/scene/resources/scene_format_text.cpp b/scene/resources/scene_format_text.cpp
index 8403c06ad1..5f41dc2ce8 100644
--- a/scene/resources/scene_format_text.cpp
+++ b/scene/resources/scene_format_text.cpp
@@ -6,6 +6,1022 @@
 
 #define FORMAT_VERSION 1
 
+#include "version.h"
+#include "os/dir_access.h"
+// Reports 'error_text' through ERR_PRINT, prefixed with 'res_path' and the current line number ('lines').
+#define _printerr() ERR_PRINT(String(res_path+":"+itos(lines)+" - Parse Error: "+error_text).utf8().get_data());
+
+
+Error ResourceInteractiveLoaderText::parse_property(Variant& r_v, String &r_name)  {
+	// Stub: parses nothing, leaves r_v and r_name untouched and always returns OK.
+	return OK;
+}
+
+
+
+
+///
+
+void ResourceInteractiveLoaderText::set_local_path(const String& p_local_path) {
+	// NOTE: despite the name, this assigns 'res_path'; the 'local_path' member is left unchanged.
+	res_path=p_local_path;
+}
+
+Ref<Resource> ResourceInteractiveLoaderText::get_resource() {
+	// Returns 'resource', which poll() assigns for a [resource] tag or when a scene file reaches EOF.
+	return resource;
+}
+
+Error ResourceInteractiveLoaderText::_parse_sub_resource(VariantParser::Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str) {
+	// Resolves a sub-resource reference: reads the numeric id, looks it up in ResourceCache and expects a closing ')'.
+	VariantParser::Token token;
+	VariantParser::get_token(p_stream,token,line,r_err_str);
+	if (token.type!=VariantParser::TK_NUMBER) {
+		r_err_str="Expected number (sub-resource index)";
+		return ERR_PARSE_ERROR;
+	}
+
+	int index = token.value;
+
+	String path = local_path+"::"+itos(index);
+	// poll() gives sub-resources this same "<local_path>::<id>" path, so the referenced one must already be cached.
+	if (!ResourceCache::has(path)) {
+		r_err_str="Can't load cached sub-resource: "+path;
+		return ERR_PARSE_ERROR;
+	}
+
+	r_res=RES(ResourceCache::get(path));
+	// The reference must be terminated by a closing parenthesis.
+	VariantParser::get_token(p_stream,token,line,r_err_str);
+	if (token.type!=VariantParser::TK_PARENTHESIS_CLOSE) {
+		r_err_str="Expected ')'";
+		return ERR_PARSE_ERROR;
+	}
+
+
+	return OK;
+}
+
+Error ResourceInteractiveLoaderText::_parse_ext_resource(VariantParser::Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str){
+	// Resolves an external resource reference: reads the numeric id, loads the matching ext_resources entry and expects ')'.
+	VariantParser::Token token;
+	VariantParser::get_token(p_stream,token,line,r_err_str);
+	if (token.type!=VariantParser::TK_NUMBER) {
+		r_err_str="Expected number (ext-resource index)";
+		return ERR_PARSE_ERROR;
+	}
+
+	int id = token.value;
+
+	// Only ids declared by an earlier [ext_resource] tag can be referenced.
+	if (!ext_resources.has(id)) {
+		r_err_str="Can't load cached ext-resource #"+itos(id);
+		return ERR_PARSE_ERROR;
+	}
+
+	String path = ext_resources[id].path;
+	String type = ext_resources[id].type;
+
+	if (path.find("://")==-1 && path.is_rel_path()) {
+		// path is relative to file being loaded, so convert to a resource path
+		path=Globals::get_singleton()->localize_path(res_path.get_base_dir().plus_file(path));
+
+	}
+
+	r_res=ResourceLoader::load(path,type);
+
+	if (r_res.is_null()) {
+		r_err_str="Couldn't load external resource: "+path;
+		return ERR_PARSE_ERROR;
+	}
+
+	VariantParser::get_token(p_stream,token,line,r_err_str);
+	if (token.type!=VariantParser::TK_PARENTHESIS_CLOSE) {
+		r_err_str="Expected ')'";
+		return ERR_PARSE_ERROR;
+	}
+
+
+	return OK;
+}
+
+
+Error ResourceInteractiveLoaderText::poll() {
+	// Handles the tag held in 'next_tag' (plus any assignments that follow it) and reads ahead to the next tag.
+	// Returns OK while more tags remain, ERR_FILE_EOF when the file has been fully consumed, or an error code.
+	if (error!=OK)
+		return error;
+
+	if (next_tag.name=="ext_resource") {
+
+		if (!next_tag.fields.has("path")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'path' in external resource tag";
+			_printerr();
+			return error;
+		}
+
+		if (!next_tag.fields.has("type")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'type' in external resource tag";
+			_printerr();
+			return error;
+		}
+
+		if (!next_tag.fields.has("id")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'id' in external resource tag";
+			_printerr();
+			return error;
+		}
+
+		String path=next_tag.fields["path"];
+		String type=next_tag.fields["type"];
+		int index=next_tag.fields["id"];
+
+		if (path.find("://")==-1 && path.is_rel_path()) {
+			// path is relative to file being loaded, so convert to a resource path
+			path=Globals::get_singleton()->localize_path(local_path.get_base_dir().plus_file(path));
+		}
+
+		if (remaps.has(path)) {
+			path=remaps[path];
+		}
+
+		RES res = ResourceLoader::load(path,type);
+
+		if (res.is_null()) {
+
+			if (ResourceLoader::get_abort_on_missing_resources()) {
+				error=ERR_FILE_CORRUPT;
+				error_text="[ext_resource] referenced nonexistent resource at: "+path;
+				_printerr();
+				return error;
+			} else {
+				ResourceLoader::notify_dependency_error(local_path,path,type);
+			}
+		} else {
+
+			resource_cache.push_back(res);
+		}
+
+		ExtResource er;
+		er.path=path;
+		er.type=type;
+		ext_resources[index]=er;
+
+		error = VariantParser::parse_tag(&stream,lines,error_text,next_tag,&rp);
+
+		if (error) {
+			_printerr();
+		}
+
+		return error;
+
+
+	} else if (next_tag.name=="sub_resource") {
+
+		if (!next_tag.fields.has("type")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'type' in sub resource tag";
+			_printerr();
+			return error;
+		}
+
+		if (!next_tag.fields.has("id")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'id' in sub resource tag";
+			_printerr();
+			return error;
+		}
+
+		String type=next_tag.fields["type"];
+		int id=next_tag.fields["id"];
+
+		String path = local_path+"::"+itos(id);
+
+		//bool exists=ResourceCache::has(path);
+
+		Ref<Resource> res;
+
+		if ( !ResourceCache::has(path)) { //only if it doesn't exist
+
+			Object *obj = ObjectTypeDB::instance(type);
+			if (!obj) {
+
+				error_text+="Can't create sub resource of type: "+type;
+				_printerr();
+				error=ERR_FILE_CORRUPT;
+				return error;
+			}
+
+			Resource *r = obj->cast_to<Resource>();
+			if (!r) {
+
+				error_text+="Can't create sub resource of type, because not a resource: "+type;
+				_printerr();
+				error=ERR_FILE_CORRUPT;
+				return error;
+			}
+
+			res=Ref<Resource>(r);
+			resource_cache.push_back(res);
+			res->set_path(path);
+
+		}
+
+		while(true) {
+
+			String assign;
+			Variant value;
+
+			error = VariantParser::parse_tag_assign_eof(&stream,lines,error_text,next_tag,assign,value,&rp);
+
+			if (error) {
+				_printerr();
+				return error;
+			}
+
+			if (assign!=String()) {
+				if (res.is_valid()) {
+					res->set(assign,value);
+				}
+				//it's assignment
+			} else if (next_tag.name!=String()) {
+
+				error=OK;
+				break;
+			} else {
+				error=ERR_FILE_CORRUPT;
+				error_text="Premature end of file while parsing [sub_resource]";
+				_printerr();
+				return error;
+			}
+
+
+		}
+
+		return OK;
+
+	} else if (next_tag.name=="resource") {
+
+		if (is_scene) {
+
+			error_text+="found the 'resource' tag on a scene file!";
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return error;
+		}
+
+		Object *obj = ObjectTypeDB::instance(res_type);
+		if (!obj) {
+
+			error_text+="Can't create resource of type: "+res_type;
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return error;
+		}
+
+
+		Resource *r = obj->cast_to<Resource>();
+		if (!r) {
+
+			error_text+="Can't create resource of type, because not a resource: "+res_type;
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return error;
+		}
+
+		resource=Ref<Resource>(r);
+
+		while(true) {
+
+			String assign;
+			Variant value;
+
+			error = VariantParser::parse_tag_assign_eof(&stream,lines,error_text,next_tag,assign,value,&rp);
+
+			if (error) {
+				if (error!=ERR_FILE_EOF) {
+					_printerr();
+				}
+				return error;
+			}
+
+			if (assign!=String()) {
+				resource->set(assign,value);
+				//it's assignment
+			} else if (next_tag.name!=String()) {
+
+				error=ERR_FILE_CORRUPT;
+				error_text="Extra tag found when parsing main resource file";
+				_printerr();
+				return error;
+			} else {
+				error=ERR_FILE_EOF;
+				return error;
+			}
+
+		}
+
+		return OK;
+
+	} else if (next_tag.name=="node") {
+
+		if (!is_scene) {
+
+			error_text+="found the 'node' tag on a resource file!";
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return error;
+		}
+
+		/*
+		int add_name(const StringName& p_name);
+		int add_value(const Variant& p_value);
+		int add_node_path(const NodePath& p_path);
+		int add_node(int p_parent,int p_owner,int p_type,int p_name, int p_instance);
+		void add_node_property(int p_node,int p_name,int p_value);
+		void add_node_group(int p_node,int p_group);
+		void set_base_scene(int p_idx);
+		void add_connection(int p_from,int p_to, int p_signal, int p_method, int p_flags,const Vector<int>& p_binds);
+		void add_editable_instance(const NodePath& p_path);
+
+		*/
+
+		int parent=-1;
+		int owner=-1;
+		int type=-1;
+		int name=-1;
+		int instance=-1;
+
+		if (next_tag.fields.has("name")) {
+			name=packed_scene->get_state()->add_name(next_tag.fields["name"]);
+		}
+
+		if (next_tag.fields.has("parent")) {
+			parent=packed_scene->get_state()->add_node_path(next_tag.fields["parent"]);
+		}
+
+		if (next_tag.fields.has("owner")) {
+			owner=packed_scene->get_state()->add_node_path(next_tag.fields["owner"]);
+		} else {
+			if (parent!=-1)
+				owner=0; //if no owner, owner is root
+		}
+
+
+		if (next_tag.fields.has("type")) {
+			type=packed_scene->get_state()->add_name(next_tag.fields["type"]);
+		}
+
+		if (next_tag.fields.has("instance")) {
+
+			instance=packed_scene->get_state()->add_value(next_tag.fields["instance"]);
+
+			if (packed_scene->get_state()->get_node_count()==0 && parent==-1) {
+				packed_scene->get_state()->set_base_scene(instance);
+				instance=-1;
+			}
+		}
+
+		int node_id = packed_scene->get_state()->add_node(parent,owner,type,name,instance);
+
+
+		while(true) {
+
+			String assign;
+			Variant value;
+
+			error = VariantParser::parse_tag_assign_eof(&stream,lines,error_text,next_tag,assign,value,&rp);
+
+			if (error) {
+				if (error!=ERR_FILE_EOF) {
+					_printerr();
+				} else {
+					resource=packed_scene;
+				}
+				return error;
+			}
+
+			if (assign!=String()) {
+				int nameidx = packed_scene->get_state()->add_name(assign);
+				int valueidx = packed_scene->get_state()->add_value(value);
+				packed_scene->get_state()->add_node_property(node_id,nameidx,valueidx);
+				//it's assignment
+			} else if (next_tag.name!=String()) {
+
+				error=OK;
+				return error;
+			} else {
+
+				resource=packed_scene;
+				error=ERR_FILE_EOF;
+				return error;
+			}
+
+		}
+
+		return OK;
+
+	} else if (next_tag.name=="connection") {
+
+		if (!is_scene) {
+
+			error_text+="found the 'connection' tag on a resource file!";
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return error;
+		}
+
+		if (!next_tag.fields.has("from")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="missing 'from' field from connection tag";
+			_printerr();
+			return error;
+		}
+
+		if (!next_tag.fields.has("to")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="missing 'to' field from connection tag";
+			_printerr();
+			return error;
+		}
+
+		if (!next_tag.fields.has("signal")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="missing 'signal' field from connection tag";
+			_printerr();
+			return error;
+		}
+
+		if (!next_tag.fields.has("method")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="missing 'method' field from connection tag";
+			_printerr();
+			return error;
+		}
+
+		NodePath from = next_tag.fields["from"];
+		NodePath to = next_tag.fields["to"];
+		StringName method = next_tag.fields["method"];
+		StringName signal = next_tag.fields["signal"];
+		int flags=CONNECT_PERSIST;
+		Array binds;
+
+		if (next_tag.fields.has("flags")) {
+			flags=next_tag.fields["flags"];
+		}
+
+		if (next_tag.fields.has("binds")) {
+			binds=next_tag.fields["binds"];
+		}
+
+		Vector<int> bind_ints;
+		//store every bound argument as a scene value and keep its index, starting from the first bind
+		for(int i=0;i<binds.size();i++) {
+			bind_ints.push_back( packed_scene->get_state()->add_value( binds[i] ) );
+		}
+
+		packed_scene->get_state()->add_connection(
+					packed_scene->get_state()->add_node_path(from.simplified()),
+					packed_scene->get_state()->add_node_path(to.simplified()),
+					packed_scene->get_state()->add_name(signal),
+					packed_scene->get_state()->add_name(method),
+					flags,
+					bind_ints
+					);
+
+		error = VariantParser::parse_tag(&stream,lines,error_text,next_tag,&rp);
+
+		if (error) {
+			if (error!=ERR_FILE_EOF) {
+				_printerr();
+			} else {
+				resource=packed_scene;
+			}
+		}
+
+		return error;
+	} else if (next_tag.name=="editable") {
+
+		if (!is_scene) {
+
+			error_text+="found the 'editable' tag on a resource file!";
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return error;
+		}
+
+		if (!next_tag.fields.has("path")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="missing 'path' field from editable tag";
+			_printerr();
+			return error;
+		}
+
+		NodePath path = next_tag.fields["path"];
+
+		packed_scene->get_state()->add_editable_instance(path.simplified());
+
+		error = VariantParser::parse_tag(&stream,lines,error_text,next_tag,&rp);
+
+		if (error) {
+			if (error!=ERR_FILE_EOF) {
+				_printerr();
+			} else {
+				resource=packed_scene;
+			}
+		}
+
+		return error;
+
+	} else {
+
+		error_text+="Unknown tag in file: "+next_tag.name;
+		_printerr();
+		error=ERR_FILE_CORRUPT;
+		return error;
+	}
+
+	return OK;
+}
+
+int ResourceInteractiveLoaderText::get_stage() const {
+	// Returns the current load stage as tracked in 'resource_current'.
+	return resource_current;
+}
+int ResourceInteractiveLoaderText::get_stage_count() const {
+	// Returns 'resources_total', which open() reads from the header's optional "load_steps" field (0 when absent).
+	return resources_total;//+ext_resources;
+}
+
+ResourceInteractiveLoaderText::~ResourceInteractiveLoaderText() {
+	// Frees the FileAccess stored in 'f' (assigned by open() and recognize()); 'f' is deleted unconditionally.
+	memdelete(f);
+}
+
+void ResourceInteractiveLoaderText::get_dependencies(FileAccess *f,List<String> *p_dependencies,bool p_add_types) {
+	// Appends the path of every leading [ext_resource] tag to p_dependencies ("::<type>" is added when p_add_types is set).
+	open(f);
+	ERR_FAIL_COND(error!=OK);
+	while(next_tag.name=="ext_resource") {
+		// 'path' is read below, so it is validated just like 'type' and 'id'
+		if (!next_tag.fields.has("path")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'path' in external resource tag";
+			_printerr();
+			return;
+		}
+
+		if (!next_tag.fields.has("type")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'type' in external resource tag";
+			_printerr();
+			return;
+		}
+
+		if (!next_tag.fields.has("id")) {
+			error=ERR_FILE_CORRUPT;
+			error_text="Missing 'id' in external resource tag";
+			_printerr();
+			return;
+		}
+
+		String path=next_tag.fields["path"];
+		String type=next_tag.fields["type"];
+
+		if (path.find("://")==-1 && path.is_rel_path()) {
+			// path is relative to file being loaded, so convert to a resource path
+			path=Globals::get_singleton()->localize_path(local_path.get_base_dir().plus_file(path));
+		}
+
+		if (p_add_types) {
+			path+="::"+type;
+		}
+
+		p_dependencies->push_back(path);
+		Error err = VariantParser::parse_tag(&stream,lines,error_text,next_tag,&rp);
+
+		if (err) {
+			error_text="Unexpected end of file";
+			_printerr();
+			error=ERR_FILE_CORRUPT;
+			return; //next_tag is not trustworthy after a failed parse, so stop instead of looping on it
+		}
+	}
+}
+
+Error ResourceInteractiveLoaderText::rename_dependencies(FileAccess *p_f, const String &p_path,const Map<String,String>& p_map) {
+	// Dependency renaming is not implemented for the text format yet: the old XML-based code below is compiled out.
+	// Still take ownership of p_f: the destructor unconditionally deletes 'f', which nothing else assigns on this path.
+	f=p_f;
+
+#if 0
+	open(p_f);
+	ERR_FAIL_COND_V(error!=OK,error);
+
+	//FileAccess
+
+	bool old_format=false;
+
+	FileAccess *fw = NULL;
+
+	String base_path=local_path.get_base_dir();
+
+	while(true) {
+		bool exit;
+		List<String> order;
+
+		Tag *tag = parse_tag(&exit,true,&order);
+
+		bool done=false;
+
+		if (!tag) {
+			if (fw) {
+				memdelete(fw);
+			}
+			error=ERR_FILE_CORRUPT;
+			ERR_FAIL_COND_V(!exit,error);
+			error=ERR_FILE_EOF;
+
+			return error;
+		}
+
+		if (tag->name=="ext_resource") {
+
+			if (!tag->args.has("index") || !tag->args.has("path") || !tag->args.has("type")) {
+				old_format=true;
+				break;
+			}
+
+			if (!fw) {
+
+				fw=FileAccess::open(p_path+".depren",FileAccess::WRITE);
+				fw->store_line("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"); //no escape
+				fw->store_line("<resource_file type=\""+resource_type+"\" subresource_count=\""+itos(resources_total)+"\" version=\""+itos(VERSION_MAJOR)+"."+itos(VERSION_MINOR)+"\" version_name=\""+VERSION_FULL_NAME+"\">");
+
+			}
+
+			String path = tag->args["path"];
+			String index = tag->args["index"];
+			String type = tag->args["type"];
+
+
+			bool relative=false;
+			if (!path.begins_with("res://")) {
+				path=base_path.plus_file(path).simplify_path();
+				relative=true;
+			}
+
+
+			if (p_map.has(path)) {
+				String np=p_map[path];
+				path=np;
+			}
+
+			if (relative) {
+				//restore relative
+				path=base_path.path_to_file(path);
+			}
+
+			tag->args["path"]=path;
+			tag->args["index"]=index;
+			tag->args["type"]=type;
+
+		} else {
+
+			done=true;
+		}
+
+		String tagt="\t<";
+		if (exit)
+			tagt+="/";
+		tagt+=tag->name;
+
+		for(List<String>::Element *E=order.front();E;E=E->next()) {
+			tagt+=" "+E->get()+"=\""+tag->args[E->get()]+"\"";
+		}
+		tagt+=">";
+		fw->store_line(tagt);
+		if (done)
+			break;
+		close_tag("ext_resource");
+		fw->store_line("\t</ext_resource>");
+
+	}
+
+
+	if (old_format) {
+		if (fw)
+			memdelete(fw);
+
+		DirAccess *da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM);
+		da->remove(p_path+".depren");
+		memdelete(da);
+		//fall back to the old approach;
+
+		WARN_PRINT(("This file is old, so it can't refactor dependencies, opening and resaving: "+p_path).utf8().get_data());
+
+		Error err;
+		FileAccess *f2 = FileAccess::open(p_path,FileAccess::READ,&err);
+		if (err!=OK) {
+			ERR_FAIL_COND_V(err!=OK,ERR_FILE_CANT_OPEN);
+		}
+
+		Ref<ResourceInteractiveLoaderText> ria = memnew( ResourceInteractiveLoaderText );
+		ria->local_path=Globals::get_singleton()->localize_path(p_path);
+		ria->res_path=ria->local_path;
+		ria->remaps=p_map;
+	//	ria->set_local_path( Globals::get_singleton()->localize_path(p_path) );
+		ria->open(f2);
+
+		err = ria->poll();
+
+		while(err==OK) {
+			err=ria->poll();
+		}
+
+		ERR_FAIL_COND_V(err!=ERR_FILE_EOF,ERR_FILE_CORRUPT);
+		RES res = ria->get_resource();
+		ERR_FAIL_COND_V(!res.is_valid(),ERR_FILE_CORRUPT);
+
+		return ResourceFormatSaverText::singleton->save(p_path,res);
+	}
+
+	if (!fw) {
+
+		return OK; //nothing to rename, do nothing
+	}
+
+	uint8_t c=f->get_8();
+	while(!f->eof_reached()) {
+		fw->store_8(c);
+		c=f->get_8();
+	}
+
+	bool all_ok = fw->get_error()==OK;
+
+	memdelete(fw);
+
+	if (!all_ok) {
+		return ERR_CANT_CREATE;
+	}
+
+	DirAccess *da = DirAccess::create(DirAccess::ACCESS_RESOURCES);
+	da->remove(p_path);
+	da->rename(p_path+".depren",p_path);
+	memdelete(da);
+#endif
+	return OK;
+
+}
+
+
+void ResourceInteractiveLoaderText::open(FileAccess *p_f) {
+	// Takes p_f as the input file, parses the header tag (gd_scene / gd_resource) and reads ahead the first content tag.
+	error=OK;
+
+	lines=1;
+	f=p_f;
+	resource_current=0; //not assigned anywhere else, so give get_stage() a defined value
+	resources_total=0; //defined even when the header is rejected below
+	stream.f=f;
+	is_scene=false;
+
+
+	VariantParser::Tag tag;
+	Error err = VariantParser::parse_tag(&stream,lines,error_text,tag);
+
+	if (err) {
+
+		error=err;
+		_printerr();
+		return;
+	}
+
+	if (tag.fields.has("format")) {
+		int fmt = tag.fields["format"];
+		if (fmt>FORMAT_VERSION) {
+			error_text="Saved with newer format version";
+			_printerr();
+			error=ERR_PARSE_ERROR;
+			return;
+		}
+	}
+
+
+	if (tag.name=="gd_scene") {
+		is_scene=true;
+		packed_scene.instance();
+
+	} else if (tag.name=="gd_resource") {
+		if (!tag.fields.has("type")) {
+			error_text="Missing 'type' field in 'gd_resource' tag";
+			_printerr();
+			error=ERR_PARSE_ERROR;
+			return;
+		}
+
+		res_type=tag.fields["type"];
+
+	} else {
+		error_text="Unrecognized file type: "+tag.name;
+		_printerr();
+		error=ERR_PARSE_ERROR;
+		return;
+
+	}
+
+
+
+	if (tag.fields.has("load_steps")) {
+		resources_total=tag.fields["load_steps"];
+	} else {
+		resources_total=0;
+	}
+	// The resource parser callbacks must be in place before 'rp' is handed to the parser,
+	// otherwise the first content tag would be parsed with uninitialised function pointers.
+	rp.ext_func=_parse_ext_resources;
+	rp.sub_func=_parse_sub_resources;
+	rp.func=NULL;
+	rp.userdata=this;
+
+	err = VariantParser::parse_tag(&stream,lines,error_text,next_tag,&rp);
+
+	if (err) {
+		error_text="Unexpected end of file";
+		_printerr();
+		error=ERR_FILE_CORRUPT;
+	}
+}
+
+
+
+
+String ResourceInteractiveLoaderText::recognize(FileAccess *p_f) {
+	// Reads only the header tag of p_f and returns the resource type it declares, or "" if it cannot be determined.
+	error=OK;
+
+	lines=1;
+	f=p_f;
+
+	stream.f=f;
+
+	// Only the first tag is parsed; no resource parser is passed because nothing is instantiated here.
+	VariantParser::Tag tag;
+	Error err = VariantParser::parse_tag(&stream,lines,error_text,tag);
+
+	if (err) {
+		_printerr();
+		return "";
+	}
+
+	if (tag.fields.has("format")) {
+		int fmt = tag.fields["format"];
+		if (fmt>FORMAT_VERSION) {
+			error_text="Saved with newer format version";
+			_printerr();
+			return "";
+		}
+	}
+
+	if (tag.name=="gd_scene")
+		return "PackedScene";
+
+	if (tag.name!="gd_resource")
+		return "";
+
+	// A gd_resource header names its concrete type in the 'type' field.
+
+	if (!tag.fields.has("type")) {
+		error_text="Missing 'type' field in 'gd_resource' tag";
+		_printerr();
+		return "";
+	}
+
+	return tag.fields["type"];
+
+
+}
+
+/////////////////////
+
+Ref<ResourceInteractiveLoader> ResourceFormatLoaderText::load_interactive(const String &p_path, Error *r_error) {
+	// Opens p_path and returns a text loader positioned after the header; returns a null Ref if the file can't be opened.
+	if (r_error)
+		*r_error=ERR_CANT_OPEN;
+	// NOTE: *r_error is not written again in this function, not even on success.
+	Error err;
+	FileAccess *f = FileAccess::open(p_path,FileAccess::READ,&err);
+
+
+	if (err!=OK) {
+
+		ERR_FAIL_COND_V(err!=OK,Ref<ResourceInteractiveLoader>());
+	}
+
+	Ref<ResourceInteractiveLoaderText> ria = memnew( ResourceInteractiveLoaderText );
+	ria->local_path=Globals::get_singleton()->localize_path(p_path);
+	ria->res_path=ria->local_path;
+//	ria->set_local_path( Globals::get_singleton()->localize_path(p_path) );
+	ria->open(f);
+	// Header errors found by open() stay in ria->error and are returned by the first poll().
+	return ria;
+}
+
+void ResourceFormatLoaderText::get_recognized_extensions_for_type(const String& p_type,List<String> *p_extensions) const {
+	// Appends "tscn" for "PackedScene"; every other type name gets "tres".
+
+	if (p_type=="PackedScene")
+		p_extensions->push_back("tscn");
+	else
+		p_extensions->push_back("tres");
+
+}
+
+void ResourceFormatLoaderText::get_recognized_extensions(List<String> *p_extensions) const{
+	// Appends both extensions handled by this loader: text scenes ("tscn") and text resources ("tres").
+	p_extensions->push_back("tscn");
+	p_extensions->push_back("tres");
+}
+
+bool ResourceFormatLoaderText::handles_type(const String& p_type) const{
+	// Accepts every type name unconditionally; p_type is not inspected.
+	return true;
+}
+String ResourceFormatLoaderText::get_resource_type(const String &p_path) const{
+	// Returns the resource type stored at p_path: decided by extension for "tscn", by reading the header for "tres".
+
+
+	String ext=p_path.extension().to_lower();
+	if (ext=="tscn")
+		return "PackedScene";
+	else if (ext!="tres")
+		return String();
+
+	//for "tres" the file header has to be inspected
+
+	FileAccess *f = FileAccess::open(p_path,FileAccess::READ);
+	if (!f) {
+
+		return ""; //could not read
+	}
+
+	Ref<ResourceInteractiveLoaderText> ria = memnew( ResourceInteractiveLoaderText );
+	ria->local_path=Globals::get_singleton()->localize_path(p_path);
+	ria->res_path=ria->local_path;
+//	ria->set_local_path( Globals::get_singleton()->localize_path(p_path) );
+	String r = ria->recognize(f);
+	return r;
+}
+
+
+void ResourceFormatLoaderText::get_dependencies(const String& p_path,List<String> *p_dependencies,bool p_add_types) {
+	// Opens p_path and lets a temporary text loader append the file's external dependencies to p_dependencies.
+	FileAccess *f = FileAccess::open(p_path,FileAccess::READ);
+	if (!f) {
+
+		ERR_FAIL();
+	}
+
+	Ref<ResourceInteractiveLoaderText> ria = memnew( ResourceInteractiveLoaderText );
+	ria->local_path=Globals::get_singleton()->localize_path(p_path);
+	ria->res_path=ria->local_path;
+//	ria->set_local_path( Globals::get_singleton()->localize_path(p_path) );
+	ria->get_dependencies(f,p_dependencies,p_add_types);
+
+
+}
+
+Error ResourceFormatLoaderText::rename_dependencies(const String &p_path,const Map<String,String>& p_map) {
+	// Opens p_path and delegates to a temporary text loader; fails with ERR_CANT_OPEN if the file can't be opened.
+	FileAccess *f = FileAccess::open(p_path,FileAccess::READ);
+	if (!f) {
+
+		ERR_FAIL_V(ERR_CANT_OPEN);
+	}
+
+	Ref<ResourceInteractiveLoaderText> ria = memnew( ResourceInteractiveLoaderText );
+	ria->local_path=Globals::get_singleton()->localize_path(p_path);
+	ria->res_path=ria->local_path;
+//	ria->set_local_path( Globals::get_singleton()->localize_path(p_path) );
+	return ria->rename_dependencies(f,p_path,p_map);
+}
+
+
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+/*****************************************************************************************************/
+
+
 void ResourceFormatSaverTextInstance::write_property(const String& p_name,const Variant& p_property,bool *r_ok) {
 
 	if (r_ok)
@@ -137,11 +1153,11 @@ void ResourceFormatSaverTextInstance::write_property(const String& p_name,const
 			Image img=p_property;
 
 			if (img.empty()) {
-				f->store_string("RawImage()");
+				f->store_string("Image()");
 				break;
 			}
 
-			String imgstr="RawImage( ";
+			String imgstr="Image( ";
 			imgstr+=itos(img.get_width());
 			imgstr+=", "+itos(img.get_height());
 			imgstr+=", "+itos(img.get_mipmaps());
@@ -182,10 +1198,9 @@ void ResourceFormatSaverTextInstance::write_property(const String& p_name,const
 			const uint8_t *ptr=r.ptr();;
 			for (int i=0;i<len;i++) {
 
-				uint8_t byte = ptr[i];
-				const char  hex[16]={'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
-				char str[3]={ hex[byte>>4], hex[byte&0xF], 0};
-				s+=str;
+				if (i>0)
+					s+=", ";
+				s+=itos(ptr[i]);
 			}
 
 			imgstr+=", ";
@@ -287,7 +1302,7 @@ void ResourceFormatSaverTextInstance::write_property(const String& p_name,const
 
 		case Variant::RAW_ARRAY: {
 
-			f->store_string("RawArray( ");
+			f->store_string("ByteArray( ");
 			String s;
 			DVector<uint8_t> data = p_property;
 			int len = data.size();
@@ -297,10 +1312,8 @@ void ResourceFormatSaverTextInstance::write_property(const String& p_name,const
 
 				if (i>0)
 					f->store_string(", ");
-				uint8_t byte = ptr[i];
-				const char  hex[16]={'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
-				char str[3]={ hex[byte>>4], hex[byte&0xF], 0};
-				f->store_string(str);
+
+				f->store_string(itos(ptr[i]));
 
 			}
 
@@ -673,6 +1686,9 @@ Error ResourceFormatSaverTextInstance::save(const String &p_path,const RES& p_re
 			Ref<PackedScene> instance = state->get_node_instance(i);
 			Vector<StringName> groups = state->get_node_groups(i);
 
+			if (instance.is_valid())
+				print_line("for path "+String(path)+" instance "+instance->get_path());
+
 			String header="[node";
 			header+=" name=\""+String(name)+"\"";
 			if (type!=StringName()) {
@@ -690,7 +1706,7 @@ Error ResourceFormatSaverTextInstance::save(const String &p_path,const RES& p_re
 				for(int j=0;j<groups.size();j++) {
 					if (j>0)
 						sgroups+=", ";
-					sgroups+="\""+groups[i].operator String().c_escape()+"\"";
+					sgroups+="\""+groups[j].operator String().c_escape()+"\"";
 				}
 				sgroups+=" ]";
 				header+=sgroups;
diff --git a/scene/resources/scene_format_text.h b/scene/resources/scene_format_text.h
index 576a78d183..4f18af2b62 100644
--- a/scene/resources/scene_format_text.h
+++ b/scene/resources/scene_format_text.h
@@ -5,6 +5,100 @@
 #include "io/resource_saver.h"
 #include "os/file_access.h"
 #include "scene/resources/packed_scene.h"
+#include "variant_parser.h"
+
+
+
+class ResourceInteractiveLoaderText : public ResourceInteractiveLoader {
+	// Incremental loader for the text formats: "gd_scene" files yield a PackedScene, "gd_resource" files a Resource.
+	String local_path;
+	String res_path;
+	String error_text;
+	// Input file; deleted by the destructor.
+	FileAccess *f;
+
+	VariantParser::StreamFile stream;
+	// Path and type of one [ext_resource] tag.
+	struct ExtResource {
+		String path;
+		String type;
+	};
+
+
+	bool is_scene;
+	String res_type;
+
+
+
+	// [ext_resource] entries keyed by the tag's "id" field.
+
+	Map<int,ExtResource> ext_resources;
+
+	int resources_total;
+	int resource_current;
+	String resource_type;
+	// Tag read ahead by the parser; poll() dispatches on its name.
+	VariantParser::Tag next_tag;
+	// Current line number, included in error reports.
+	mutable int lines;
+
+	Map<String,String> remaps;
+	//void _printerr();
+	// Static trampolines installed in 'rp'; p_self is the loader instance passed as rp.userdata.
+	static Error _parse_sub_resources(void* p_self, VariantParser::Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str) { return reinterpret_cast<ResourceInteractiveLoaderText*>(p_self)->_parse_sub_resource(p_stream,r_res,line,r_err_str); }
+	static Error _parse_ext_resources(void* p_self, VariantParser::Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str) { return reinterpret_cast<ResourceInteractiveLoaderText*>(p_self)->_parse_ext_resource(p_stream,r_res,line,r_err_str); }
+
+	Error _parse_sub_resource(VariantParser::Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str);
+	Error _parse_ext_resource(VariantParser::Stream* p_stream,Ref<Resource>& r_res,int &line,String &r_err_str);
+
+	VariantParser::ResourceParser rp;
+
+	// Scene being built; instanced by open() for "gd_scene" files.
+	Ref<PackedScene> packed_scene;
+
+
+friend class ResourceFormatLoaderText;
+	// Resources created or loaded during poll() are appended here.
+	List<RES> resource_cache;
+	Error parse_property(Variant& r_v, String &r_name);
+	Error error;
+
+	RES resource;
+
+public:
+
+	virtual void set_local_path(const String& p_local_path);
+	virtual Ref<Resource> get_resource();
+	virtual Error poll();
+	virtual int get_stage() const;
+	virtual int get_stage_count() const;
+
+	void open(FileAccess *p_f);
+	String recognize(FileAccess *p_f);
+	void get_dependencies(FileAccess *p_f, List<String> *p_dependencies, bool p_add_types);
+	Error rename_dependencies(FileAccess *p_f, const String &p_path,const Map<String,String>& p_map);
+
+	// NOTE(review): no constructor is declared, so members such as 'f' have no defined value until open()/recognize() run.
+	~ResourceInteractiveLoaderText();
+
+};
+
+
+
+class ResourceFormatLoaderText : public ResourceFormatLoader {
+public:
+	// ResourceFormatLoader implementation for the text formats ("tscn" scenes and "tres" resources).
+	virtual Ref<ResourceInteractiveLoader> load_interactive(const String &p_path,Error *r_error=NULL);
+	virtual void get_recognized_extensions_for_type(const String& p_type,List<String> *p_extensions) const;
+	virtual void get_recognized_extensions(List<String> *p_extensions) const;
+	virtual bool handles_type(const String& p_type) const;
+	virtual String get_resource_type(const String &p_path) const;
+	virtual void get_dependencies(const String& p_path, List<String> *p_dependencies, bool p_add_types=false);
+	virtual Error rename_dependencies(const String &p_path,const Map<String,String>& p_map);
+
+
+};
+
 
 class ResourceFormatSaverTextInstance  {
 
diff --git a/scene/resources/shader_graph.cpp b/scene/resources/shader_graph.cpp
index 49a1bdccb1..7b67eaeda8 100644
--- a/scene/resources/shader_graph.cpp
+++ b/scene/resources/shader_graph.cpp
@@ -1435,6 +1435,7 @@ const ShaderGraph::InOutParamInfo ShaderGraph::inout_param_info[]={
 	{MODE_MATERIAL,SHADER_TYPE_LIGHT,"ShadeParam","SHADE_PARAM","",SLOT_TYPE_SCALAR,SLOT_IN},
 	//light out
 	{MODE_MATERIAL,SHADER_TYPE_LIGHT,"Light","LIGHT","",SLOT_TYPE_VEC,SLOT_OUT},
+	{MODE_MATERIAL,SHADER_TYPE_LIGHT,"Shadow", "SHADOW", "",SLOT_TYPE_VEC, SLOT_OUT },
 	//canvas item vertex in
 	{MODE_CANVAS_ITEM,SHADER_TYPE_VERTEX,"Vertex","vec3(SRC_VERTEX,0)","",SLOT_TYPE_VEC,SLOT_IN},
 	{MODE_CANVAS_ITEM,SHADER_TYPE_VERTEX,"UV","SRC_UV","",SLOT_TYPE_VEC,SLOT_IN},
@@ -2083,7 +2084,7 @@ void ShaderGraph::_add_node_code(ShaderType p_type,Node *p_node,const Vector<Str
 #define DEF_MATRIX(slot) \
 	if (p_inputs[slot].ends_with("def")){\
 		Transform xf = p_node->defaults[slot]; \
-		code+=String(typestr[3])+" "+p_inputs[slot]+"=mat4(\n";\
+		code+=String(typestr[2])+" "+p_inputs[slot]+"=mat4(\n";\
 		code+="\tvec4(vec3("+rtos(xf.basis.get_axis(0).x)+","+rtos(xf.basis.get_axis(0).y)+","+rtos(xf.basis.get_axis(0).z)+"),0),\n";\
 		code+="\tvec4(vec3("+rtos(xf.basis.get_axis(1).x)+","+rtos(xf.basis.get_axis(1).y)+","+rtos(xf.basis.get_axis(1).z)+"),0),\n";\
 		code+="\tvec4(vec3("+rtos(xf.basis.get_axis(2).x)+","+rtos(xf.basis.get_axis(2).y)+","+rtos(xf.basis.get_axis(2).z)+"),0),\n";\
@@ -2393,15 +2394,29 @@ void ShaderGraph::_add_node_code(ShaderType p_type,Node *p_node,const Vector<Str
 			DEF_VEC(1);
 			DEF_VEC(2);
 			DEF_VEC(3);
-			code += OUTNAME(p_node->id,0)+"=xform("+p_inputs[0]+","+p_inputs[1]+","+p_inputs[2]+","+","+p_inputs[3]+");\n";
+			code += OUTNAME(p_node->id, 0) + "=mat4(" +
+				"vec4(" + p_inputs[0] + ".x," + p_inputs[0] + ".y," + p_inputs[0] + ".z, 0.0),"
+				"vec4(" + p_inputs[1] + ".x," + p_inputs[1] + ".y," + p_inputs[1] + ".z, 0.0),"
+				"vec4(" + p_inputs[2] + ".x," + p_inputs[2] + ".y," + p_inputs[2] + ".z, 0.0),"
+				"vec4(" + p_inputs[3] + ".x," + p_inputs[3] + ".y," + p_inputs[3] + ".z, 1.0));\n";
 
 		}break;
 		case NODE_XFORM_TO_VEC: {
 			DEF_MATRIX(0);
-			code += OUTNAME(p_node->id,0)+"="+p_inputs[0]+".x;\n";
-			code += OUTNAME(p_node->id,1)+"="+p_inputs[0]+".y;\n";
-			code += OUTNAME(p_node->id,2)+"="+p_inputs[0]+".z;\n";
-			code += OUTNAME(p_node->id,3)+"="+p_inputs[0]+".o;\n";
+			code += OUTNAME(p_node->id, 0) + ";\n";
+			code += OUTNAME(p_node->id, 1) + ";\n";
+			code += OUTNAME(p_node->id, 2) + ";\n";
+			code += OUTNAME(p_node->id, 3) + ";\n";
+			code += "{\n";
+			code += "\tvec4 xform_row_01=" + p_inputs[0] + ".x;\n";
+			code += "\tvec4 xform_row_02=" + p_inputs[0] + ".y;\n";
+			code += "\tvec4 xform_row_03=" + p_inputs[0] + ".z;\n";
+			code += "\tvec4 xform_row_04=" + p_inputs[0] + ".w;\n";
+			code += "\t" + OUTVAR(p_node->id, 0) + "=vec3(xform_row_01.x, xform_row_01.y, xform_row_01.z);\n";
+			code += "\t" + OUTVAR(p_node->id, 1) + "=vec3(xform_row_02.x, xform_row_02.y, xform_row_02.z);\n";
+			code += "\t" + OUTVAR(p_node->id, 2) + "=vec3(xform_row_03.x, xform_row_03.y, xform_row_03.z);\n";
+			code += "\t" + OUTVAR(p_node->id, 3) + "=vec3(xform_row_04.x, xform_row_04.y, xform_row_04.z);\n";
+			code += "}\n";
 		}break;
 		case NODE_SCALAR_INTERP: {
 			DEF_SCALAR(0);
diff --git a/scene/resources/shape_line_2d.cpp b/scene/resources/shape_line_2d.cpp
index c660b604f3..97e9985754 100644
--- a/scene/resources/shape_line_2d.cpp
+++ b/scene/resources/shape_line_2d.cpp
@@ -35,6 +35,7 @@ void LineShape2D::_update_shape() {
 	arr.push_back(normal);
 	arr.push_back(d);
 	Physics2DServer::get_singleton()->shape_set_data(get_rid(),arr);
+	emit_changed();
 
 }
 
diff --git a/scene/resources/texture.cpp b/scene/resources/texture.cpp
index 994473f11e..5df3d64d1a 100644
--- a/scene/resources/texture.cpp
+++ b/scene/resources/texture.cpp
@@ -329,6 +329,16 @@ void ImageTexture::normal_to_xy() {
 	create_from_image(img,flags);
 }
 
+void ImageTexture::shrink_x2_and_keep_size() {
+
+	Size2 sizeov=get_size();
+	Image img = get_data();
+	img.resize(img.get_width()/2,img.get_height()/2,Image::INTERPOLATE_BILINEAR);
+	create_from_image(img,flags);
+	set_size_override(sizeov);
+
+}
+
 bool ImageTexture::has_alpha() const {
 
 	return ( format==Image::FORMAT_GRAYSCALE_ALPHA || format==Image::FORMAT_INDEXED_ALPHA || format==Image::FORMAT_RGBA );
@@ -424,10 +434,13 @@ void ImageTexture::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("fix_alpha_edges"),&ImageTexture::fix_alpha_edges);
 	ObjectTypeDB::bind_method(_MD("premultiply_alpha"),&ImageTexture::premultiply_alpha);
 	ObjectTypeDB::bind_method(_MD("normal_to_xy"),&ImageTexture::normal_to_xy);
+	ObjectTypeDB::bind_method(_MD("shrink_x2_and_keep_size"),&ImageTexture::shrink_x2_and_keep_size);
+
 	ObjectTypeDB::bind_method(_MD("set_size_override","size"),&ImageTexture::set_size_override);
 	ObjectTypeDB::set_method_flags(get_type_static(),_SCS("fix_alpha_edges"),METHOD_FLAGS_DEFAULT|METHOD_FLAG_EDITOR);
 	ObjectTypeDB::set_method_flags(get_type_static(),_SCS("premultiply_alpha"),METHOD_FLAGS_DEFAULT|METHOD_FLAG_EDITOR);
 	ObjectTypeDB::set_method_flags(get_type_static(),_SCS("normal_to_xy"),METHOD_FLAGS_DEFAULT|METHOD_FLAG_EDITOR);
+	ObjectTypeDB::set_method_flags(get_type_static(),_SCS("shrink_x2_and_keep_size"),METHOD_FLAGS_DEFAULT|METHOD_FLAG_EDITOR);
 	ObjectTypeDB::bind_method(_MD("_reload_hook","rid"),&ImageTexture::_reload_hook);
 
 
diff --git a/scene/resources/texture.h b/scene/resources/texture.h
index 1a4f211af1..3be13bf815 100644
--- a/scene/resources/texture.h
+++ b/scene/resources/texture.h
@@ -148,6 +148,7 @@ public:
 	void fix_alpha_edges();
 	void premultiply_alpha();
 	void normal_to_xy();
+	void shrink_x2_and_keep_size();
 
 
 	void set_size_override(const Size2& p_size);
diff --git a/scene/resources/world_2d.cpp b/scene/resources/world_2d.cpp
index d8d9c5b675..3b1f1d2346 100644
--- a/scene/resources/world_2d.cpp
+++ b/scene/resources/world_2d.cpp
@@ -374,7 +374,7 @@ World2D::World2D() {
 	Physics2DServer::get_singleton()->area_set_param(space,Physics2DServer::AREA_PARAM_GRAVITY,GLOBAL_DEF("physics_2d/default_gravity",98));
 	Physics2DServer::get_singleton()->area_set_param(space,Physics2DServer::AREA_PARAM_GRAVITY_VECTOR,GLOBAL_DEF("physics_2d/default_gravity_vector",Vector2(0,1)));
 	Physics2DServer::get_singleton()->area_set_param(space,Physics2DServer::AREA_PARAM_LINEAR_DAMP,GLOBAL_DEF("physics_2d/default_density",0.1));
-	Physics2DServer::get_singleton()->area_set_param(space,Physics2DServer::AREA_PARAM_ANGULAR_DAMP,GLOBAL_DEF("physics_2d/default_density",1));
+	Physics2DServer::get_singleton()->area_set_param(space,Physics2DServer::AREA_PARAM_ANGULAR_DAMP,GLOBAL_DEF("physics_2d/default_angular_damp",1));
 	Physics2DServer::get_singleton()->space_set_param(space,Physics2DServer::SPACE_PARAM_CONTACT_RECYCLE_RADIUS,1.0);
 	Physics2DServer::get_singleton()->space_set_param(space,Physics2DServer::SPACE_PARAM_CONTACT_MAX_SEPARATION,1.5);
 	Physics2DServer::get_singleton()->space_set_param(space,Physics2DServer::SPACE_PARAM_BODY_MAX_ALLOWED_PENETRATION,0.3);
diff --git a/servers/SCsub b/servers/SCsub
index 3871c30cfa..d861847101 100644
--- a/servers/SCsub
+++ b/servers/SCsub
@@ -15,5 +15,3 @@ SConscript('spatial_sound_2d/SCsub');
 lib = env.Library("servers",env.servers_sources)
 
 env.Prepend(LIBS=[lib])
-
-
diff --git a/servers/audio/SCsub b/servers/audio/SCsub
index 16fe3a59ac..d31af2c1c4 100644
--- a/servers/audio/SCsub
+++ b/servers/audio/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.servers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/servers/audio/audio_mixer_sw.cpp b/servers/audio/audio_mixer_sw.cpp
index 791f31719e..033cd333d5 100644
--- a/servers/audio/audio_mixer_sw.cpp
+++ b/servers/audio/audio_mixer_sw.cpp
@@ -45,14 +45,14 @@ void AudioMixerSW::do_resample(const Depth* p_src, int32_t *p_dst, ResamplerStat
 	while (p_state->amount--) {
 
 		int32_t pos=p_state->pos >> MIX_FRAC_BITS;
-		if (is_stereo)
+		if (is_stereo && !is_ima_adpcm)
 			pos<<=1;
 
 		if (is_ima_adpcm) {
 
-			int sample_pos = pos + p_state->ima_adpcm->window_ofs;
+			int sample_pos = pos + p_state->ima_adpcm[0].window_ofs;
 
-			while(sample_pos>p_state->ima_adpcm->last_nibble) {
+			while(sample_pos>p_state->ima_adpcm[0].last_nibble) {
 
 
 				static const int16_t _ima_adpcm_step_table[89] = {
@@ -72,52 +72,64 @@ void AudioMixerSW::do_resample(const Depth* p_src, int32_t *p_dst, ResamplerStat
 					-1, -1, -1, -1, 2, 4, 6, 8
 				};
 
-				int16_t nibble,signed_nibble,diff,step;
+				for(int i=0;i<(is_stereo?2:1);i++) {
 
-				p_state->ima_adpcm->last_nibble++;
-				const uint8_t *src_ptr=p_state->ima_adpcm->ptr;
 
-				nibble = (p_state->ima_adpcm->last_nibble&1)?
-						(src_ptr[p_state->ima_adpcm->last_nibble>>1]>>4):(src_ptr[p_state->ima_adpcm->last_nibble>>1]&0xF);
-				step=_ima_adpcm_step_table[p_state->ima_adpcm->step_index];
+					int16_t nibble,signed_nibble,diff,step;
 
-				p_state->ima_adpcm->step_index += _ima_adpcm_index_table[nibble];
-				if (p_state->ima_adpcm->step_index<0)
-					p_state->ima_adpcm->step_index=0;
-				if (p_state->ima_adpcm->step_index>88)
-					p_state->ima_adpcm->step_index=88;
+					p_state->ima_adpcm[i].last_nibble++;
+					const uint8_t *src_ptr=p_state->ima_adpcm[i].ptr;
 
-				/*
-				signed_nibble = (nibble&7) * ((nibble&8)?-1:1);
-				diff = (2 * signed_nibble + 1) * step / 4; */
 
-				diff = step >> 3 ;
-				if (nibble & 1)
-					diff += step >> 2 ;
-				if (nibble & 2)
-					diff += step >> 1 ;
-				if (nibble & 4)
-					diff += step ;
-				if (nibble & 8)
-					diff = -diff ;
+					uint8_t nbb = src_ptr[ (p_state->ima_adpcm[i].last_nibble>>1) *  (is_stereo?2:1) + i ];
+					nibble = (p_state->ima_adpcm[i].last_nibble&1)?(nbb>>4):(nbb&0xF);
+					step=_ima_adpcm_step_table[p_state->ima_adpcm[i].step_index];
 
-				p_state->ima_adpcm->predictor+=diff;
-				if (p_state->ima_adpcm->predictor<-0x8000)
-					p_state->ima_adpcm->predictor=-0x8000;
-				else if (p_state->ima_adpcm->predictor>0x7FFF)
-					p_state->ima_adpcm->predictor=0x7FFF;
 
+					p_state->ima_adpcm[i].step_index += _ima_adpcm_index_table[nibble];
+					if (p_state->ima_adpcm[i].step_index<0)
+						p_state->ima_adpcm[i].step_index=0;
+					if (p_state->ima_adpcm[i].step_index>88)
+						p_state->ima_adpcm[i].step_index=88;
 
-				/* store loop if there */
-				if (p_state->ima_adpcm->last_nibble==p_state->ima_adpcm->loop_pos) {
+					/*
+					signed_nibble = (nibble&7) * ((nibble&8)?-1:1);
+					diff = (2 * signed_nibble + 1) * step / 4; */
+
+					diff = step >> 3 ;
+					if (nibble & 1)
+						diff += step >> 2 ;
+					if (nibble & 2)
+						diff += step >> 1 ;
+					if (nibble & 4)
+						diff += step ;
+					if (nibble & 8)
+						diff = -diff ;
+
+					p_state->ima_adpcm[i].predictor+=diff;
+					if (p_state->ima_adpcm[i].predictor<-0x8000)
+						p_state->ima_adpcm[i].predictor=-0x8000;
+					else if (p_state->ima_adpcm[i].predictor>0x7FFF)
+						p_state->ima_adpcm[i].predictor=0x7FFF;
+
+
+					/* on reaching the loop start, save the decoder state so it can be restored when looping */
+					if (p_state->ima_adpcm[i].last_nibble==p_state->ima_adpcm[i].loop_pos) {
+
+						p_state->ima_adpcm[i].loop_step_index = p_state->ima_adpcm[i].step_index;
+						p_state->ima_adpcm[i].loop_predictor = p_state->ima_adpcm[i].predictor;
+					}
+
+					//printf("%i - %i - pred %i\n",int(p_state->ima_adpcm[i].last_nibble),int(nibble),int(p_state->ima_adpcm[i].predictor));
 
-					p_state->ima_adpcm->loop_step_index = p_state->ima_adpcm->step_index;
-					p_state->ima_adpcm->loop_predictor = p_state->ima_adpcm->predictor;
 				}
 
 			}
 
-			final=p_state->ima_adpcm->predictor;
+			final=p_state->ima_adpcm[0].predictor;
+			if (is_stereo) {
+				final_r=p_state->ima_adpcm[1].predictor;
+			}
 
 		} else {
 			final=p_src[pos];
@@ -399,9 +411,10 @@ void AudioMixerSW::mix_channel(Channel& c) {
 
 	if (format==AS::SAMPLE_FORMAT_IMA_ADPCM) {
 
-		rstate.ima_adpcm=&c.mix.ima_adpcm;
+		rstate.ima_adpcm=c.mix.ima_adpcm;
 		if (loop_format!=AS::SAMPLE_LOOP_NONE) {
-			c.mix.ima_adpcm.loop_pos=loop_begin_fp>>MIX_FRAC_BITS;
+			c.mix.ima_adpcm[0].loop_pos=loop_begin_fp>>MIX_FRAC_BITS;
+			c.mix.ima_adpcm[1].loop_pos=loop_begin_fp>>MIX_FRAC_BITS;
 			loop_format=AS::SAMPLE_LOOP_FORWARD;
 		}
 	}
@@ -447,9 +460,11 @@ void AudioMixerSW::mix_channel(Channel& c) {
 					/* go to loop-begin */
 
 					if (format==AS::SAMPLE_FORMAT_IMA_ADPCM) {
-						c.mix.ima_adpcm.step_index=c.mix.ima_adpcm.loop_step_index;
-						c.mix.ima_adpcm.predictor=c.mix.ima_adpcm.loop_predictor;
-						c.mix.ima_adpcm.last_nibble=loop_begin_fp>>MIX_FRAC_BITS;
+						for(int i=0;i<2;i++) {
+							c.mix.ima_adpcm[i].step_index=c.mix.ima_adpcm[i].loop_step_index;
+							c.mix.ima_adpcm[i].predictor=c.mix.ima_adpcm[i].loop_predictor;
+							c.mix.ima_adpcm[i].last_nibble=loop_begin_fp>>MIX_FRAC_BITS;
+						}
 						c.mix.offset=loop_begin_fp;
 					} else {
 						c.mix.offset=loop_begin_fp+(c.mix.offset-loop_end_fp);
@@ -549,10 +564,12 @@ void AudioMixerSW::mix_channel(Channel& c) {
 			CALL_RESAMPLE_MODE(int16_t,is_stereo,false,use_filter,use_fx,interpolation_type,mix_channels);
 
 		} else if (format==AS::SAMPLE_FORMAT_IMA_ADPCM) {
-			c.mix.ima_adpcm.window_ofs=c.mix.offset>>MIX_FRAC_BITS;
-			c.mix.ima_adpcm.ptr=(const uint8_t*)data;
-			int8_t *src_ptr =  &((int8_t*)data)[(c.mix.offset >> MIX_FRAC_BITS)<<(is_stereo?1:0) ];
-			CALL_RESAMPLE_MODE(int8_t,false,true,use_filter,use_fx,interpolation_type,mix_channels);
+			for(int i=0;i<2;i++) {
+				c.mix.ima_adpcm[i].window_ofs=c.mix.offset>>MIX_FRAC_BITS;
+				c.mix.ima_adpcm[i].ptr=(const uint8_t*)data;
+			}
+			int8_t *src_ptr =  NULL;
+			CALL_RESAMPLE_MODE(int8_t,is_stereo,true,use_filter,use_fx,interpolation_type,mix_channels);
 
 		}
 
@@ -781,14 +798,16 @@ AudioMixer::ChannelID AudioMixerSW::channel_alloc(RID p_sample) {
 
 	if (sample_manager->sample_get_format(c.sample)==AudioServer::SAMPLE_FORMAT_IMA_ADPCM) {
 
-		c.mix.ima_adpcm.step_index=0;
-		c.mix.ima_adpcm.predictor=0;
-		c.mix.ima_adpcm.loop_step_index=0;
-		c.mix.ima_adpcm.loop_predictor=0;
-		c.mix.ima_adpcm.last_nibble=-1;
-		c.mix.ima_adpcm.loop_pos=0x7FFFFFFF;
-		c.mix.ima_adpcm.window_ofs=0;
-		c.mix.ima_adpcm.ptr=NULL;
+		for(int i=0;i<2;i++) {
+			c.mix.ima_adpcm[i].step_index=0;
+			c.mix.ima_adpcm[i].predictor=0;
+			c.mix.ima_adpcm[i].loop_step_index=0;
+			c.mix.ima_adpcm[i].loop_predictor=0;
+			c.mix.ima_adpcm[i].last_nibble=-1;
+			c.mix.ima_adpcm[i].loop_pos=0x7FFFFFFF;
+			c.mix.ima_adpcm[i].window_ofs=0;
+			c.mix.ima_adpcm[i].ptr=NULL;
+		}
 	}
 
 	ChannelID ret_id = index+c.check*MAX_CHANNELS;
diff --git a/servers/audio/audio_mixer_sw.h b/servers/audio/audio_mixer_sw.h
index cb38561c27..d8d9b7bacd 100644
--- a/servers/audio/audio_mixer_sw.h
+++ b/servers/audio/audio_mixer_sw.h
@@ -105,7 +105,7 @@ private:
 				int32_t loop_pos;
 				int32_t window_ofs;
 				const uint8_t *ptr;
-			} ima_adpcm;
+			} ima_adpcm[2];
 
 		} mix;
 
diff --git a/servers/audio/sample_manager_sw.cpp b/servers/audio/sample_manager_sw.cpp
index 9195136a5d..375aa88cd2 100644
--- a/servers/audio/sample_manager_sw.cpp
+++ b/servers/audio/sample_manager_sw.cpp
@@ -38,12 +38,8 @@ SampleManagerSW::~SampleManagerSW()
 
 RID SampleManagerMallocSW::sample_create(AS::SampleFormat p_format, bool p_stereo, int p_length) {
 
-	ERR_EXPLAIN("IMA-ADPCM and STEREO are not a valid combination for sample format.");
-	ERR_FAIL_COND_V( p_format == AS::SAMPLE_FORMAT_IMA_ADPCM && p_stereo,RID());
 	Sample *s = memnew( Sample );
 	int datalen = p_length;
-	if (p_stereo)
-		datalen*=2;
 	if (p_format==AS::SAMPLE_FORMAT_PCM16)
 		datalen*=2;
 	else if (p_format==AS::SAMPLE_FORMAT_IMA_ADPCM) {
@@ -53,6 +49,10 @@ RID SampleManagerMallocSW::sample_create(AS::SampleFormat p_format, bool p_stere
 		datalen/=2;
 		datalen+=4;
 	}
+
+	if (p_stereo)
+		datalen*=2;
+
 #define SAMPLE_EXTRA 16
 
 	s->data = memalloc(datalen+SAMPLE_EXTRA); //help the interpolator by allocating a little more..
diff --git a/servers/audio_server.cpp b/servers/audio_server.cpp
index c155f5204a..6c5a2de97b 100644
--- a/servers/audio_server.cpp
+++ b/servers/audio_server.cpp
@@ -132,7 +132,7 @@ void AudioServer::_bind_methods() {
 
 	ObjectTypeDB::bind_method(_MD("voice_stop","voice"), &AudioServer::voice_stop );
 
-	ObjectTypeDB::bind_method(_MD("free","rid"), &AudioServer::free );
+	ObjectTypeDB::bind_method(_MD("free_rid","rid"), &AudioServer::free );
 
 	ObjectTypeDB::bind_method(_MD("set_stream_global_volume_scale","scale"), &AudioServer::set_stream_global_volume_scale );
 	ObjectTypeDB::bind_method(_MD("get_stream_global_volume_scale"), &AudioServer::get_stream_global_volume_scale );
diff --git a/servers/physics/SCsub b/servers/physics/SCsub
index 3b84c5ef18..95296eadbe 100644
--- a/servers/physics/SCsub
+++ b/servers/physics/SCsub
@@ -5,5 +5,3 @@ env.add_source_files(env.servers_sources,"*.cpp")
 Export('env')
 
 SConscript("joints/SCsub")
-
-
diff --git a/servers/physics/body_sw.cpp b/servers/physics/body_sw.cpp
index 8edbaf0b89..c66e73b430 100644
--- a/servers/physics/body_sw.cpp
+++ b/servers/physics/body_sw.cpp
@@ -382,7 +382,7 @@ void BodySW::set_space(SpaceSW *p_space){
 
 }
 
-void BodySW::_compute_area_gravity(const AreaSW *p_area) {
+void BodySW::_compute_area_gravity_and_dampenings(const AreaSW *p_area) {
 
 	if (p_area->is_gravity_point()) {
 		if(p_area->get_gravity_distance_scale() > 0) {
@@ -394,6 +394,9 @@ void BodySW::_compute_area_gravity(const AreaSW *p_area) {
 	} else {
 		gravity += p_area->get_gravity_vector() * p_area->get_gravity();
 	}
+
+	area_linear_damp += p_area->get_linear_damp();
+	area_angular_damp += p_area->get_angular_damp();
 }
 
 void BodySW::integrate_forces(real_t p_step) {
@@ -409,13 +412,15 @@ void BodySW::integrate_forces(real_t p_step) {
 
 	int ac = areas.size();
 	bool replace = false;
-	gravity=Vector3(0,0,0);
+	gravity = Vector3(0,0,0);
+	area_linear_damp = 0;
+	area_angular_damp = 0;
 	if (ac) {
 		areas.sort();
 		const AreaCMP *aa = &areas[0];
 		damp_area = aa[ac-1].area;
 		for(int i=ac-1;i>=0;i--) {
-			_compute_area_gravity(aa[i].area);
+			_compute_area_gravity_and_dampenings(aa[i].area);
 			if (aa[i].area->get_space_override_mode() == PhysicsServer::AREA_SPACE_OVERRIDE_REPLACE) {
 				replace = true;
 				break;
@@ -424,20 +429,21 @@ void BodySW::integrate_forces(real_t p_step) {
 	}
 
 	if( !replace ) {
-		_compute_area_gravity(def_area);
+		_compute_area_gravity_and_dampenings(def_area);
 	}
 
 	gravity*=gravity_scale;
 
+	// A damp >= 0 set on the Body itself overrides the dampenings accumulated from the areas
 	if (angular_damp>=0)
 		area_angular_damp=angular_damp;
-	else
-		area_angular_damp=damp_area->get_angular_damp();
+	//else
+	//	area_angular_damp=damp_area->get_angular_damp();
 
 	if (linear_damp>=0)
 		area_linear_damp=linear_damp;
-	else
-		area_linear_damp=damp_area->get_linear_damp();
+	//else
+	//	area_linear_damp=damp_area->get_linear_damp();
 
 
 	Vector3 motion;
diff --git a/servers/physics/body_sw.h b/servers/physics/body_sw.h
index 66d814bfd1..4c4c7818c5 100644
--- a/servers/physics/body_sw.h
+++ b/servers/physics/body_sw.h
@@ -130,7 +130,7 @@ class BodySW : public CollisionObjectSW {
 	BodySW *island_next;
 	BodySW *island_list_next;
 
-	_FORCE_INLINE_ void _compute_area_gravity(const AreaSW *p_area);
+	_FORCE_INLINE_ void _compute_area_gravity_and_dampenings(const AreaSW *p_area);
 
 	_FORCE_INLINE_ void _update_inertia_tensor();
 
diff --git a/servers/physics/joints/SCsub b/servers/physics/joints/SCsub
index 97d6edea21..d31af2c1c4 100644
--- a/servers/physics/joints/SCsub
+++ b/servers/physics/joints/SCsub
@@ -3,6 +3,3 @@ Import('env')
 env.add_source_files(env.servers_sources,"*.cpp")
 
 Export('env')
-
-
-
diff --git a/servers/physics/space_sw.cpp b/servers/physics/space_sw.cpp
index ba1c737530..778d20d3f1 100644
--- a/servers/physics/space_sw.cpp
+++ b/servers/physics/space_sw.cpp
@@ -175,13 +175,15 @@ int PhysicsDirectSpaceStateSW::intersect_shape(const RID& p_shape, const Transfo
 		if (!CollisionSolverSW::solve_static(shape,p_xform,col_obj->get_shape(shape_idx),col_obj->get_transform() * col_obj->get_shape_transform(shape_idx), NULL,NULL,NULL,p_margin,0))
 			continue;
 
-		r_results[cc].collider_id=col_obj->get_instance_id();
-		if (r_results[cc].collider_id!=0)
-			r_results[cc].collider=ObjectDB::get_instance(r_results[cc].collider_id);
-		else
-			r_results[cc].collider=NULL;
-		r_results[cc].rid=col_obj->get_self();
-		r_results[cc].shape=shape_idx;
+		if (r_results) {
+			r_results[cc].collider_id=col_obj->get_instance_id();
+			if (r_results[cc].collider_id!=0)
+				r_results[cc].collider=ObjectDB::get_instance(r_results[cc].collider_id);
+			else
+				r_results[cc].collider=NULL;
+			r_results[cc].rid=col_obj->get_self();
+			r_results[cc].shape=shape_idx;
+		}
 
 		cc++;
 
diff --git a/servers/physics_2d/SCsub b/servers/physics_2d/SCsub
index a2c2b51a61..ebb7f8be00 100644
--- a/servers/physics_2d/SCsub
+++ b/servers/physics_2d/SCsub
@@ -1,4 +1,3 @@
 Import('env')
 
 env.add_source_files(env.servers_sources,"*.cpp")
-
diff --git a/servers/physics_2d/body_2d_sw.cpp b/servers/physics_2d/body_2d_sw.cpp
index 38835c9a82..d0c5cbc77b 100644
--- a/servers/physics_2d/body_2d_sw.cpp
+++ b/servers/physics_2d/body_2d_sw.cpp
@@ -380,7 +380,7 @@ void Body2DSW::set_space(Space2DSW *p_space){
 
 }
 
-void Body2DSW::_compute_area_gravity(const Area2DSW *p_area) {
+void Body2DSW::_compute_area_gravity_and_dampenings(const Area2DSW *p_area) {
 
 	if (p_area->is_gravity_point()) {
 		if(p_area->get_gravity_distance_scale() > 0) {
@@ -393,6 +393,8 @@ void Body2DSW::_compute_area_gravity(const Area2DSW *p_area) {
 		gravity += p_area->get_gravity_vector() * p_area->get_gravity();
 	}
 
+	area_linear_damp += p_area->get_linear_damp();
+	area_angular_damp += p_area->get_angular_damp();
 }
 
 void Body2DSW::integrate_forces(real_t p_step) {
@@ -406,13 +408,15 @@ void Body2DSW::integrate_forces(real_t p_step) {
 
 	int ac = areas.size();
 	bool replace = false;
-	gravity=Vector2(0,0);
+	gravity = Vector2(0,0);
+	area_angular_damp = 0;
+	area_linear_damp = 0;
 	if (ac) {
 		areas.sort();
 		const AreaCMP *aa = &areas[0];
 		damp_area = aa[ac-1].area;
 		for(int i=ac-1;i>=0;i--) {
-			_compute_area_gravity(aa[i].area);
+			_compute_area_gravity_and_dampenings(aa[i].area);
 			if (aa[i].area->get_space_override_mode() == Physics2DServer::AREA_SPACE_OVERRIDE_REPLACE) {
 				replace = true;
 				break;
@@ -420,19 +424,20 @@ void Body2DSW::integrate_forces(real_t p_step) {
 		}
 	}
 	if( !replace ) {
-		_compute_area_gravity(def_area);
+		_compute_area_gravity_and_dampenings(def_area);
 	}
 	gravity*=gravity_scale;
 
+	// A damp >= 0 set on the Body2D itself overrides the dampenings accumulated from the areas
 	if (angular_damp>=0)
-		area_angular_damp=angular_damp;
-	else
-		area_angular_damp=damp_area->get_angular_damp();
+		area_angular_damp = angular_damp;
+	//else
+	//	area_angular_damp=damp_area->get_angular_damp();
 
 	if (linear_damp>=0)
-		area_linear_damp=linear_damp;
-	else
-		area_linear_damp=damp_area->get_linear_damp();
+		area_linear_damp = linear_damp;
+	//else
+	//	area_linear_damp=damp_area->get_linear_damp();
 
 	Vector2 motion;
 	bool do_motion=false;
@@ -442,7 +447,7 @@ void Body2DSW::integrate_forces(real_t p_step) {
 		//compute motion, angular and etc. velocities from prev transform
 		linear_velocity = (new_transform.elements[2] - get_transform().elements[2])/p_step;
 
-		real_t rot = new_transform.affine_inverse().basis_xform(get_transform().elements[1]).atan2();
+		real_t rot = new_transform.affine_inverse().basis_xform(get_transform().elements[1]).angle();
 		angular_velocity = rot / p_step;
 
 		motion = new_transform.elements[2] - get_transform().elements[2];
diff --git a/servers/physics_2d/body_2d_sw.h b/servers/physics_2d/body_2d_sw.h
index 2fbfcaca60..8418c5dcd7 100644
--- a/servers/physics_2d/body_2d_sw.h
+++ b/servers/physics_2d/body_2d_sw.h
@@ -132,7 +132,7 @@ class Body2DSW : public CollisionObject2DSW {
 	Body2DSW *island_next;
 	Body2DSW *island_list_next;
 
-	_FORCE_INLINE_ void _compute_area_gravity(const Area2DSW *p_area);
+	_FORCE_INLINE_ void _compute_area_gravity_and_dampenings(const Area2DSW *p_area);
 
 friend class Physics2DDirectBodyStateSW; // i give up, too many functions to expose
 
diff --git a/servers/spatial_sound/SCsub b/servers/spatial_sound/SCsub
index 16fe3a59ac..d31af2c1c4 100644
--- a/servers/spatial_sound/SCsub
+++ b/servers/spatial_sound/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.servers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/servers/spatial_sound_2d/SCsub b/servers/spatial_sound_2d/SCsub
index 16fe3a59ac..d31af2c1c4 100644
--- a/servers/spatial_sound_2d/SCsub
+++ b/servers/spatial_sound_2d/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.servers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/servers/visual/SCsub b/servers/visual/SCsub
index 16fe3a59ac..d31af2c1c4 100644
--- a/servers/visual/SCsub
+++ b/servers/visual/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.servers_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/servers/visual/rasterizer.h b/servers/visual/rasterizer.h
index e22b3c3a6c..50407f1b0f 100644
--- a/servers/visual/rasterizer.h
+++ b/servers/visual/rasterizer.h
@@ -693,7 +693,7 @@ public:
 			Rect2 rect;
 			RID texture;
 			float margin[4];
-			float draw_center;
+			bool draw_center;
 			Color color;
 			CommandStyle() { draw_center=true; type = TYPE_STYLE; }
 		};
@@ -1029,6 +1029,8 @@ public:
 
 	virtual int get_render_info(VS::RenderInfo p_info)=0;
 
+	virtual void set_force_16_bits_fbo(bool p_force) {}
+
 	Rasterizer();
 	virtual ~Rasterizer() {}
 };
diff --git a/servers/visual/rasterizer_dummy.h b/servers/visual/rasterizer_dummy.h
index f582fbd8ee..2c503249fe 100644
--- a/servers/visual/rasterizer_dummy.h
+++ b/servers/visual/rasterizer_dummy.h
@@ -162,10 +162,6 @@ class RasterizerDummy : public Rasterizer {
 		uint32_t format;
 		uint32_t morph_format;
 
-		RID material;
-		bool material_owned;
-
-
 		Surface() {
 
 			packed=false;
diff --git a/servers/visual/shader_language.cpp b/servers/visual/shader_language.cpp
index ea56306241..0e10c7dfe4 100644
--- a/servers/visual/shader_language.cpp
+++ b/servers/visual/shader_language.cpp
@@ -1043,6 +1043,7 @@ const ShaderLanguage::BuiltinsDef ShaderLanguage::vertex_builtins_defs[]={
 	{ "SRC_TANGENT", TYPE_VEC3},
 	{ "SRC_BINORMALF", TYPE_FLOAT},
 
+	{ "POSITION", TYPE_VEC4 },
 	{ "VERTEX", TYPE_VEC3},
 	{ "NORMAL", TYPE_VEC3},
 	{ "TANGENT", TYPE_VEC3},
@@ -1112,7 +1113,8 @@ const ShaderLanguage::BuiltinsDef ShaderLanguage::light_builtins_defs[]={
 	{ "SPECULAR_EXP", TYPE_FLOAT},
 	{ "SHADE_PARAM", TYPE_FLOAT},
 	{ "LIGHT", TYPE_VEC3},
-	{ "POINT_COORD", TYPE_VEC2},
+	{ "SHADOW", TYPE_VEC3 },
+	{ "POINT_COORD", TYPE_VEC2 },
 //	{ "SCREEN_POS", TYPE_VEC2},
 //	{ "SCREEN_TEXEL_SIZE", TYPE_VEC2},
 	{ "TIME", TYPE_FLOAT},
@@ -1368,7 +1370,7 @@ ShaderLanguage::Node* ShaderLanguage::validate_function_call(Parser&parser, Oper
 			}
 		}
 
-		if (!fail) {
+		if (!fail && name == program->functions[i].name) {
 			p_func->return_cache=pfunc->return_type;
 			return p_func;
 		}
@@ -2339,19 +2341,27 @@ Error ShaderLanguage::parse_flow_if(Parser& parser,Node *p_parent,Node **r_state
 
 	parser.advance();
 
+	if (parser.get_token_type()!=TK_CURLY_BRACKET_OPEN) {
+		parser.set_error("Expected statement block after 'if()'");
+		return ERR_PARSE_ERROR;
+	}
+
 	Node *substatement=NULL;
 	err = parse_statement(parser,cf,&substatement);
 	if (err)
 		return err;
 
-
 	cf->statements.push_back(substatement);
 
-
-
 	if (parser.get_token_type()==TK_CF_ELSE) {
 
 		parser.advance();
+
+		if (parser.get_token_type()!=TK_CURLY_BRACKET_OPEN) {
+			parser.set_error("Expected statement block after 'else'");
+			return ERR_PARSE_ERROR;
+		}
+
 		substatement=NULL;
 		err = parse_statement(parser,cf,&substatement);
 		if (err)
diff --git a/tools/Godot.app/Contents/Info.plist b/tools/Godot.app/Contents/Info.plist
index 3a4b51e2fe..8a89993fb9 100755
--- a/tools/Godot.app/Contents/Info.plist
+++ b/tools/Godot.app/Contents/Info.plist
@@ -13,7 +13,7 @@
 	<key>CFBundleIconFile</key>
 	<string>Godot.icns</string>
 	<key>CFBundleIdentifier</key>
-	<string>com.okamstudio.godot</string>
+	<string>org.godotengine.godot</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundlePackageType</key>
@@ -61,7 +61,7 @@
 			<true/>
 			<key>LSItemContentTypes</key>
 			<array>
-				<string>com.okamstudio.scn</string>
+				<string>org.godotengine.scn</string>
 			</array>
 		</dict>
 	</array>
diff --git a/tools/Godot.app/Contents/Resources/Godot.icns b/tools/Godot.app/Contents/Resources/Godot.icns
index 18bc68d6ea..4a3dc0415a 100644
--- a/tools/Godot.app/Contents/Resources/Godot.icns
+++ b/tools/Godot.app/Contents/Resources/Godot.icns
diff --git a/tools/SCsub b/tools/SCsub
index ce7df2c35b..f046e9ad08 100644
--- a/tools/SCsub
+++ b/tools/SCsub
@@ -11,11 +11,10 @@ if (env["tools"]!="no"):
 	SConscript('collada/SCsub');
 	SConscript('docdump/SCsub');
 	SConscript('freetype/SCsub');
+	SConscript('pe_bliss/SCsub');
 	SConscript('doc/SCsub')
 	SConscript('pck/SCsub')
 
 	lib = env.Library("tool",env.tool_sources)
 
 	env.Prepend(LIBS=[lib])
-
-
diff --git a/tools/collada/SCsub b/tools/collada/SCsub
index c8eaa596d1..34524f10ef 100644
--- a/tools/collada/SCsub
+++ b/tools/collada/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.tool_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/tools/doc/SCsub b/tools/doc/SCsub
index c8eaa596d1..34524f10ef 100644
--- a/tools/doc/SCsub
+++ b/tools/doc/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.tool_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/tools/doc/doc_data.cpp b/tools/doc/doc_data.cpp
index 432f358627..c1d3e5e314 100644
--- a/tools/doc/doc_data.cpp
+++ b/tools/doc/doc_data.cpp
@@ -189,9 +189,11 @@ void DocData::generate(bool p_basic_types) {
 					arginfo=E->get().return_val;
 					if (arginfo.type==Variant::NIL)
 						continue;
+#ifdef DEBUG_METHODS_ENABLED
 					if (m && m->get_return_type()!=StringName())
 						method.return_type=m->get_return_type();
 					else
+#endif
 						method.return_type=(arginfo.hint==PROPERTY_HINT_RESOURCE_TYPE)?arginfo.hint_string:Variant::get_type_name(arginfo.type);
 
 				} else {
diff --git a/tools/docdump/SCsub b/tools/docdump/SCsub
index c8eaa596d1..34524f10ef 100644
--- a/tools/docdump/SCsub
+++ b/tools/docdump/SCsub
@@ -3,5 +3,3 @@ Import('env')
 env.add_source_files(env.tool_sources,"*.cpp")
 
 Export('env')
-
-
diff --git a/tools/docdump/makehtml.py b/tools/docdump/makehtml.py
index d533ca1b8b..9b9c62f33b 100644
--- a/tools/docdump/makehtml.py
+++ b/tools/docdump/makehtml.py
@@ -1,5 +1,19 @@
 import sys
 import xml.etree.ElementTree as ET
+from xml.sax.saxutils import escape, unescape
+
+html_escape_table = {
+ '"': "&quot;",
+ "'": "&apos;"
+}
+
+html_unescape_table = {v:k for k, v in html_escape_table.items()}
+
+def html_escape(text):
+ return escape(text, html_escape_table)
+
+def html_unescape(text):
+ return unescape(text, html_unescape_table)
 
 input_list = []
 
@@ -96,7 +110,7 @@ def make_html_class_list(class_list,columns):
      
   idx=0
   for n in class_list:
-   col = idx/col_max
+   col = int(idx/col_max)
    if (col>=columns):
     col=columns-1
    fit_columns[col]+=[n]
@@ -299,6 +313,7 @@ def make_type(p_type,p_parent):
 
 
 def make_text_def(class_name,parent,text):
+ text = html_escape(text)
  pos=0
  while(True):
   pos = text.find("[",pos)
@@ -598,7 +613,6 @@ def make_html_class(node):
   
   descr=node.find("description")
   if (descr!=None and descr.text.strip()!=""):
-
    h4=ET.SubElement(div,"h4")
    h4.text="Description:"
   
@@ -644,7 +658,6 @@ def make_html_class(node):
 class_names=[]
 classes={}
 
-
 for file in input_list:
  tree = ET.parse(file)
  doc=tree.getroot()
diff --git a/tools/editor/SCsub b/tools/editor/SCsub
index 73ec530177..cd46ff8353 100644
--- a/tools/editor/SCsub
+++ b/tools/editor/SCsub
@@ -28,7 +28,7 @@ def make_doc_header(target,source,env):
 
 
 
-	
+
 
 if (env["tools"]=="yes"):
 
@@ -43,17 +43,16 @@ if (env["tools"]=="yes"):
 	f.write(reg_exporters_inc)
 	f.write(reg_exporters)
 	f.close()
-	
+
 	env.Depends("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml")
 	env.Command("#tools/editor/doc_data_compressed.h","#doc/base/classes.xml",make_doc_header)
 
 	#make_doc_header(env.File("#tools/editor/doc_data_raw.h").srcnode().abspath,env.File("#doc/base/classes.xml").srcnode().abspath,env)
-	
+
 	env.add_source_files(env.tool_sources,"*.cpp")
-	
+
 	Export('env')
-	SConscript('icons/SCsub');	
+	SConscript('icons/SCsub');
 	SConscript('plugins/SCsub');
 	SConscript('fileserver/SCsub');
 	SConscript('io_plugins/SCsub');
-
diff --git a/tools/editor/animation_editor.cpp b/tools/editor/animation_editor.cpp
index 5df49bd327..b8aa5874d1 100644
--- a/tools/editor/animation_editor.cpp
+++ b/tools/editor/animation_editor.cpp
@@ -34,6 +34,7 @@
 #include "pair.h"
 #include "scene/gui/separator.h"
 #include "editor_node.h"
+#include "tools/editor/plugins/animation_player_editor_plugin.h"
 /* Missing to fix:
 
   *Set
@@ -627,31 +628,41 @@ public:
 };
 
 
-void AnimationKeyEditor::_menu_track(int p_type) {
+void AnimationKeyEditor::_menu_add_track(int p_type) {
 
 	ERR_FAIL_COND(!animation.is_valid());
 
 
-	last_menu_track_opt=p_type;
 	switch(p_type) {
 
-		case TRACK_MENU_ADD_CALL_TRACK: {
+		case ADD_TRACK_MENU_ADD_CALL_TRACK: {
 			if (root) {
 				call_select->popup_centered_ratio();
 				break;
 			}
 		} break;
-		case TRACK_MENU_ADD_VALUE_TRACK:
-		case TRACK_MENU_ADD_TRANSFORM_TRACK: {
+		case ADD_TRACK_MENU_ADD_VALUE_TRACK:
+		case ADD_TRACK_MENU_ADD_TRANSFORM_TRACK: {
 
 			undo_redo->create_action("Anim Add Track");
-			undo_redo->add_do_method(animation.ptr(),"add_track",p_type);			
+			undo_redo->add_do_method(animation.ptr(),"add_track",p_type);
 			undo_redo->add_do_method(animation.ptr(),"track_set_path",animation->get_track_count(),".");
 			undo_redo->add_undo_method(animation.ptr(),"remove_track",animation->get_track_count());
 			undo_redo->commit_action();
 
 
 		} break;
+	}
+}
+
+void AnimationKeyEditor::_menu_track(int p_type) {
+
+	ERR_FAIL_COND(!animation.is_valid());
+
+
+	last_menu_track_opt=p_type;
+	switch(p_type) {
+
 		case TRACK_MENU_SCALE:
 		case TRACK_MENU_SCALE_PIVOT: {
 
@@ -893,6 +904,23 @@ void AnimationKeyEditor::_menu_track(int p_type) {
 
 			optimize_dialog->popup_centered(Size2(250,180));
 		} break;
+		case TRACK_MENU_CLEAN_UP: {
+
+			cleanup_dialog->popup_centered_minsize(Size2(300,0));
+		} break;
+		case TRACK_MENU_CLEAN_UP_CONFIRM: {
+
+			if (cleanup_all->is_pressed()) {
+				List<StringName> names;
+				AnimationPlayerEditor::singleton->get_player()->get_animation_list(&names);
+				for (List<StringName>::Element *E=names.front();E;E=E->next()) {
+					_cleanup_animation(AnimationPlayerEditor::singleton->get_player()->get_animation(E->get()));
+				}
+			} else {
+				_cleanup_animation(animation);
+
+			}
+		} break;
 		case CURVE_SET_LINEAR: {
 			curve_edit->force_transition(1.0);
 
@@ -923,6 +951,57 @@ void AnimationKeyEditor::_menu_track(int p_type) {
 
 }
 
+void AnimationKeyEditor::_cleanup_animation(Ref<Animation> p_animation) {
+	// Removes tracks whose path no longer resolves from `root`, and VALUE keys whose type cannot be converted to the target property's static type, as enabled by the clean-up dialog toggles.
+	ERR_FAIL_COND(!root || !p_animation.is_valid()); // root can be unset (see the `if (root)` check in _menu_add_track); dereferencing it below would crash
+	for(int i=0;i<p_animation->get_track_count();i++) {
+		// Resolve the track path to the object it animates; a resource takes precedence over the node.
+		bool prop_exists=false;
+		Variant::Type valid_type=Variant::NIL;
+		Object *obj=NULL;
+
+		RES res;
+		Node *node = root->get_node_and_resource(p_animation->track_get_path(i),res);
+
+		if (res.is_valid()) {
+			obj=res.ptr();
+		} else if (node) {
+			obj=node;
+		}
+		// Only VALUE tracks are checked against the property's static type.
+		if (obj && p_animation->track_get_type(i)==Animation::TYPE_VALUE) {
+			valid_type=obj->get_static_property_type(p_animation->track_get_path(i).get_property(),&prop_exists);
+		}
+		// Unresolved path: drop the whole track and re-visit index i, which now holds the next track.
+		if (!obj && cleanup_tracks->is_pressed()) {
+
+			p_animation->remove_track(i);
+			i--;
+			continue;
+		}
+		// Key clean-up applies only to VALUE tracks with an existing property, and only when requested.
+		if (!prop_exists || p_animation->track_get_type(i)!=Animation::TYPE_VALUE || cleanup_keys->is_pressed()==false)
+			continue;
+
+		for(int j=0;j<p_animation->track_get_key_count(i);j++) {
+
+			Variant v = p_animation->track_get_key_value(i,j);
+			// Removing a key shifts the following ones down, hence the j-- after removal.
+			if (!Variant::can_convert(v.get_type(),valid_type)) {
+				p_animation->track_remove_key(i,j);
+				j--;
+			}
+		}
+		// A track left without keys is removed as well when track clean-up is enabled.
+		if (p_animation->track_get_key_count(i)==0 && cleanup_tracks->is_pressed()) {
+			p_animation->remove_track(i);
+			i--;
+		}
+	}
+	// The removals above are applied directly (not through undo_redo), so the history is cleared.
+	undo_redo->clear_history();
+	_update_paths();
+}
 
 void AnimationKeyEditor::_animation_optimize()  {
 
@@ -999,6 +1078,7 @@ void AnimationKeyEditor::_track_editor_draw() {
 	if (!animation.is_valid()) {
 		v_scroll->hide();
 		h_scroll->hide();
+		menu_add_track->set_disabled(true);
 		menu_track->set_disabled(true);
 		edit_button->set_disabled(true);
 		key_editor_tab->hide();
@@ -1008,6 +1088,7 @@ void AnimationKeyEditor::_track_editor_draw() {
 		return;
 	}
 
+	menu_add_track->set_disabled(false);
 	menu_track->set_disabled(false);
 	edit_button->set_disabled(false);
 	move_up_button->set_disabled(false);
@@ -1030,6 +1111,7 @@ void AnimationKeyEditor::_track_editor_draw() {
 	timecolor = Color::html("ff4a414f");
 	Color hover_color = Color(1,1,1,0.05);
 	Color select_color = Color(1,1,1,0.1);
+	Color invalid_path_color = Color(1,0.6,0.4,0.5);
 	Color track_select_color =Color::html("ffbd8e8e");
 
 	Ref<Texture> remove_icon = get_icon("Remove","EditorIcons");
@@ -1056,6 +1138,9 @@ void AnimationKeyEditor::_track_editor_draw() {
 		get_icon("KeyCall","EditorIcons")
 	};
 
+	Ref<Texture> invalid_icon = get_icon("KeyInvalid","EditorIcons");
+	Ref<Texture> invalid_icon_hover = get_icon("KeyInvalidHover","EditorIcons");
+
 	Ref<Texture> hsize_icon = get_icon("Hsize","EditorIcons");
 
 	Ref<Texture> type_hover=get_icon("KeyHover","EditorIcons");
@@ -1242,6 +1327,23 @@ void AnimationKeyEditor::_track_editor_draw() {
 			break;
 		int y = h+i*h+sep;
 
+		bool prop_exists=false;
+		Variant::Type valid_type=Variant::NIL;
+		Object *obj=NULL;
+
+		RES res;
+		Node *node = root->get_node_and_resource(animation->track_get_path(idx),res);
+
+		if (res.is_valid()) {
+			obj=res.ptr();
+		} else if (node) {
+			obj=node;
+		}
+
+		if (obj && animation->track_get_type(idx)==Animation::TYPE_VALUE) {
+			valid_type=obj->get_static_property_type(animation->track_get_path(idx).get_property(),&prop_exists);
+		}
+
 
 		if (/*mouse_over.over!=MouseOver::OVER_NONE &&*/ idx==mouse_over.track) {
 			Color sepc=hover_color;
@@ -1262,6 +1364,8 @@ void AnimationKeyEditor::_track_editor_draw() {
 			ncol=track_select_color;
 		te->draw_string(font,Point2(ofs+Point2(type_icon[0]->get_width()+sep,y+font->get_ascent()+(sep/2))).floor(),np,ncol,name_limit-(type_icon[0]->get_width()+sep)-5);
 
+		if (!obj)
+			te->draw_line(ofs+Point2(0,y+h/2),ofs+Point2(name_limit,y+h/2),invalid_path_color);
 
 		te->draw_line(ofs+Point2(0,y+h),ofs+Point2(size.width,y+h),sepcolor);
 
@@ -1327,6 +1431,8 @@ void AnimationKeyEditor::_track_editor_draw() {
 		int kc=animation->track_get_key_count(idx);
 		bool first=true;
 
+
+
 		for(int i=0;i<kc;i++) {
 
 
@@ -1374,7 +1480,21 @@ void AnimationKeyEditor::_track_editor_draw() {
 
 			}
 
-			te->draw_texture(tex,ofs+Point2(x,y+key_vofs).floor());
+			// A key whose value cannot be converted to the animated property's
+			// static type is drawn with the "invalid" icon (its hover variant when
+			// the regular texture is the hover one), and it is drawn exactly once.
+
+			if (prop_exists && !Variant::can_convert(value.get_type(),valid_type)) {
+				if (tex==type_hover)
+					te->draw_texture(invalid_icon_hover,ofs+Point2(x,y+key_vofs).floor());
+				else
+					te->draw_texture(invalid_icon,ofs+Point2(x,y+key_vofs).floor());
+			} else {
+
+				te->draw_texture(tex,ofs+Point2(x,y+key_vofs).floor());
+			}
+
+
 			first=false;
 		}
 
@@ -2543,6 +2663,8 @@ void AnimationKeyEditor::_track_editor_input_event(const InputEvent& p_input) {
 
 						String text;
 						text="time: "+rtos(animation->track_get_key_time(idx,mouse_over.over_key))+"\n";
+
+
 						switch(animation->track_get_type(idx)) {
 
 							case Animation::TYPE_TRANSFORM: {
@@ -2557,8 +2679,33 @@ void AnimationKeyEditor::_track_editor_input_event(const InputEvent& p_input) {
 							} break;
 							case Animation::TYPE_VALUE: {
 
-								Variant v = animation->track_get_key_value(idx,mouse_over.over_key);
-								text+="value: "+String(v)+"\n";
+								Variant v = animation->track_get_key_value(idx,mouse_over.over_key);;
+								//text+="value: "+String(v)+"\n";
+
+								bool prop_exists=false;
+								Variant::Type valid_type=Variant::NIL;
+								Object *obj=NULL;
+
+								RES res;
+								Node *node = root->get_node_and_resource(animation->track_get_path(idx),res);
+
+								if (res.is_valid()) {
+									obj=res.ptr();
+								} else if (node) {
+									obj=node;
+								}
+
+								if (obj) {
+									valid_type=obj->get_static_property_type(animation->track_get_path(idx).get_property(),&prop_exists);
+								}
+
+								text+="type: "+Variant::get_type_name(v.get_type())+"\n";
+								if (prop_exists && !Variant::can_convert(v.get_type(),valid_type)) {
+									text+="value: "+String(v)+"  (Invalid, expected type: "+Variant::get_type_name(valid_type)+")\n";
+								} else {
+									text+="value: "+String(v)+"\n";
+								}
+
 							} break;
 							case Animation::TYPE_METHOD: {
 
@@ -2581,6 +2728,9 @@ void AnimationKeyEditor::_track_editor_input_event(const InputEvent& p_input) {
 							} break;
 						}
 						text+="easing: "+rtos(animation->track_get_key_transition(idx,mouse_over.over_key));
+
+
+
 						track_editor->set_tooltip(text);
 						return;
 
@@ -2659,12 +2809,14 @@ void AnimationKeyEditor::_notification(int p_what) {
 
 		case NOTIFICATION_ENTER_TREE: {
 
-				zoomicon->set_texture( get_icon("Zoom","EditorIcons") );				
-				//menu_track->set_icon(get_icon("AddTrack","EditorIcons"));
-				menu_track->get_popup()->add_icon_item(get_icon("KeyValue","EditorIcons"),"Add Normal Track",TRACK_MENU_ADD_VALUE_TRACK);
-				menu_track->get_popup()->add_icon_item(get_icon("KeyXform","EditorIcons"),"Add Transform Track",TRACK_MENU_ADD_TRANSFORM_TRACK);
-				menu_track->get_popup()->add_icon_item(get_icon("KeyCall","EditorIcons"),"Add Call Func Track",TRACK_MENU_ADD_CALL_TRACK);
-				menu_track->get_popup()->add_separator();
+				zoomicon->set_texture( get_icon("Zoom","EditorIcons") );
+
+				menu_add_track->set_icon(get_icon("AddTrack","EditorIcons"));
+				menu_add_track->get_popup()->add_icon_item(get_icon("KeyValue","EditorIcons"),"Add Normal Track",ADD_TRACK_MENU_ADD_VALUE_TRACK);
+				menu_add_track->get_popup()->add_icon_item(get_icon("KeyXform","EditorIcons"),"Add Transform Track",ADD_TRACK_MENU_ADD_TRANSFORM_TRACK);
+				menu_add_track->get_popup()->add_icon_item(get_icon("KeyCall","EditorIcons"),"Add Call Func Track",ADD_TRACK_MENU_ADD_CALL_TRACK);
+
+				menu_track->set_icon(get_icon("Tools","EditorIcons"));
 				menu_track->get_popup()->add_item("Scale Selection",TRACK_MENU_SCALE);
 				menu_track->get_popup()->add_item("Scale From Cursor",TRACK_MENU_SCALE_PIVOT);
 				menu_track->get_popup()->add_separator();
@@ -2689,6 +2841,7 @@ void AnimationKeyEditor::_notification(int p_what) {
 				//menu_track->get_popup()->add_submenu_item("Set Transitions..","Transitions");
 				//menu_track->get_popup()->add_separator();
 				menu_track->get_popup()->add_item("Optimize Animation",TRACK_MENU_OPTIMIZE);
+				menu_track->get_popup()->add_item("Clean-Up Animation",TRACK_MENU_CLEAN_UP);
 
 				curve_linear->set_icon(get_icon("CurveLinear","EditorIcons"));
 				curve_in->set_icon(get_icon("CurveIn","EditorIcons"));
@@ -3511,6 +3664,7 @@ void AnimationKeyEditor::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("_track_editor_input_event"),&AnimationKeyEditor::_track_editor_input_event);
 	ObjectTypeDB::bind_method(_MD("_track_name_changed"),&AnimationKeyEditor::_track_name_changed);
 	ObjectTypeDB::bind_method(_MD("_track_menu_selected"),&AnimationKeyEditor::_track_menu_selected);
+	ObjectTypeDB::bind_method(_MD("_menu_add_track"),&AnimationKeyEditor::_menu_add_track);
 	ObjectTypeDB::bind_method(_MD("_menu_track"),&AnimationKeyEditor::_menu_track);
 	ObjectTypeDB::bind_method(_MD("_clear_selection_for_anim"),&AnimationKeyEditor::_clear_selection_for_anim);
 	ObjectTypeDB::bind_method(_MD("_select_at_anim"),&AnimationKeyEditor::_select_at_anim);
@@ -3563,15 +3717,6 @@ AnimationKeyEditor::AnimationKeyEditor(UndoRedo *p_undo_redo, EditorHistory *p_h
 	//menu->set_pos(Point2());
 	//add_child(menu);
 
-	menu_track = memnew( MenuButton );
-	menu_track->set_text("Tracks");
-	hb->add_child(menu_track);
-	menu_track->get_popup()->connect("item_pressed",this,"_menu_track");
-
-
-
-	hb->add_child( memnew( VSeparator ) );
-
 	zoomicon = memnew( TextureFrame );
 	hb->add_child(zoomicon);
 	zoomicon->set_tooltip("Animation zoom.");
@@ -3629,6 +3774,10 @@ AnimationKeyEditor::AnimationKeyEditor(UndoRedo *p_undo_redo, EditorHistory *p_h
 
 	hb->add_child( memnew( VSeparator ) );
 
+	menu_add_track = memnew( MenuButton );
+	hb->add_child(menu_add_track);
+	menu_add_track->get_popup()->connect("item_pressed",this,"_menu_add_track");
+	menu_add_track->set_tooltip("Add new tracks.");
 
 	move_up_button = memnew( ToolButton );
 	hb->add_child(move_up_button);
@@ -3653,6 +3802,11 @@ AnimationKeyEditor::AnimationKeyEditor(UndoRedo *p_undo_redo, EditorHistory *p_h
 
 	hb->add_child(memnew( VSeparator ));
 
+	menu_track = memnew( MenuButton );
+	hb->add_child(menu_track);
+	menu_track->get_popup()->connect("item_pressed",this,"_menu_track");
+	menu_track->set_tooltip("Track tools");
+
 	edit_button = memnew( ToolButton );
 	edit_button->set_toggle_mode(true);
 	edit_button->set_focus_mode(FOCUS_NONE);
@@ -3847,6 +4001,32 @@ AnimationKeyEditor::AnimationKeyEditor(UndoRedo *p_undo_redo, EditorHistory *p_h
 	add_child(call_select);
 	call_select->set_title("Call Functions in Which Node?");
 
+	cleanup_dialog = memnew( ConfirmationDialog );
+	add_child(cleanup_dialog);
+	VBoxContainer *cleanup_vb = memnew( VBoxContainer );
+	cleanup_dialog->add_child(cleanup_vb);
+	cleanup_dialog->set_child_rect(cleanup_vb);
+	cleanup_keys = memnew( CheckButton );
+	cleanup_keys->set_text("Remove invalid keys");
+	cleanup_keys->set_pressed(true);
+	cleanup_vb->add_child(cleanup_keys);
+
+	cleanup_tracks = memnew( CheckButton );
+	cleanup_tracks->set_text("Remove unresolved and empty tracks");
+	cleanup_tracks->set_pressed(true);
+	cleanup_vb->add_child(cleanup_tracks);
+
+	cleanup_all = memnew( CheckButton );
+	cleanup_all->set_text("Clean-Up all animations");
+	cleanup_vb->add_child(cleanup_all);
+
+	cleanup_dialog->set_title("Clean up Animation(s) (NO UNDO!)");
+	cleanup_dialog->get_ok()->set_text("Clean-Up");
+
+	cleanup_dialog->connect("confirmed",this,"_menu_track",varray(TRACK_MENU_CLEAN_UP_CONFIRM));
+
+
+
 }
 
 AnimationKeyEditor::~AnimationKeyEditor() {
diff --git a/tools/editor/animation_editor.h b/tools/editor/animation_editor.h
index 4a1cc21154..743242fe94 100644
--- a/tools/editor/animation_editor.h
+++ b/tools/editor/animation_editor.h
@@ -70,9 +70,9 @@ class AnimationKeyEditor : public VBoxContainer  {
 
 	enum {
 
-		TRACK_MENU_ADD_VALUE_TRACK,
-		TRACK_MENU_ADD_TRANSFORM_TRACK,
-		TRACK_MENU_ADD_CALL_TRACK,
+		ADD_TRACK_MENU_ADD_VALUE_TRACK,
+		ADD_TRACK_MENU_ADD_TRANSFORM_TRACK,
+		ADD_TRACK_MENU_ADD_CALL_TRACK,
 		TRACK_MENU_SCALE,
 		TRACK_MENU_SCALE_PIVOT,
 		TRACK_MENU_MOVE_UP,
@@ -89,6 +89,8 @@ class AnimationKeyEditor : public VBoxContainer  {
 		TRACK_MENU_NEXT_STEP,
 		TRACK_MENU_PREV_STEP,
 		TRACK_MENU_OPTIMIZE,
+		TRACK_MENU_CLEAN_UP,
+		TRACK_MENU_CLEAN_UP_CONFIRM,
 		CURVE_SET_LINEAR,
 		CURVE_SET_IN,
 		CURVE_SET_OUT,
@@ -190,8 +192,14 @@ class AnimationKeyEditor : public VBoxContainer  {
 	SpinBox *optimize_angular_error;
 	SpinBox *optimize_max_angle;
 
+	ConfirmationDialog *cleanup_dialog;
+	CheckButton *cleanup_keys;
+	CheckButton *cleanup_tracks;
+	CheckButton *cleanup_all;
+
 	SpinBox *step;
 
+	MenuButton *menu_add_track;
 	MenuButton *menu_track;
 
 	HScrollBar *h_scroll;
@@ -283,9 +291,11 @@ class AnimationKeyEditor : public VBoxContainer  {
 
 	void _animation_changed();
 	void _animation_optimize();
+	void _cleanup_animation(Ref<Animation> p_animation);
 
 	void _scroll_changed(double);
 
+	void _menu_add_track(int p_type);
 	void _menu_track(int p_type);
 
 	void _clear_selection_for_anim(const Ref<Animation>& p_anim);
diff --git a/tools/editor/code_editor.cpp b/tools/editor/code_editor.cpp
index 2ed03a1858..0728b3b7c1 100644
--- a/tools/editor/code_editor.cpp
+++ b/tools/editor/code_editor.cpp
@@ -614,11 +614,12 @@ CodeTextEditor::CodeTextEditor() {
 		text_editor->add_font_override("font",get_font("source","Fonts"));
 	text_editor->set_show_line_numbers(true);
 	text_editor->set_brace_matching(true);
+	text_editor->set_auto_indent(true);
 
 	line_col = memnew( Label );
 	add_child(line_col);
 	line_col->set_anchor_and_margin(MARGIN_LEFT,ANCHOR_END,135);
-	line_col->set_anchor_and_margin(MARGIN_TOP,ANCHOR_END,20);
+	line_col->set_anchor_and_margin(MARGIN_TOP,ANCHOR_END,15);
 	line_col->set_anchor_and_margin(MARGIN_BOTTOM,ANCHOR_END,1);
 	line_col->set_anchor_and_margin(MARGIN_RIGHT,ANCHOR_END,5);
 	//line_col->set_align(Label::ALIGN_RIGHT);
@@ -637,7 +638,7 @@ CodeTextEditor::CodeTextEditor() {
 	error = memnew( Label );
 	add_child(error);
 	error->set_anchor_and_margin(MARGIN_LEFT,ANCHOR_BEGIN,5);
-	error->set_anchor_and_margin(MARGIN_TOP,ANCHOR_END,20);
+	error->set_anchor_and_margin(MARGIN_TOP,ANCHOR_END,15);
 	error->set_anchor_and_margin(MARGIN_BOTTOM,ANCHOR_END,1);
 	error->set_anchor_and_margin(MARGIN_RIGHT,ANCHOR_END,130);
 	error->hide();
diff --git a/tools/editor/connections_dialog.cpp b/tools/editor/connections_dialog.cpp
index b0bacdae61..d4937d7114 100644
--- a/tools/editor/connections_dialog.cpp
+++ b/tools/editor/connections_dialog.cpp
@@ -163,6 +163,7 @@ void ConnectDialog::edit(Node *p_node) {
 	dst_path->set_text("");
 	dst_method->set_text("");
 	deferred->set_pressed(false);
+	oneshot->set_pressed(false);
 	cdbinds->params.clear();
 	cdbinds->notify_changed();
 }
@@ -196,6 +197,11 @@ bool ConnectDialog::get_deferred() const {
 	return deferred->is_pressed();
 }
 
+bool ConnectDialog::get_oneshot() const {
+	// True when the "Oneshot" check button is pressed.
+	return oneshot->is_pressed();
+}
+
 StringName ConnectDialog::get_dst_method() const {
 	
 	String txt=dst_method->get_text();
@@ -423,12 +429,13 @@ ConnectDialog::ConnectDialog() {
 	dstm_hb->add_child(make_callback);
 
 	deferred = memnew( CheckButton );
-	deferred->set_toggle_mode(true);
-	deferred->set_pressed(true);
 	deferred->set_text("Deferred");
 	dstm_hb->add_child(deferred);
 
-	
+	oneshot = memnew( CheckButton );
+	oneshot->set_text("Oneshot");
+	dstm_hb->add_child(oneshot);
+
 /*
 	realtime = memnew( CheckButton );
 	realtime->set_anchor( MARGIN_TOP, ANCHOR_END );
@@ -496,11 +503,13 @@ void ConnectionsDialog::_connect() {
 
 	StringName dst_method=connect_dialog->get_dst_method();
 	bool defer=connect_dialog->get_deferred();
+	bool oshot=connect_dialog->get_oneshot();
 	Vector<Variant> binds = connect_dialog->get_binds();
 	StringArray args =  it->get_metadata(0).operator Dictionary()["args"];
+	int flags = CONNECT_PERSIST | (defer?CONNECT_DEFERRED:0) | (oshot?CONNECT_ONESHOT:0);
 
 	undo_redo->create_action("Connect '"+signal+"' to '"+String(dst_method)+"'");
-	undo_redo->add_do_method(node,"connect",signal,target,dst_method,binds,CONNECT_PERSIST | (defer?CONNECT_DEFERRED:0));
+	undo_redo->add_do_method(node,"connect",signal,target,dst_method,binds,flags);
 	undo_redo->add_undo_method(node,"disconnect",signal,target,dst_method);
 	undo_redo->add_do_method(this,"update_tree");
 	undo_redo->add_undo_method(this,"update_tree");
@@ -731,6 +740,8 @@ void ConnectionsDialog::update_tree() {
 				String path = String(node->get_path_to(target))+" :: "+c.method+"()";
 				if (c.flags&CONNECT_DEFERRED)
 					path+=" (deferred)";
+				if (c.flags&CONNECT_ONESHOT)
+					path+=" (oneshot)";
 				if (c.binds.size()) {
 
 					path+=" binds( ";
diff --git a/tools/editor/connections_dialog.h b/tools/editor/connections_dialog.h
index 68b13bf07a..4a1c3f189c 100644
--- a/tools/editor/connections_dialog.h
+++ b/tools/editor/connections_dialog.h
@@ -58,6 +58,7 @@ class ConnectDialog : public ConfirmationDialog {
 	//MenuButton *dst_method_list;
 	OptionButton *type_list;
 	CheckButton *deferred;
+	CheckButton *oneshot;
 	CheckButton *make_callback;
 	PropertyEditor *bind_editor;
 	Node *node;
@@ -80,6 +81,7 @@ public:
 	NodePath get_dst_path() const;
 	StringName get_dst_method() const;	
 	bool get_deferred() const;
+	bool get_oneshot() const;
 	Vector<Variant> get_binds() const;
 	void set_dst_method(const StringName& p_method);
 	void set_dst_node(Node* p_node);
diff --git a/tools/editor/dependency_editor.cpp b/tools/editor/dependency_editor.cpp
index c04e82a08a..7e63cfb1b4 100644
--- a/tools/editor/dependency_editor.cpp
+++ b/tools/editor/dependency_editor.cpp
@@ -510,3 +510,184 @@ DependencyErrorDialog::DependencyErrorDialog() {
 	set_title("Errors loading!");
 
 }
+
+//////////////////////////////////////////////////////////////////////
+
+
+
+void OrphanResourcesDialog::ok_pressed() {
+	// Rebuilds the list of checked files and, if there are any, asks for confirmation before deleting them.
+	paths.clear();
+
+	_find_to_delete(files->get_root(),paths);
+	if (paths.empty())
+		return;
+	// The deletion itself happens in _delete_confirm(), connected to delete_confirm's "confirmed" signal.
+	delete_confirm->set_text("Permanently Delete "+itos(paths.size())+" Item(s) ? (No Undo!!)");
+	delete_confirm->popup_centered_minsize();
+}
+
+bool OrphanResourcesDialog::_fill_owners(EditorFileSystemDirectory *efsd,HashMap<String,int>& refs,TreeItem* p_parent){
+	// Two-mode recursive helper. With p_parent==NULL it only records in refs every path listed as a dependency of a file under efsd (and returns false).
+	// With a parent item it adds a checkable entry for each file whose path is absent from refs, and returns true if anything was added below p_parent.
+	if (!efsd)
+		return false;
+
+	bool has_childs=false;
+
+	for(int i=0;i<efsd->get_subdir_count();i++) {
+		// In tree mode each subdirectory gets a tentative folder item...
+		TreeItem *dir_item=NULL;
+		if (p_parent) {
+			dir_item = files->create_item(p_parent);
+			dir_item->set_text(0,efsd->get_subdir(i)->get_name());
+			dir_item->set_icon(0,get_icon("folder","FileDialog"));
+
+		}
+		bool children = _fill_owners(efsd->get_subdir(i),refs,dir_item);
+		// ...which is deleted again when the recursion added nothing beneath it.
+		if (p_parent) {
+			if (!children) {
+				memdelete(dir_item);
+			} else {
+				has_childs=true;
+			}
+		}
+
+	}
+
+
+	for(int i=0;i<efsd->get_file_count();i++) {
+		// Collection mode: mark every dependency of this file as referenced.
+		if (!p_parent) {
+			Vector<String> deps = efsd->get_file_deps(i);
+			//print_line(":::"+efsd->get_file_path(i));
+			for(int j=0;j<deps.size();j++) {
+
+				if (!refs.has(deps[j])) {
+					refs[deps[j]]=1;
+				}
+			}
+		} else {
+			// Tree mode: list the file only when no recorded dependency points at it.
+			String path = efsd->get_file_path(i);
+			if (!refs.has(path)) {
+				TreeItem *ti=files->create_item(p_parent);
+				ti->set_cell_mode(0,TreeItem::CELL_MODE_CHECK);
+				ti->set_text(0,efsd->get_file(i));
+				ti->set_editable(0,true);
+
+				String type=efsd->get_file_type(i);
+				// Use the type's editor icon when one exists, the generic "Object" icon otherwise.
+				Ref<Texture> icon;
+				if (has_icon(type,"EditorIcons")) {
+					icon=get_icon(type,"EditorIcons");
+				} else {
+					icon=get_icon("Object","EditorIcons");
+				}
+				ti->set_icon(0,icon);
+				int ds = efsd->get_file_deps(i).size();
+				ti->set_text(1,itos(ds));
+				if (ds) {
+					ti->add_button(1,get_icon("Visible","EditorIcons"));
+				}
+				ti->set_metadata(0,path);
+				has_childs=true;
+			}
+		}
+
+	}
+
+	return has_childs;
+}
+
+
+void OrphanResourcesDialog::refresh() { // Rebuilds the tree of files that no scanned file lists as a dependency.
+	HashMap<String,int> refs;
+	_fill_owners(EditorFileSystem::get_singleton()->get_filesystem(),refs,NULL); // first pass: collect referenced paths into refs
+	files->clear();
+	TreeItem *root=files->create_item();
+	_fill_owners(EditorFileSystem::get_singleton()->get_filesystem(),refs,root); // second pass: add the unreferenced files to the tree
+}
+
+
+void OrphanResourcesDialog::show(){
+	// Rebuild the tree first, then pop the dialog up.
+	refresh();
+	popup_centered_ratio();
+}
+
+
+void OrphanResourcesDialog::_find_to_delete(TreeItem* p_item,List<String>& paths) {
+	// Appends to paths the metadata of every checked item among p_item, its following siblings and all their descendants. Note the parameter shadows the member of the same name.
+	while(p_item) {
+		// Only file entries use CELL_MODE_CHECK in column 0; their metadata is the file path set in _fill_owners().
+		if (p_item->get_cell_mode(0)==TreeItem::CELL_MODE_CHECK && p_item->is_checked(0)) {
+			paths.push_back(p_item->get_metadata(0));
+		}
+		// Recurse from the first child; the recursive call walks that child's siblings itself.
+		if (p_item->get_children()) {
+			_find_to_delete(p_item->get_children(),paths);
+		}
+
+		p_item=p_item->get_next();
+	}
+
+
+}
+
+void OrphanResourcesDialog::_delete_confirm() {
+	// Removes every path gathered by ok_pressed(), notifies EditorFileSystem about each one, then rebuilds the tree.
+	DirAccess *da = DirAccess::create(DirAccess::ACCESS_RESOURCES);
+	for (List<String>::Element *E=paths.front();E;E=E->next()) {
+		// NOTE(review): the result of remove() is not checked, so a failed deletion goes unreported.
+		da->remove(E->get());
+		EditorFileSystem::get_singleton()->update_file(E->get());
+	}
+	memdelete(da);
+	refresh();
+}
+
+void OrphanResourcesDialog::_button_pressed(Object *p_item,int p_column, int p_id) {
+	// Handler for the tree's "button_pressed" signal: opens the dependency editor on the path stored in the item's metadata.
+	TreeItem *ti=p_item->cast_to<TreeItem>();
+	// NOTE(review): ti is not NULL-checked; presumably the tree only ever passes TreeItem objects - confirm.
+	String path = ti->get_metadata(0);
+	dep_edit->edit(path);
+
+}
+
+void OrphanResourcesDialog::_bind_methods() {
+	// Registers the two methods that the constructor connects to signals by name.
+	ObjectTypeDB::bind_method(_MD("_delete_confirm"),&OrphanResourcesDialog::_delete_confirm);
+	ObjectTypeDB::bind_method(_MD("_button_pressed"),&OrphanResourcesDialog::_button_pressed);
+
+}
+
+OrphanResourcesDialog::OrphanResourcesDialog(){
+	// Builds the two-column tree (file name / dependency count) plus the delete-confirmation and dependency-editor child dialogs.
+	VBoxContainer *vbc = memnew( VBoxContainer );
+	add_child(vbc);
+	set_child_rect(vbc);
+	files = memnew( Tree );
+	files->set_columns(2);
+	files->set_column_titles_visible(true);
+	files->set_column_min_width(1,100);
+	files->set_column_expand(0,true);
+	files->set_column_expand(1,false);
+	files->set_column_title(0,"Resource");
+	files->set_column_title(1,"Owns");
+	files->set_hide_root(true);
+	vbc->add_margin_child("Resources Without Explicit Ownership:",files,true);
+	set_title("Orphan Resource Explorer");
+	delete_confirm = memnew( ConfirmationDialog );
+	delete_confirm->set_text("Delete selected files?");
+	get_ok()->set_text("Delete");
+	add_child(delete_confirm);
+	dep_edit = memnew( DependencyEditor );
+	add_child(dep_edit);
+	files->connect("button_pressed",this,"_button_pressed");
+	delete_confirm->connect("confirmed",this,"_delete_confirm");
+	set_hide_on_ok(false);
+	// The dialog stays open on OK (set_hide_on_ok(false) above): ok_pressed() only raises the confirmation popup.
+}
diff --git a/tools/editor/dependency_editor.h b/tools/editor/dependency_editor.h
index 1c328e7a93..c372025ca0 100644
--- a/tools/editor/dependency_editor.h
+++ b/tools/editor/dependency_editor.h
@@ -91,4 +91,29 @@ public:
 	DependencyErrorDialog();
 };
 
+
+
+class OrphanResourcesDialog : public ConfirmationDialog {
+	OBJ_TYPE(OrphanResourcesDialog,ConfirmationDialog);
+	// Dialog listing the files that no scanned file depends on, with check boxes to delete them permanently.
+	DependencyEditor *dep_edit;
+	Tree *files;
+	ConfirmationDialog *delete_confirm;
+	void ok_pressed();
+	// Fills refs when p_parent is NULL, otherwise adds the files missing from refs below p_parent.
+	bool _fill_owners(EditorFileSystemDirectory *efsd, HashMap<String,int>& refs, TreeItem *p_parent);
+	// Paths selected for deletion: filled by ok_pressed(), consumed by _delete_confirm().
+	List<String> paths;
+	void _find_to_delete(TreeItem* p_item,List<String>& paths);
+	void _delete_confirm();
+	void _button_pressed(Object *p_item,int p_column, int p_id);
+
+	void refresh();
+	static void _bind_methods();
+public:
+	// Rebuilds the list, then pops the dialog up.
+	void show();
+	OrphanResourcesDialog();
+};
+
 #endif // DEPENDENCY_EDITOR_H
diff --git a/tools/editor/editor_file_dialog.cpp b/tools/editor/editor_file_dialog.cpp
index c62347d129..104539c308 100644
--- a/tools/editor/editor_file_dialog.cpp
+++ b/tools/editor/editor_file_dialog.cpp
@@ -27,14 +27,14 @@ void EditorFileDialog::_notification(int p_what) {
 		dir_prev->set_icon(get_icon("ArrowLeft","EditorIcons"));
 		dir_next->set_icon(get_icon("ArrowRight","EditorIcons"));
 		dir_up->set_icon(get_icon("ArrowUp","EditorIcons"));
+		refresh->set_icon(get_icon("Reload","EditorIcons"));
 		favorite->set_icon(get_icon("Favorites","EditorIcons"));
 
 		fav_up->set_icon(get_icon("MoveUp","EditorIcons"));
 		fav_down->set_icon(get_icon("MoveDown","EditorIcons"));
 		fav_rm->set_icon(get_icon("RemoveSmall","EditorIcons"));
 
-	}
-	if (p_what==NOTIFICATION_PROCESS) {
+	} else if (p_what==NOTIFICATION_PROCESS) {
 
 		if (preview_waiting) {
 			preview_wheel_timeout-=get_process_delta_time();
@@ -47,12 +47,17 @@ void EditorFileDialog::_notification(int p_what) {
 				preview_wheel_timeout=0.1;
 			}
 		}
-	}
-
-	if (p_what==NOTIFICATION_DRAW) {
+	} else if (p_what==NOTIFICATION_DRAW) {
 
 		//RID ci = get_canvas_item();
 		//get_stylebox("panel","PopupMenu")->draw(ci,Rect2(Point2(),get_size()));
+	} else if (p_what==EditorSettings::NOTIFICATION_EDITOR_SETTINGS_CHANGED) {
+
+		bool show_hidden = EditorSettings::get_singleton()->get("file_dialog/show_hidden_files");
+
+		if (show_hidden != show_hidden_files) {
+			set_show_hidden_files(show_hidden);
+		}
 	}
 }
 
@@ -1012,7 +1017,7 @@ void EditorFileDialog::_go_forward(){
 
 }
 
-bool EditorFileDialog::default_show_hidden_files=true;
+bool EditorFileDialog::default_show_hidden_files=false;
 
 void EditorFileDialog::set_display_mode(DisplayMode p_mode) {
 
@@ -1141,7 +1146,7 @@ void EditorFileDialog::_save_to_recent() {
 
 EditorFileDialog::EditorFileDialog() {
 
-	show_hidden_files=true;
+	show_hidden_files=default_show_hidden_files;
 	display_mode=DISPLAY_THUMBNAILS;
 	local_history_pos=0;
 
@@ -1170,6 +1175,10 @@ EditorFileDialog::EditorFileDialog() {
 	pathhb->add_child(dir);
 	dir->set_h_size_flags(SIZE_EXPAND_FILL);
 
+	refresh = memnew( ToolButton );
+	refresh->connect("pressed",this,"_update_file_list");
+	pathhb->add_child(refresh);
+
 	favorite = memnew( ToolButton );
 	favorite->set_toggle_mode(true);
 	favorite->connect("toggled",this,"_favorite_toggled");
diff --git a/tools/editor/editor_file_dialog.h b/tools/editor/editor_file_dialog.h
index 6cfd970516..eb38c3c02f 100644
--- a/tools/editor/editor_file_dialog.h
+++ b/tools/editor/editor_file_dialog.h
@@ -108,6 +108,7 @@ private:
 	ToolButton *mode_list;
 
 
+	ToolButton *refresh;
 	ToolButton *favorite;
 
 	ToolButton *fav_up;
diff --git a/tools/editor/editor_help.cpp b/tools/editor/editor_help.cpp
index 46ed2194a8..a5a3890129 100644
--- a/tools/editor/editor_help.cpp
+++ b/tools/editor/editor_help.cpp
@@ -36,6 +36,14 @@
 
 #include "os/keyboard.h"
 
+void EditorHelpSearch::popup() { // Shows the dialog without a new term; a term left in the box is re-selected and searched again.
+	popup_centered_ratio(0.6);
+	if (search_box->get_text()!="") { // the search box still holds text
+		search_box->select_all();
+		_update_search();
+	}
+	search_box->grab_focus();
+}
 
 void EditorHelpSearch::popup(const String& p_term) {
 
@@ -263,7 +271,7 @@ void EditorHelpSearch::_confirmed() {
 
 	String mdata=ti->get_metadata(0);
 	emit_signal("go_to_help",mdata);
-	editor->call("_editor_select",3); // in case EditorHelpSearch beeen invoked on top of other editor window
+	editor->call("_editor_select",EditorNode::EDITOR_SCRIPT); // in case EditorHelpSearch beeen invoked on top of other editor window
 	// go to that
 	hide();
 }
@@ -300,9 +308,9 @@ void EditorHelpSearch::_bind_methods() {
 }
 
 
-EditorHelpSearch::EditorHelpSearch(EditorNode *p_editor) {
+EditorHelpSearch::EditorHelpSearch() {
 
-	editor=p_editor;
+	editor=EditorNode::get_singleton();
 	VBoxContainer *vbc = memnew( VBoxContainer );
 	add_child(vbc);
 	set_child_rect(vbc);
@@ -318,17 +326,138 @@ EditorHelpSearch::EditorHelpSearch(EditorNode *p_editor) {
 	search_box->connect("input_event",this,"_sbox_input");
 	search_options = memnew( Tree );
 	vbc->add_margin_child("Matches:",search_options,true);
-	get_ok()->set_text("View");
+	get_ok()->set_text("Open");
 	get_ok()->set_disabled(true);
 	register_text_enter(search_box);
 	set_hide_on_ok(false);
 	search_options->connect("item_activated",this,"_confirmed");
 	set_title("Search Classes");
+
 //	search_options->set_hide_root(true);
 
 }
 
+/////////////////////////////////
+
+////////////////////////////////////
+/// /////////////////////////////////
+
+
+
+void EditorHelpIndex::add_type(const String& p_type,HashMap<String,TreeItem*>& p_types,TreeItem *p_root) {
+	// Adds a tree item for p_type under its base class item (created first, recursively, when missing) or under p_root, and records it in p_types.
+	if (p_types.has(p_type))
+		return;
+//	if (!ObjectTypeDB::is_type(p_type,base) || p_type==base)
+//		return;
+
+	String inherits=EditorHelp::get_doc_data()->class_list[p_type].inherits;
+
+	TreeItem *parent=p_root;
+	// The root is the default parent; it is replaced below when the class names a base class.
+
+	if (inherits.length()) {
+		// Make sure the base class item exists before parenting to it.
+		if (!p_types.has(inherits)) {
+
+			add_type(inherits,p_types,p_root);
+		}
+
+		if (p_types.has(inherits) )
+			parent=p_types[inherits];
+	}
+
+	TreeItem *item = class_list->create_item(parent);
+	item->set_metadata(0,p_type);
+	item->set_tooltip(0,EditorHelp::get_doc_data()->class_list[p_type].brief_description);
+	item->set_text(0,p_type);
+
+	// Show the class icon when the "EditorIcons" theme type provides one.
+	if (has_icon(p_type,"EditorIcons")) {
+
+		item->set_icon(0, get_icon(p_type,"EditorIcons"));
+	}
+
+	p_types[p_type]=item;
+}
+
+
+void EditorHelpIndex::_tree_item_selected() {
+	// Emits "open_class" with the selected item's text and hides the dialog; does nothing without a selection.
+
+	TreeItem *s=class_list->get_selected();
+	if (!s)
+		return;
+
+	emit_signal("open_class",s->get_text(0));
+
+	hide();
+
+	//_goto_desc(s->get_text(0));
+
+}
+
+void EditorHelpIndex::select_class(const String& p_class) {
+	// Selects the tree item of p_class, if it is listed, and asks the tree to keep the cursor visible.
+	if (!tree_item_map.has(p_class))
+		return;
+	tree_item_map[p_class]->select(0);
+	class_list->ensure_cursor_is_visible();
+}
+
+void EditorHelpIndex::_notification(int p_what) {
+	// On entering the tree, rebuilds the class tree from the documentation data.
+	if (p_what==NOTIFICATION_ENTER_TREE) {
+
+		class_list->clear();
+		tree_item_map.clear();
+		TreeItem *root = class_list->create_item();
+		class_list->set_hide_root(true);
+		connect("confirmed",this,"_tree_item_selected");
+		// NOTE(review): "confirmed" is connected on every ENTER_TREE; presumably the dialog enters the tree only once - confirm.
+
+		for(Map<String,DocData::ClassDoc>::Element *E=EditorHelp::get_doc_data()->class_list.front();E;E=E->next()) {
+
+ 
+			add_type(E->key(),tree_item_map,root);
+		}
+
+	}
+}
+
+void EditorHelpIndex::_bind_methods() {
+	// "open_class" is emitted by _tree_item_selected() with the class name as its argument.
+	ObjectTypeDB::bind_method("_tree_item_selected",&EditorHelpIndex::_tree_item_selected);
+	ObjectTypeDB::bind_method("select_class",&EditorHelpIndex::select_class);
+	ADD_SIGNAL( MethodInfo("open_class"));
+}
+
+
+
+EditorHelpIndex::EditorHelpIndex() {
+	// Lays out a single expanding class tree; the tree is populated later, in _notification().
+
+	VBoxContainer *vbc = memnew( VBoxContainer );
+	add_child(vbc);
+	set_child_rect(vbc);
+
+	class_list = memnew( Tree );
+	vbc->add_margin_child("Class List: ",class_list,true);
+	class_list->set_v_size_flags(SIZE_EXPAND_FILL);
+	// Activating an item ends up in the same handler that _notification() connects to "confirmed".
+
+	class_list->connect("item_activated",this,"_tree_item_selected");
+
+
+	get_ok()->set_text("Open");
+}
+
+
+
+/////////////////////////////////
+
+////////////////////////////////////
+/// /////////////////////////////////
 DocData *EditorHelp::doc=NULL;
 
 void EditorHelp::_unhandled_key_input(const InputEvent& p_ev) {
@@ -339,8 +468,6 @@ void EditorHelp::_unhandled_key_input(const InputEvent& p_ev) {
 
 		search->grab_focus();
 		search->select_all();
-	} else if (p_ev.key.mod.shift && p_ev.key.scancode==KEY_F1) {
-		class_search->popup();
 	}
 }
 
@@ -351,17 +478,19 @@ void EditorHelp::_search(const String&) {
 
 
 	String stext=search->get_text();
-	bool keep = prev_search==stext && class_list->get_selected() && prev_search_page==class_list->get_selected()->get_text(0);
+	bool keep = prev_search==stext;
 
-	class_desc->search(stext, keep);
+	bool ret = class_desc->search(stext, keep);
+	if (!ret) {
+		class_desc->search(stext, false);
+	}
 
 	prev_search=stext;
-	if (class_list->get_selected())
-		prev_search_page=class_list->get_selected()->get_text(0);
 
 
 }
 
+#if 0
 void EditorHelp::_button_pressed(int p_idx) {
 
 	if (p_idx==PAGE_CLASS_LIST) {
@@ -399,16 +528,11 @@ void EditorHelp::_button_pressed(int p_idx) {
 	} else if (p_idx==PAGE_SEARCH) {
 
 		_search("");
-	} else if (p_idx==CLASS_SEARCH) {
-
-		class_search->popup();
 	}
-
-
 }
 
 
-
+#endif
 
 void EditorHelp::_class_list_select(const String& p_select) {
 
@@ -417,16 +541,28 @@ void EditorHelp::_class_list_select(const String& p_select) {
 
 void EditorHelp::_class_desc_select(const String& p_select) {
 
+
+
+//	print_line("LINK: "+p_select);
 	if (p_select.begins_with("#")) {
-		_goto_desc(p_select.substr(1,p_select.length()));
+		//_goto_desc(p_select.substr(1,p_select.length()));
+		emit_signal("go_to_help","class_name:"+p_select.substr(1,p_select.length()));
 		return;
 	} else if (p_select.begins_with("@")) {
 
 		String m = p_select.substr(1,p_select.length());
-		if (!method_line.has(m))
-			return;
-		class_desc->scroll_to_line(method_line[m]);
-		return;
+
+		if (m.find(".")!=-1) {
+			// "Class.method" link: the target is in another class, so emit
+			// "class_method:<Class>:<method>" (slice 0 = class name, slice 1 = method name).
+			emit_signal("go_to_help","class_method:"+m.get_slice(".",0)+":"+m.get_slice(".",1));
+		} else {
+
+			if (!method_line.has(m))
+				return;
+			class_desc->scroll_to_line(method_line[m]);
+		}
+
 	}
 
 
@@ -449,68 +585,40 @@ void EditorHelp::_add_type(const String& p_type) {
 
 }
 
-void EditorHelp::_update_history_buttons() {
-
-	back->set_disabled(history_pos<2);
-	forward->set_disabled(history_pos>=history.size());
-
-}
-
-
 void EditorHelp::_scroll_changed(double p_scroll) {
 
 	if (scroll_locked)
 		return;
 
-	int p = history_pos -1;
-	if (p<0 || p>=history.size())
-		return;
-
 	if (class_desc->get_v_scroll()->is_hidden())
 		p_scroll=0;
 
-	history[p].scroll=p_scroll;
+	//history[p].scroll=p_scroll;
 }
 
-Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_vscr) {
+Error EditorHelp::_goto_desc(const String& p_class,int p_vscr) {
 
 	//ERR_FAIL_COND(!doc->class_list.has(p_class));
 	if (!doc->class_list.has(p_class))
 		return ERR_DOES_NOT_EXIST;
 
 
-	if (tree_item_map.has(p_class)) {
+	//if (tree_item_map.has(p_class)) {
 		select_locked = true;
-		tree_item_map[p_class]->select(0);
-		class_list->ensure_cursor_is_visible();
-	}
+	//}
 
 	class_desc->show();
 	//tabs->set_current_tab(PAGE_CLASS_DESC);
-	edited_class->set_pressed(true);
-	class_list_button->set_pressed(false);
 	description_line=0;
 
-	if (p_class==edited_class->get_text())
+	if (p_class==edited_class)
 		return OK; //already there
 
 	scroll_locked=true;
 
-	if (p_update_history) {
-
-		history.resize(history_pos);
-		history_pos++;
-		History h;
-		h.c=p_class;
-		h.scroll=0;
-		history.push_back(h);
-		_update_history_buttons();
-		class_desc->get_v_scroll()->set_val(0);
-	}
-
 	class_desc->clear();
 	method_line.clear();
-	edited_class->set_text(p_class);
+	edited_class=p_class;
 	//edited_class->show();
 
 
@@ -529,7 +637,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 	class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/keyword_color"));
 	class_desc->add_text("Class: ");
 	class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/base_type_color"));
-	class_desc->add_text(p_class);
+	_add_text(p_class);
 	class_desc->pop();
 	class_desc->pop();
 	class_desc->pop();
@@ -547,7 +655,6 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 		class_desc->pop();
 		class_desc->add_newline();
 		class_desc->add_newline();
-		class_desc->add_newline();
 
 	}
 
@@ -561,8 +668,13 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 
 		//class_desc->add_newline();
 		class_desc->add_newline();
+		class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
+		class_desc->push_font( get_font("normal","Fonts") );
+		class_desc->push_indent(1);
 		_add_text(cd.brief_description);
-		class_desc->add_newline();
+		class_desc->pop();
+		class_desc->pop();
+		class_desc->pop();
 		class_desc->add_newline();
 		class_desc->add_newline();
 	}
@@ -593,7 +705,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 				class_desc->push_meta("@"+cd.methods[i].name);
 			}
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
-			class_desc->add_text(cd.methods[i].name);
+			_add_text(cd.methods[i].name);
 			class_desc->pop();
 			if (cd.methods[i].description!="")
 				class_desc->pop();
@@ -605,13 +717,14 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 				if (j>0)
 					class_desc->add_text(", ");
 				_add_type(cd.methods[i].arguments[j].type);
-				class_desc->add_text(" "+cd.methods[i].arguments[j].name);
+				class_desc->add_text(" ");
+				_add_text(cd.methods[i].arguments[j].name);
 				if (cd.methods[i].arguments[j].default_value!="") {
 
 					class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 					class_desc->add_text("=");
 					class_desc->pop();
-					class_desc->add_text(cd.methods[i].arguments[j].default_value);
+					_add_text(cd.methods[i].arguments[j].default_value);
 				}
 
 				class_desc->pop();
@@ -623,7 +736,8 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			if (cd.methods[i].qualifiers!="") {
 
 				class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/keyword_color"));
-				class_desc->add_text(" "+cd.methods[i].qualifiers);
+				class_desc->add_text(" ");
+				_add_text(cd.methods[i].qualifiers);
 				class_desc->pop();
 
 			}
@@ -639,6 +753,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 
 	if (cd.properties.size()) {
 
+
 		class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/keyword_color"));
 		class_desc->push_font(doc_title_font);
 		class_desc->add_text("Members:");
@@ -656,7 +771,8 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			class_desc->push_font(doc_code_font);
 			_add_type(cd.properties[i].type);
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
-			class_desc->add_text(" "+cd.properties[i].name);
+			class_desc->add_text(" ");
+			_add_text(cd.properties[i].name);
 			class_desc->pop();
 			class_desc->pop();
 
@@ -664,7 +780,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 				class_desc->push_font(doc_font);
 				class_desc->add_text("  ");
 				class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/comment_color"));
-				class_desc->add_text(cd.properties[i].description);
+				_add_text(cd.properties[i].description);
 				class_desc->pop();
 				class_desc->pop();
 
@@ -699,7 +815,8 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			class_desc->push_font(doc_code_font);
 			_add_type(cd.theme_properties[i].type);
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
-			class_desc->add_text(" "+cd.theme_properties[i].name);
+			class_desc->add_text(" ");
+			_add_text(cd.theme_properties[i].name);
 			class_desc->pop();
 			class_desc->pop();
 
@@ -707,7 +824,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 				class_desc->push_font(doc_font);
 				class_desc->add_text("  ");
 				class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/comment_color"));
-				class_desc->add_text(cd.theme_properties[i].description);
+				_add_text(cd.theme_properties[i].description);
 				class_desc->pop();
 				class_desc->pop();
 
@@ -716,10 +833,9 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			class_desc->add_newline();
 		}
 
+		class_desc->add_newline();
 		class_desc->pop();
 
-		class_desc->add_newline();
-		class_desc->add_newline();
 
 	}
 	if (cd.signals.size()) {
@@ -738,11 +854,11 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 		for(int i=0;i<cd.signals.size();i++) {
 
 			signal_line[cd.signals[i].name]=class_desc->get_line_count()-2;	//gets overriden if description
-			class_desc->push_font(doc_code_font);
+			class_desc->push_font(doc_code_font);  // monofont
 			//_add_type("void");
 			//class_desc->add_text(" ");
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
-			class_desc->add_text(cd.signals[i].name);
+			_add_text(cd.signals[i].name);
 			class_desc->pop();
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 			class_desc->add_text(cd.signals[i].arguments.size()?"( ":"(");
@@ -752,13 +868,14 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 				if (j>0)
 					class_desc->add_text(", ");
 				_add_type(cd.signals[i].arguments[j].type);
-				class_desc->add_text(" "+cd.signals[i].arguments[j].name);
+				class_desc->add_text(" ");
+				_add_text(cd.signals[i].arguments[j].name);
 				if (cd.signals[i].arguments[j].default_value!="") {
 
 					class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 					class_desc->add_text("=");
 					class_desc->pop();
-					class_desc->add_text(cd.signals[i].arguments[j].default_value);
+					_add_text(cd.signals[i].arguments[j].default_value);
 				}
 
 				class_desc->pop();
@@ -767,21 +884,21 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 			class_desc->add_text(cd.signals[i].arguments.size()?" )":")");
 			class_desc->pop();
+			class_desc->pop(); // end monofont
 			if (cd.signals[i].description!="") {
 
 				class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/comment_color"));
-				class_desc->add_text(" "+cd.signals[i].description);
+				class_desc->add_text(" ");
+				_add_text(cd.signals[i].description);
 				class_desc->pop();
 
 			}
-			class_desc->pop();//monofont
 			class_desc->add_newline();
 
 		}
 
 		class_desc->pop();
 		class_desc->add_newline();
-		class_desc->add_newline();
 
 	}
 
@@ -803,20 +920,20 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			constant_line[cd.constants[i].name]=class_desc->get_line_count()-2;
 			class_desc->push_font(doc_code_font);
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/base_type_color"));
-			class_desc->add_text(cd.constants[i].name);
+			_add_text(cd.constants[i].name);
 			class_desc->pop();
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 			class_desc->add_text(" = ");
 			class_desc->pop();
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/keyword_color"));
-			class_desc->add_text(cd.constants[i].value);
+			_add_text(cd.constants[i].value);
 			class_desc->pop();
 			class_desc->pop();
 			if (cd.constants[i].description!="") {
 				class_desc->push_font(doc_font);
 				class_desc->add_text("  ");
 				class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/comment_color"));
-				class_desc->add_text(cd.constants[i].description);
+				_add_text(cd.constants[i].description);
 				class_desc->pop();
 				class_desc->pop();
 			}
@@ -826,7 +943,6 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 
 		class_desc->pop();
 		class_desc->add_newline();
-		class_desc->add_newline();
 
 
 	}
@@ -834,7 +950,6 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 	if (cd.description!="") {
 
 		description_line=class_desc->get_line_count()-2;
-
 		class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/keyword_color"));
 		class_desc->push_font(doc_title_font);
 		class_desc->add_text("Description:");
@@ -842,8 +957,14 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 		class_desc->pop();
 
 		class_desc->add_newline();
-		_add_text(cd.description);
 		class_desc->add_newline();
+		class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
+		class_desc->push_font( get_font("normal","Fonts") );
+		class_desc->push_indent(1);
+		_add_text(cd.description);
+		class_desc->pop();
+		class_desc->pop();
+		class_desc->pop();
 		class_desc->add_newline();
 		class_desc->add_newline();
 	}
@@ -858,22 +979,18 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 
 		class_desc->add_newline();
 		class_desc->add_newline();
-		class_desc->push_indent(1);
 
 
 		for(int i=0;i<cd.methods.size();i++) {
 
 			method_line[cd.methods[i].name]=class_desc->get_line_count()-2;
 
-			if( cd.methods[i].description != "") {
-				class_desc->add_newline();
-			}
 			class_desc->push_font(doc_code_font);
 			_add_type(cd.methods[i].return_type);
 
 			class_desc->add_text(" ");
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
-			class_desc->add_text(cd.methods[i].name);
+			_add_text(cd.methods[i].name);
 			class_desc->pop();
 			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 			class_desc->add_text(cd.methods[i].arguments.size()?"( ":"(");
@@ -883,13 +1000,14 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 				if (j>0)
 					class_desc->add_text(", ");
 				_add_type(cd.methods[i].arguments[j].type);
-				class_desc->add_text(" "+cd.methods[i].arguments[j].name);
+				class_desc->add_text(" ");
+				_add_text(cd.methods[i].arguments[j].name);
 				if (cd.methods[i].arguments[j].default_value!="") {
 
 					class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/symbol_color"));
 					class_desc->add_text("=");
 					class_desc->pop();
-					class_desc->add_text(cd.methods[i].arguments[j].default_value);
+					_add_text(cd.methods[i].arguments[j].default_value);
 				}
 
 				class_desc->pop();
@@ -901,19 +1019,23 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 			if (cd.methods[i].qualifiers!="") {
 
 				class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/keyword_color"));
-				class_desc->add_text(" "+cd.methods[i].qualifiers);
+				class_desc->add_text(" ");
+				_add_text(cd.methods[i].qualifiers);
 				class_desc->pop();
 
 			}
 
 			class_desc->pop();
 
-			if( cd.methods[i].description != "") {
-				class_desc->add_text("  ");
-				_add_text(cd.methods[i].description);
-				class_desc->add_newline();
-				class_desc->add_newline();
-			}
+			class_desc->add_newline();
+			class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
+			class_desc->push_font( get_font("normal","Fonts") );
+			class_desc->push_indent(1);
+			_add_text(cd.methods[i].description);
+			class_desc->pop();
+			class_desc->pop();
+			class_desc->pop();
+			class_desc->add_newline();
 			class_desc->add_newline();
 			class_desc->add_newline();
 
@@ -925,10 +1047,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 
 	}
 
-	if (!p_update_history) {
 
-		class_desc->get_v_scroll()->set_val(history[history_pos-1].scroll);
-	}
 
 	scroll_locked=false;
 
@@ -938,9 +1057,7 @@ Error EditorHelp::_goto_desc(const String& p_class,bool p_update_history,int p_v
 void EditorHelp::_request_help(const String& p_string) {
 	Error err = _goto_desc(p_string);
 	if (err==OK) {
-		editor->call("_editor_select",3);
-	} else {
-		class_search->popup(p_string);
+		editor->call("_editor_select",EditorNode::EDITOR_SCRIPT);
 	}
 	//100 palabras
 }
@@ -980,16 +1097,16 @@ void EditorHelp::_help_callback(const String& p_topic) {
 			line=constant_line[name];
 	}
 
-	class_desc->scroll_to_line(line);
+	class_desc->call_deferred("scroll_to_line", line);
 
 }
 
 void EditorHelp::_add_text(const String& p_bbcode) {
 
 
-	class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
+	/*class_desc->push_color(EditorSettings::get_singleton()->get("text_editor/text_color"));
 	class_desc->push_font( get_font("normal","Fonts") );
-	class_desc->push_indent(1);
+	class_desc->push_indent(1);*/
 	int pos = 0;
 
 	List<String> tag_stack;
@@ -1067,7 +1184,7 @@ void EditorHelp::_add_text(const String& p_bbcode) {
 		} else if (tag=="i") {
 
 			//use italics font
-			//class_desc->push_font(get_font("italic","Fonts"));
+			class_desc->push_font(get_font("italic","Fonts"));
 			pos=brk_end+1;
 			tag_stack.push_front(tag);
 		} else if (tag=="code") {
@@ -1203,321 +1320,160 @@ void EditorHelp::_add_text(const String& p_bbcode) {
 		}
 	}
 
+	/*class_desc->pop();
 	class_desc->pop();
-	class_desc->pop();
+	class_desc->pop();*/
 
 }
 
 
-void EditorHelp::add_type(const String& p_type,HashMap<String,TreeItem*>& p_types,TreeItem *p_root) {
-
-	if (p_types.has(p_type))
-		return;
-//	if (!ObjectTypeDB::is_type(p_type,base) || p_type==base)
-//		return;
-
-	String inherits=doc->class_list[p_type].inherits;
-
-	TreeItem *parent=p_root;
 
 
-	if (inherits.length()) {
+void EditorHelp::_update_doc() {
 
-		if (!p_types.has(inherits)) {
 
-			add_type(inherits,p_types,p_root);
-		}
 
-		if (p_types.has(inherits) )
-			parent=p_types[inherits];
-	}
+}
 
-	TreeItem *item = class_list->create_item(parent);
-	item->set_metadata(0,p_type);
-	item->set_tooltip(0,doc->class_list[p_type].brief_description);
-	item->set_text(0,p_type);
 
+void EditorHelp::generate_doc() {
 
-	if (has_icon(p_type,"EditorIcons")) {
+	doc = memnew( DocData );
+	doc->generate(true);
+	DocData compdoc;
+	compdoc.load_compressed(_doc_data_compressed,_doc_data_compressed_size,_doc_data_uncompressed_size);
+	doc->merge_from(compdoc); //ensure all is up to date
 
-		item->set_icon(0, get_icon(p_type,"EditorIcons"));
-	}
 
-	p_types[p_type]=item;
 }
 
+void EditorHelp::_notification(int p_what) {
 
 
-void EditorHelp::_update_doc() {
-
-
-	class_list->clear();
-
-	List<StringName> type_list;
-
-	tree_item_map.clear();
+	switch(p_what) {
 
-	TreeItem *root = class_list->create_item();
-	class_list->set_hide_root(true);
-	List<StringName>::Element *I=type_list.front();
+		case NOTIFICATION_READY: {
 
-	for(Map<String,DocData::ClassDoc>::Element *E=doc->class_list.front();E;E=E->next()) {
 
+//			forward->set_icon(get_icon("Forward","EditorIcons"));
+//			back->set_icon(get_icon("Back","EditorIcons"));
+			_update_doc();
 
-		add_type(E->key(),tree_item_map,root);
+		} break;
 	}
+}
 
+void EditorHelp::go_to_help(const String& p_help) {
+
+	_help_callback(p_help);
 }
 
+void EditorHelp::go_to_class(const String& p_class,int p_scroll) {
 
-void EditorHelp::generate_doc() {
+	_goto_desc(p_class,p_scroll);
+}
 
-	doc = memnew( DocData );
-	doc->generate(true);
-	DocData compdoc;
-	compdoc.load_compressed(_doc_data_compressed,_doc_data_compressed_size,_doc_data_uncompressed_size);
-	doc->merge_from(compdoc); //ensure all is up to date
+void EditorHelp::popup_search() {
 
 
+	search_dialog->popup_centered(Size2(250,80));
+	search->grab_focus();
 }
 
-void EditorHelp::_notification(int p_what) {
+void EditorHelp::_search_cbk() {
 
+	_search(search->get_text());
+}
 
-	switch(p_what) {
+String EditorHelp::get_class_name() {
 
-		case NOTIFICATION_READY: {
+	return edited_class;
+}
 
+void EditorHelp::search_again() {
+	_search(prev_search);
+}
 
-			forward->set_icon(get_icon("Forward","EditorIcons"));
-			back->set_icon(get_icon("Back","EditorIcons"));
-			_update_doc();
-			editor->connect("request_help",this,"_request_help");
+int EditorHelp::get_scroll() const {
 
-		} break;
-	}
+	return class_desc->get_v_scroll()->get_val();
 }
+void EditorHelp::set_scroll(int p_scroll) {
 
-void EditorHelp::_tree_item_selected() {
 
-	if (select_locked) {
-		select_locked = false;
-		return;
-	}
-	TreeItem *s=class_list->get_selected();
-	if (!s)
-		return;
-	select_locked=true;
-	_goto_desc(s->get_text(0));
-	select_locked=false;
+	class_desc->get_v_scroll()->set_val(p_scroll);
+
 }
 
 void EditorHelp::_bind_methods() {
 
 	ObjectTypeDB::bind_method("_class_list_select",&EditorHelp::_class_list_select);
 	ObjectTypeDB::bind_method("_class_desc_select",&EditorHelp::_class_desc_select);
-	ObjectTypeDB::bind_method("_button_pressed",&EditorHelp::_button_pressed);
+//	ObjectTypeDB::bind_method("_button_pressed",&EditorHelp::_button_pressed);
 	ObjectTypeDB::bind_method("_scroll_changed",&EditorHelp::_scroll_changed);
 	ObjectTypeDB::bind_method("_request_help",&EditorHelp::_request_help);
 	ObjectTypeDB::bind_method("_unhandled_key_input",&EditorHelp::_unhandled_key_input);
 	ObjectTypeDB::bind_method("_search",&EditorHelp::_search);
-	ObjectTypeDB::bind_method("_tree_item_selected",&EditorHelp::_tree_item_selected);
+	ObjectTypeDB::bind_method("_search_cbk",&EditorHelp::_search_cbk);
 	ObjectTypeDB::bind_method("_help_callback",&EditorHelp::_help_callback);
 
+	ADD_SIGNAL(MethodInfo("go_to_help"));
+
 }
 
-EditorHelp::EditorHelp(EditorNode *p_editor) {
+EditorHelp::EditorHelp() {
 
-	editor=p_editor;
+	editor=EditorNode::get_singleton();
 
 	VBoxContainer *vbc = this;
 
-	HBoxContainer *panel_hb = memnew( HBoxContainer );
-
-	Button *b = memnew( Button );
-	b->set_text("Class List");
-	panel_hb->add_child(b);
-	vbc->add_child(panel_hb);
-	b->set_toggle_mode(true);
-	b->set_pressed(true);
-	b->connect("pressed",this,"_button_pressed",make_binds(PAGE_CLASS_LIST));
-	class_list_button=b;
-	class_list_button->hide();
-
-	b = memnew( Button );
-	b->set_text("Class");
-	panel_hb->add_child(b);
-	edited_class=b;
-	edited_class->hide();
-	b->set_toggle_mode(true);
-	b->connect("pressed",this,"_button_pressed",make_binds(PAGE_CLASS_DESC));
-
-	b = memnew( Button );
-	b->set_text("Search in Classes");
-	panel_hb->add_child(b);
-	b->connect("pressed",this,"_button_pressed",make_binds(CLASS_SEARCH));
-
-	Control *expand = memnew( Control );
-	expand->set_h_size_flags(SIZE_EXPAND_FILL);
-	panel_hb->add_child(expand);
-
-	b = memnew( Button );
-	panel_hb->add_child(b);
-	back=b;
-	b->connect("pressed",this,"_button_pressed",make_binds(PAGE_CLASS_PREV));
-
-	b = memnew( Button );
-	panel_hb->add_child(b);
-	forward=b;
-	b->connect("pressed",this,"_button_pressed",make_binds(PAGE_CLASS_NEXT));
-
-	Separator *hs = memnew( VSeparator );
-	panel_hb->add_child(hs);
-	Control *ec = memnew( Control );
-	ec->set_custom_minimum_size(Size2(200,1));
-	panel_hb->add_child(ec);
-	search = memnew( LineEdit );
-	ec->add_child(search);
-	search->set_area_as_parent_rect();
-	search->connect("text_entered",this,"_search");
-
-	b = memnew( Button );
-	b->set_text("Find");
-	panel_hb->add_child(b);
-	b->connect("pressed",this,"_button_pressed",make_binds(PAGE_SEARCH));
-
-	hs = memnew( VSeparator );
-	panel_hb->add_child(hs);
 
-	h_split = memnew( HSplitContainer );
-	h_split->set_v_size_flags(SIZE_EXPAND_FILL);
-
-
-	vbc->add_child(h_split);
-
-	class_list = memnew( Tree );
-	h_split->add_child(class_list);
 	//class_list->connect("meta_clicked",this,"_class_list_select");
 	//class_list->set_selection_enabled(true);
 
 	{
-		PanelContainer *pc = memnew( PanelContainer );
+		Panel *pc = memnew( Panel );
 		Ref<StyleBoxFlat> style( memnew( StyleBoxFlat ) );
-		style->set_bg_color( EditorSettings::get_singleton()->get("text_editor/background_color") );	
-		style->set_default_margin(MARGIN_LEFT,20);
-		style->set_default_margin(MARGIN_TOP,20);
+		style->set_bg_color( EditorSettings::get_singleton()->get("text_editor/background_color") );
+		pc->set_v_size_flags(SIZE_EXPAND_FILL);
 		pc->add_style_override("panel", style); //get_stylebox("normal","TextEdit"));
-		h_split->add_child(pc);
+		vbc->add_child(pc);
 		class_desc = memnew( RichTextLabel );
 		pc->add_child(class_desc);
+		class_desc->set_area_as_parent_rect(8);
 		class_desc->connect("meta_clicked",this,"_class_desc_select");
 	}
 
 	class_desc->get_v_scroll()->connect("value_changed",this,"_scroll_changed");
 	class_desc->set_selection_enabled(true);
-	editor=p_editor;
-	history_pos=0;
+
 	scroll_locked=false;
 	select_locked=false;
 	set_process_unhandled_key_input(true);
-	h_split->set_split_offset(200);
-	class_list->connect("cell_selected",this,"_tree_item_selected");
 	class_desc->hide();
 
-	class_search = memnew( EditorHelpSearch(editor) );
-	editor->get_gui_base()->add_child(class_search);
-	class_search->connect("go_to_help",this,"_help_callback");
-//	prev_search_page=-1;
-}
-
-EditorHelp::~EditorHelp() {
-	if (doc)
-		memdelete(doc);
-}
-
-
-void EditorHelpPlugin::edit(Object *p_object) {
-
-	if (!p_object->cast_to<Script>())
-		return;
-
-	//editor_help->edit(p_object->cast_to<Script>());
-}
-
-bool EditorHelpPlugin::handles(Object *p_object) const {
-
-	return false;
-}
-
-void EditorHelpPlugin::make_visible(bool p_visible) {
-
-	if (p_visible) {
-		editor_help->show();
-	} else {
-
-		editor_help->hide();
-	}
-
-}
-
-void EditorHelpPlugin::selected_notify() {
-
-	//editor_help->ensure_select_current();
-}
-
-Dictionary EditorHelpPlugin::get_state() const {
-
-	return Dictionary();
-}
-
-void EditorHelpPlugin::set_state(const Dictionary& p_state) {
-
-	//editor_help->set_state(p_state);
-}
-void EditorHelpPlugin::clear() {
-
-	//editor_help->clear();
-}
-
-void EditorHelpPlugin::save_external_data() {
-
-	//editor_help->save_external_data();
-}
-
-void EditorHelpPlugin::apply_changes() {
-
-	//editor_help->apply_helps();
-}
-
-void EditorHelpPlugin::restore_global_state() {
-
-	//if (bool(EDITOR_DEF("text_editor/restore_helps_on_load",true))) {
-//		editor_help->_load_files_state();
-	//}
-
-}
+	search_dialog = memnew( ConfirmationDialog );
+	add_child(search_dialog);
+	VBoxContainer *search_vb = memnew( VBoxContainer );
+	search_dialog->add_child(search_vb);
+	search_dialog->set_child_rect(search_vb);
+	search = memnew( LineEdit );
+	search_dialog->register_text_enter(search);
+	search_vb->add_margin_child("Search Text",search);
+	search_dialog->get_ok()->set_text("Find");
+	search_dialog->connect("confirmed",this,"_search_cbk");
+	search_dialog->set_hide_on_ok(false);
+	search_dialog->set_self_opacity(0.8);
 
-void EditorHelpPlugin::save_global_state() {
 
-	//if (bool(EDITOR_DEF("text_editor/restore_helps_on_load",true))) {
-//		editor_help->_save_files_state();
-//	}
+	/*class_search = memnew( EditorHelpSearch(editor) );
+	editor->get_gui_base()->add_child(class_search);
+	class_search->connect("go_to_help",this,"_help_callback");*/
 
+//	prev_search_page=-1;
 }
 
-
-EditorHelpPlugin::EditorHelpPlugin(EditorNode *p_node) {
-
-	editor=p_node;
-	editor_help = memnew( EditorHelp(p_node) );
-	editor->get_viewport()->add_child(editor_help);
-	editor_help->set_area_as_parent_rect();
-	editor_help->hide();
-
+EditorHelp::~EditorHelp() {
 
 }
 
-
-EditorHelpPlugin::~EditorHelpPlugin()
-{
-}
diff --git a/tools/editor/editor_help.h b/tools/editor/editor_help.h
index d4066d076a..04ac4d35ff 100644
--- a/tools/editor/editor_help.h
+++ b/tools/editor/editor_help.h
@@ -68,11 +68,32 @@ protected:
 	static void _bind_methods();
 public:
 
-	void popup(const String& p_term="");
+	void popup();
+	void popup(const String& p_term);
 
-	EditorHelpSearch(EditorNode *p_editor);
+	EditorHelpSearch();
 };
 
+class EditorHelpIndex : public ConfirmationDialog {
+	OBJ_TYPE( EditorHelpIndex, ConfirmationDialog );
+	// Dialog holding a Tree of classes; declares the "open_class" signal in _bind_methods().
+	// NOTE(review): tree_item_map presumably maps a class name to its tree row -- confirm in add_type().
+	Tree *class_list;
+	HashMap<String,TreeItem*> tree_item_map;
+	// Callback connected to class_list's "item_activated" signal by the constructor.
+	void _tree_item_selected();
+	void add_type(const String& p_type,HashMap<String,TreeItem*>& p_types,TreeItem *p_root);
+protected:
+
+	void _notification(int p_what);
+	static void _bind_methods();
+
+public:
+	// Bound by name in _bind_methods() so it can also be invoked through the object system.
+	void select_class(const String& p_class);
+
+	EditorHelpIndex();
+};
 
 
 class EditorHelp : public VBoxContainer {
@@ -91,17 +112,11 @@ class EditorHelp : public VBoxContainer {
 	};
 
 
-	struct History {
-		String c;
-		int scroll;
-	};
-
-	Vector<History> history;
-	int history_pos;
 	bool select_locked;
 
 	String prev_search;
-	String prev_search_page;
+
+	String edited_class;
 
 	EditorNode *editor;
 	Map<String,int> method_line;
@@ -111,21 +126,17 @@ class EditorHelp : public VBoxContainer {
 	Map<String,int> constant_line;
 	int description_line;
 
-	Tree *class_list;
 
 	RichTextLabel *class_desc;
 	HSplitContainer *h_split;
 	static DocData *doc;
 
-	Button *class_list_button;
-	Button *edited_class;
-	Button *back;
-	Button *forward;
+
+	ConfirmationDialog *search_dialog;
 	LineEdit *search;
 
-	String base_path;
 
-	HashMap<String,TreeItem*> tree_item_map;
+	String base_path;
 
 
 	void _help_callback(const String& p_topic);
@@ -133,25 +144,24 @@ class EditorHelp : public VBoxContainer {
 	void _add_text(const String& p_text);
 	bool scroll_locked;
 
-	void _button_pressed(int p_idx);
+	//void _button_pressed(int p_idx);
 	void _add_type(const String& p_type);
 
 	void _scroll_changed(double p_scroll);
 	void _class_list_select(const String& p_select);
 	void _class_desc_select(const String& p_select);
 
-	Error _goto_desc(const String& p_class,bool p_update_history=true,int p_vscr=-1);
-	void _update_history_buttons();
+	Error _goto_desc(const String& p_class, int p_vscr=-1);
+	//void _update_history_buttons();
 	void _update_doc();
 
 	void _request_help(const String& p_string);
 	void _search(const String& p_str);
+	void _search_cbk();
 
 	void _unhandled_key_input(const InputEvent& p_ev);
-	void add_type(const String& p_type,HashMap<String,TreeItem*>& p_types,TreeItem *p_root);
-	void _tree_item_selected();
 
-	EditorHelpSearch *class_search;
+
 
 protected:
 
@@ -163,41 +173,25 @@ public:
 	static void generate_doc();
 	static DocData *get_doc_data() { return doc; }
 
-	EditorHelp(EditorNode *p_editor=NULL);
-	~EditorHelp();
-};
-
-
+	void go_to_help(const String& p_help);
+	void go_to_class(const String& p_class,int p_scroll=0);
 
-class EditorHelpPlugin : public EditorPlugin {
+	void popup_search();
+	void search_again();
 
-	OBJ_TYPE( EditorHelpPlugin, EditorPlugin );
+	String get_class_name();
 
-	EditorHelp *editor_help;
-	EditorNode *editor;
-public:
+	void set_focused() { class_desc->grab_focus(); }
 
-	virtual String get_name() const { return "Help"; }
-	bool has_main_screen() const { return true; }
-	virtual void edit(Object *p_node);
-	virtual bool handles(Object *p_node) const;
-	virtual void make_visible(bool p_visible);
-	virtual void selected_notify();
+	int get_scroll() const;
+	void set_scroll(int p_scroll);
 
-	Dictionary get_state() const;
-	virtual void set_state(const Dictionary& p_state);
-	virtual void clear();
-
-	virtual void save_external_data();
-	virtual void apply_changes();
+	EditorHelp();
+	~EditorHelp();
+};
 
-	virtual void restore_global_state();
-	virtual void save_global_state();
 
-	EditorHelpPlugin(EditorNode *p_node);
-	~EditorHelpPlugin();
 
-};
 
 
 #endif // EDITOR_HELP_H
diff --git a/tools/editor/editor_import_export.cpp b/tools/editor/editor_import_export.cpp
index b13473e61c..cd455406b7 100644
--- a/tools/editor/editor_import_export.cpp
+++ b/tools/editor/editor_import_export.cpp
@@ -399,6 +399,40 @@ Vector<StringName> EditorExportPlatform::get_dependencies(bool p_bundles) const
 
 }
 
+String EditorExportPlatform::find_export_template(String template_file_name, String *err) const { // returns the template's path, or "" when it is not found
+	String user_file = EditorSettings::get_singleton()->get_settings_path()
+		+"/templates/"+template_file_name;
+	String system_file=OS::get_singleton()->get_installed_templates_path();
+	bool has_system_path=(system_file!="");
+	system_file+=template_file_name; // NOTE(review): no separator is inserted here; presumably the installed path ends with one -- confirm
+
+	// Prefer user file (the one under the editor settings path's "templates" directory)
+	if (FileAccess::exists(user_file)) {
+		return user_file;
+	}
+
+	// Now check system file; skipped entirely when the OS reported an empty installed-templates path
+	if (has_system_path) {
+		if (FileAccess::exists(system_file)) {
+			return system_file;
+		}
+	}
+
+	// Not found: when err is non-NULL, append (not overwrite) a message naming every location that was tried
+	if (err) {
+		*err+="No export template found at \""+user_file+"\"";
+		if (has_system_path)
+			*err+="\n or \""+system_file+"\".";
+		else
+			*err+=".";
+	}
+	return "";
+}
+
+bool EditorExportPlatform::exists_export_template(String template_file_name, String *err) const {
+	return find_export_template(template_file_name,err)!=""; // true iff the lookup returned a non-empty path; err is forwarded unchanged
+}
+
 ///////////////////////////////////////
 
 
@@ -1131,19 +1165,32 @@ Error EditorExportPlatformPC::export_project(const String& p_path, bool p_debug,
 
 	ep.step("Setting Up..",0);
 
-	String exe_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
-	if (use64) {
-		if (p_debug)
-			exe_path=custom_debug_binary!=""?custom_debug_binary:exe_path+debug_binary64;
-		else
-			exe_path=custom_release_binary!=""?custom_release_binary:exe_path+release_binary64;
-	} else {
+	String exe_path="";
 
-		if (p_debug)
-			exe_path=custom_debug_binary!=""?custom_debug_binary:exe_path+debug_binary32;
-		else
-			exe_path=custom_release_binary!=""?custom_release_binary:exe_path+release_binary32;
+	if (p_debug)
+		exe_path=custom_debug_binary;
+	else
+		exe_path=custom_release_binary;
 
+	if (exe_path=="") {
+		String fname;
+		if (use64) {
+			if (p_debug)
+				fname=debug_binary64;
+			else
+				fname=release_binary64;
+		} else {
+			if (p_debug)
+				fname=debug_binary32;
+			else
+				fname=release_binary32;
+		}
+		String err="";
+		exe_path=find_export_template(fname,&err);
+		if (exe_path=="") {
+			EditorNode::add_io_error(err);
+			return ERR_FILE_CANT_READ;
+		}
 	}
 
 	FileAccess *src_exe=FileAccess::open(exe_path,FileAccess::READ);
@@ -1207,32 +1254,42 @@ bool EditorExportPlatformPC::can_export(String *r_error) const {
 	String err;
 	bool valid=true;
 
-	String exe_path = EditorSettings::get_singleton()->get_settings_path()+"/templates/";
+	if (use64 && (!exists_export_template(debug_binary64) || !exists_export_template(release_binary64))) { // both 64-bit templates are required, but only when exporting 64-bit
+		valid=false;
+		err="No 64 bits export templates found.\nDownload and install export templates.\n";
+	}
 
-	if (!FileAccess::exists(exe_path+debug_binary32) || !FileAccess::exists(exe_path+release_binary32)) {
+	if (!use64 && (!exists_export_template(debug_binary32) || !exists_export_template(release_binary32))) {
 		valid=false;
 		err="No 32 bits export templates found.\nDownload and install export templates.\n";
 	}
-	if (!FileAccess::exists(exe_path+debug_binary64) || !FileAccess::exists(exe_path+release_binary64)) {
-		valid=false;
-		err="No 64 bits export templates found.\nDownload and install export templates.\n";
+
+	if(custom_debug_binary=="" && custom_release_binary=="") {
+		if (r_error) *r_error=err;
+		return valid;
 	}
 
+	bool dvalid = true;
+	bool rvalid = true;
 
-	if (custom_debug_binary!="" && !FileAccess::exists(custom_debug_binary)) {
-		valid=false;
-		err+="Custom debug binary not found.\n";
+	if(!FileAccess::exists(custom_debug_binary)) {
+		dvalid = false;
+		err = "Custom debug binary not found.\n";
 	}
 
-	if (custom_release_binary!="" && !FileAccess::exists(custom_release_binary)) {
-		valid=false;
-		err+="Custom release binary not found.\n";
+	if(!FileAccess::exists(custom_release_binary)) {
+		rvalid = false;
+		err = "Custom release binary not found.\n";
 	}
 
+	if (dvalid || rvalid)
+		valid = true;
+	else
+		valid = false;
+
 	if (r_error)
 		*r_error=err;
 	return valid;
-
 }
 
 
@@ -1550,6 +1607,17 @@ void EditorImportExport::image_export_get_images_in_group(const StringName& p_gr
 	}
 }
 
+void EditorImportExport::set_convert_text_scenes(bool p_convert) {
+	// Stores the flag; save_config() writes it to export.cfg as convert_scenes/convert_text_scenes.
+	convert_text_scenes=p_convert;
+}
+
+bool EditorImportExport::get_convert_text_scenes() const{
+	// Returns the stored flag; the constructor initialises it to true.
+	return convert_text_scenes;
+}
+
+
 void EditorImportExport::load_config() {
 
 	Ref<ConfigFile> cf = memnew( ConfigFile );
@@ -1592,6 +1660,12 @@ void EditorImportExport::load_config() {
 		}
 	}
 
+	if (cf->has_section("convert_scenes")) {
+
+		convert_text_scenes = cf->get_value("convert_scenes","convert_text_scenes");
+	}
+
+
 	if (cf->has_section("export_filter_files")) {
 
 
@@ -1706,6 +1780,17 @@ void EditorImportExport::load_config() {
 		}
 	}
 
+	if (cf->has_section("convert_samples")) {
+		if (cf->has_section_key("convert_samples","action")) { // written by save_config(); unknown values leave sample_action untouched
+			String action=cf->get_value("convert_samples","action");
+			if (action=="none") sample_action=SAMPLE_ACTION_NONE;
+			else if (action=="compress_ram") sample_action=SAMPLE_ACTION_COMPRESS_RAM;
+		}
+		if (cf->has_section_key("convert_samples","max_hz"))
+			sample_action_max_hz=cf->get_value("convert_samples","max_hz");
+		if (cf->has_section_key("convert_samples","trim"))
+			sample_action_trim=cf->get_value("convert_samples","trim");
+	}
 }
 
 
@@ -1814,8 +1899,18 @@ void EditorImportExport::save_config() {
 		case SCRIPT_ACTION_ENCRYPT: cf->set_value("script","action","encrypt"); break;
 	}
 
+	cf->set_value("convert_scenes","convert_text_scenes",convert_text_scenes);
+
 	cf->set_value("script","encrypt_key",script_key);
 
+	switch(sample_action) {
+		case SAMPLE_ACTION_NONE: cf->set_value("convert_samples","action","none"); break;
+		case SAMPLE_ACTION_COMPRESS_RAM: cf->set_value("convert_samples","action","compress_ram"); break;
+	}
+
+	cf->set_value("convert_samples","max_hz",sample_action_max_hz);
+	cf->set_value("convert_samples","trim",sample_action_trim);
+
 	cf->save("res://export.cfg");
 
 }
@@ -1841,6 +1936,35 @@ String EditorImportExport::script_get_encryption_key() const{
 }
 
 
+void EditorImportExport::sample_set_action(SampleAction p_action) {
+	// Stores the action; save_config() writes it to convert_samples/action as "none" or "compress_ram".
+	sample_action=p_action;
+}
+
+EditorImportExport::SampleAction EditorImportExport::sample_get_action() const{
+	// Returns the stored action; the constructor initialises it to SAMPLE_ACTION_NONE.
+	return sample_action;
+}
+
+void EditorImportExport::sample_set_max_hz(int p_hz){
+	// Stores the value as given (no range check here); persisted as convert_samples/max_hz.
+	sample_action_max_hz=p_hz;
+}
+int EditorImportExport::sample_get_max_hz() const{
+	// Returns the stored rate; the constructor initialises it to 44100.
+	return sample_action_max_hz;
+}
+
+void EditorImportExport::sample_set_trim(bool p_trim){
+	// Stores the flag; persisted as convert_samples/trim.
+	sample_action_trim=p_trim;
+}
+bool EditorImportExport::sample_get_trim() const{
+	// Returns the stored flag; the constructor initialises it to false.
+	return sample_action_trim;
+}
+
+
 void EditorImportExport::_bind_methods() {
 
 	ObjectTypeDB::bind_method(_MD("image_export_group_create"),&EditorImportExport::image_export_group_create);
@@ -1868,8 +1992,15 @@ EditorImportExport::EditorImportExport() {
 	image_formats.insert("png");
 	image_shrink=1;
 
+
 	script_action=SCRIPT_ACTION_COMPILE;
 
+	sample_action=SAMPLE_ACTION_NONE;
+	sample_action_max_hz=44100;
+	sample_action_trim=false;
+
+	convert_text_scenes=true;
+
 }
 
 
diff --git a/tools/editor/editor_import_export.h b/tools/editor/editor_import_export.h
index 245adffbfd..93086f7ad4 100644
--- a/tools/editor/editor_import_export.h
+++ b/tools/editor/editor_import_export.h
@@ -86,6 +86,8 @@ protected:
 	Vector<uint8_t> get_exported_file_default(String& p_fname) const;
 	virtual Vector<uint8_t> get_exported_file(String& p_fname) const;
 	virtual Vector<StringName> get_dependencies(bool p_bundles) const;
+	virtual String find_export_template(String template_file_name, String *err=NULL) const;
+	virtual bool exists_export_template(String template_file_name, String *err=NULL) const;
 
 	struct TempData {
 
@@ -245,6 +247,12 @@ public:
 		SCRIPT_ACTION_ENCRYPT
 	};
 
+	enum SampleAction {
+		// save_config() serialises these to export.cfg as "none" and "compress_ram" respectively.
+		SAMPLE_ACTION_NONE,
+		SAMPLE_ACTION_COMPRESS_RAM,
+	};
+
 protected:
 
 	struct ImageGroup {
@@ -274,6 +282,12 @@ protected:
 	ScriptAction script_action;
 	String script_key;
 
+	SampleAction sample_action;
+	int sample_action_max_hz;
+	bool sample_action_trim;
+
+	bool convert_text_scenes;
+
 	static EditorImportExport* singleton;
 
 	static void _bind_methods();
@@ -343,6 +357,18 @@ public:
 	void script_set_encryption_key(const String& p_key);
 	String script_get_encryption_key() const;
 
+	void sample_set_action(SampleAction p_action);
+	SampleAction sample_get_action() const;
+
+	void sample_set_max_hz(int p_hz);
+	int sample_get_max_hz() const;
+
+	void sample_set_trim(bool p_trim);
+	bool sample_get_trim() const;
+
+	void set_convert_text_scenes(bool p_convert);
+	bool get_convert_text_scenes() const;
+
 	void load_config();
 	void save_config();
 
diff --git a/tools/editor/editor_layout_dialog.cpp b/tools/editor/editor_layout_dialog.cpp
new file mode 100644
index 0000000000..e37f263c0c
--- /dev/null
+++ b/tools/editor/editor_layout_dialog.cpp
@@ -0,0 +1,59 @@
+/*************************************************************************/
+/*  editor_layout_dialog.cpp                                             */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "editor_layout_dialog.h"
+#include "object_type_db.h"
+
+void EditorLayoutDialog::clear_layout_name() {
+	// Empties the layout name LineEdit.
+	layout_name->clear();
+}
+
+void EditorLayoutDialog::ok_pressed() {
+	// Announce the typed layout name via "layout_selected"; an empty name is ignored.
+	const String chosen=layout_name->get_text();
+	if (chosen!="")
+		emit_signal("layout_selected", chosen);
+}
+
+void EditorLayoutDialog::_bind_methods() {
+	// Declares the "layout_selected" signal, which carries one String argument named "layout_name".
+	ADD_SIGNAL(MethodInfo("layout_selected",PropertyInfo( Variant::STRING,"layout_name")));
+}
+
+EditorLayoutDialog::EditorLayoutDialog()
+{
+	// Creates the name field, anchors it to both side edges, and moves it right after the dialog's label.
+	layout_name = memnew( LineEdit );
+	layout_name->set_margin(MARGIN_TOP,5);
+	layout_name->set_anchor_and_margin(MARGIN_LEFT,ANCHOR_BEGIN,5);
+	layout_name->set_anchor_and_margin(MARGIN_RIGHT,ANCHOR_END,5);
+	add_child(layout_name);
+	move_child(layout_name, get_label()->get_index()+1);
+}
diff --git a/tools/editor/editor_layout_dialog.h b/tools/editor/editor_layout_dialog.h
new file mode 100644
index 0000000000..7e3b9e3d8a
--- /dev/null
+++ b/tools/editor/editor_layout_dialog.h
@@ -0,0 +1,53 @@
+/*************************************************************************/
+/*  editor_layout_dialog.h                                               */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef EDITOR_LAYOUT_DIALOG_H
+#define EDITOR_LAYOUT_DIALOG_H
+
+#include "scene/gui/dialogs.h"
+#include "scene/gui/line_edit.h"
+
+class EditorLayoutDialog : public ConfirmationDialog {
+	// Confirmation dialog holding a single LineEdit in which a layout name is typed.
+	OBJ_TYPE( EditorLayoutDialog, ConfirmationDialog );
+
+	LineEdit *layout_name;
+
+protected:
+	// NOTE(review): ok_pressed() presumably overrides the base dialog's OK handler - confirm against dialogs.h.
+	static void _bind_methods();
+	virtual void ok_pressed();
+
+public:
+	void clear_layout_name();
+
+	EditorLayoutDialog();
+};
+
+#endif // EDITOR_LAYOUT_DIALOG_H
diff --git a/tools/editor/editor_log.cpp b/tools/editor/editor_log.cpp
index 2d26490a8a..264117eecd 100644
--- a/tools/editor/editor_log.cpp
+++ b/tools/editor/editor_log.cpp
@@ -81,6 +81,7 @@ void EditorLog::_notification(int p_what) {
 		log->add_color_override("default_color",get_color("font_color","Tree"));
 		tb->set_normal_texture( get_icon("Collapse","EditorIcons"));
 		tb->set_hover_texture( get_icon("CollapseHl","EditorIcons"));
+		//button->set_icon(get_icon("Console","EditorIcons"));
 
 	}
 
@@ -125,6 +126,7 @@ void EditorLog::add_message(const String& p_msg,bool p_error) {
 		log->push_color(get_color("fg_error","Editor"));
 	} else {
 		button->set_icon(Ref<Texture>());
+
 	}
 
 
@@ -154,17 +156,20 @@ void EditorLog::_dragged(const Point2& p_ofs) {
 */
 
 
-ToolButton *EditorLog::get_button() {
+Button *EditorLog::get_button() {
 
 	return button;
 }
 
 void EditorLog::_flip_request() {
 
-	if (is_visible())
+	if (is_visible()) {
 		hide();
-	else
+		button->show();
+	} else {
 		show();
+		button->hide();
+	}
 }
 
 void EditorLog::_undo_redo_cbk(void *p_self,const String& p_name) {
@@ -200,7 +205,7 @@ EditorLog::EditorLog() {
 	hb->add_child(title);
 
 
-	button = memnew( ToolButton );
+	button = memnew( Button );
 	button->set_text_align(Button::ALIGN_LEFT);
 	button->connect("pressed",this,"_flip_request");
 	button->set_focus_mode(FOCUS_NONE);
diff --git a/tools/editor/editor_log.h b/tools/editor/editor_log.h
index 6950ffa1a0..93044f9a2d 100644
--- a/tools/editor/editor_log.h
+++ b/tools/editor/editor_log.h
@@ -44,7 +44,7 @@ class EditorLog : public PanelContainer {
 
 	OBJ_TYPE( EditorLog, PanelContainer );
 
-	ToolButton *button;
+	Button *button;
 	Button *clearbutton;
 	Label *title;
 	RichTextLabel *log;
@@ -73,7 +73,7 @@ public:
 	void add_message(const String& p_msg, bool p_error=false);
 	void deinit();
 
-	ToolButton *get_button();
+	Button *get_button();
 	void clear();
 	EditorLog();
 	~EditorLog();
diff --git a/tools/editor/editor_node.cpp b/tools/editor/editor_node.cpp
index a3d7cbd7cf..4afb98bdd4 100644
--- a/tools/editor/editor_node.cpp
+++ b/tools/editor/editor_node.cpp
@@ -102,6 +102,7 @@
 #include "tools/editor/io_plugins/editor_sample_import_plugin.h"
 #include "tools/editor/io_plugins/editor_translation_import_plugin.h"
 #include "tools/editor/io_plugins/editor_mesh_import_plugin.h"
+#include "tools/editor/io_plugins/editor_export_scene.h"
 
 #include "plugins/editor_preview_plugins.h"
 
@@ -111,6 +112,8 @@ EditorNode *EditorNode::singleton=NULL;
 
 void EditorNode::_update_scene_tabs() {
 
+	bool show_rb = EditorSettings::get_singleton()->get("global/show_script_in_scene_tabs");
+
 	scene_tabs->clear_tabs();
 	Ref<Texture> script_icon = gui_base->get_icon("Script","EditorIcons");
 	for(int i=0;i<editor_data.get_edited_scene_count();i++) {
@@ -133,7 +136,7 @@ void EditorNode::_update_scene_tabs() {
 		bool unsaved = (i==current)?saved_version!=editor_data.get_undo_redo().get_version():editor_data.get_scene_version(i)!=0;
 		scene_tabs->add_tab(editor_data.get_scene_title(i)+(unsaved?"(*)":""),icon);
 
-		if (editor_data.get_scene_root_script(i).is_valid()) {
+		if (show_rb && editor_data.get_scene_root_script(i).is_valid()) {
 			scene_tabs->set_tab_right_button(i,script_icon);
 		}
 
@@ -163,13 +166,22 @@ void EditorNode::_unhandled_input(const InputEvent& p_event) {
 
 		switch(p_event.key.scancode) {
 
+			/*case KEY_F1:
+				if (!p_event.key.mod.shift && !p_event.key.mod.command)
+					_editor_select(EDITOR_SCRIPT);
+			break;*/
 			case KEY_F1:
 				if (!p_event.key.mod.shift && !p_event.key.mod.command)
-					_editor_select(3);
+					_editor_select(EDITOR_2D);
+			break;
+			case KEY_F2:
+				if (!p_event.key.mod.shift && !p_event.key.mod.command)
+					_editor_select(EDITOR_3D);
+			break;
+			case KEY_F3:
+				if (!p_event.key.mod.shift && !p_event.key.mod.command)
+					_editor_select(EDITOR_SCRIPT);
 			break;
-			case KEY_F2: _editor_select(0); break;
-			case KEY_F3: _editor_select(1); break;
-			case KEY_F4: _editor_select(2); break;
 			case KEY_F5: _menu_option_confirm((p_event.key.mod.control&&p_event.key.mod.shift)?RUN_PLAY_CUSTOM_SCENE:RUN_PLAY,true); break;
 			case KEY_F6: _menu_option_confirm(RUN_PLAY_SCENE,true); break;
 			case KEY_F7: _menu_option_confirm(RUN_PAUSE,true); break;
@@ -287,7 +299,7 @@ void EditorNode::_notification(int p_what) {
 		VisualServer::get_singleton()->viewport_set_hide_canvas(get_scene_root()->get_viewport(),true);
 		VisualServer::get_singleton()->viewport_set_disable_environment(get_viewport()->get_viewport_rid(),true);
 
-		_editor_select(1);
+		_editor_select(EDITOR_3D);
 
 		if (defer_load_scene!="") {
 
@@ -531,7 +543,6 @@ void EditorNode::save_resource_as(const Ref<Resource>& p_resource) {
 }
 
 
-
 void EditorNode::_menu_option(int p_option) {
 	
 	_menu_option_confirm(p_option,false);
@@ -877,7 +888,7 @@ void EditorNode::_save_scene_with_preview(String p_file) {
 		}
 	}
 
-	_editor_select(is2d?0:1);
+	_editor_select(is2d?EDITOR_2D:EDITOR_3D);
 
 	VS::get_singleton()->viewport_queue_screen_capture(viewport);
 	save.step("Creating Thumbnail",2);
@@ -1398,6 +1409,69 @@ void EditorNode::_dialog_action(String p_file) {
 			save_resource_in_path(current_res,p_file);
 
 		} break;
+		case SETTINGS_LAYOUT_SAVE: {
+
+			if (p_file.empty())
+				return;
+
+			if (p_file=="Default") {
+				confirm_error->set_text("Cannot overwrite default layout!");
+				confirm_error->popup_centered_minsize();
+				return;
+			}
+
+			Ref<ConfigFile> config;
+			config.instance();
+			Error err = config->load(EditorSettings::get_singleton()->get_settings_path().plus_file("editor_layouts.cfg"));
+			if (err!=OK && err!=ERR_FILE_NOT_FOUND) {
+				return; //no config
+			}
+
+			_save_docks_to_config(config, p_file);
+
+			config->save(EditorSettings::get_singleton()->get_settings_path().plus_file("editor_layouts.cfg"));
+
+			layout_dialog->hide();
+			_update_layouts_menu();
+
+		} break;
+		case SETTINGS_LAYOUT_DELETE: {
+
+			if (p_file.empty())
+				return;
+
+			if (p_file=="Default") {
+				confirm_error->set_text("Cannot delete default layout!");
+				confirm_error->popup_centered_minsize();
+				return;
+			}
+
+			Ref<ConfigFile> config;
+			config.instance();
+			Error err = config->load(EditorSettings::get_singleton()->get_settings_path().plus_file("editor_layouts.cfg"));
+			if (err!=OK) {
+				return; //no config
+			}
+
+			if (!config->has_section(p_file)) {
+				confirm_error->set_text("Layout name not found!");
+				confirm_error->popup_centered_minsize();
+				return;
+			}
+
+			// erase
+			List<String> keys;
+			config->get_section_keys(p_file, &keys);
+			for (List<String>::Element *E=keys.front();E;E=E->next()) {
+				config->set_value(p_file, E->get(), Variant());
+			}
+
+			config->save(EditorSettings::get_singleton()->get_settings_path().plus_file("editor_layouts.cfg"));
+
+			layout_dialog->hide();
+			_update_layouts_menu();
+
+		} break;
 		default: { //save scene?
 		
 			if (file->get_mode()==FileDialog::MODE_SAVE_FILE) {
@@ -1551,6 +1625,10 @@ void EditorNode::_edit_current() {
 		scene_tree_dock->set_selected(NULL);
 		property_editor->edit( NULL );
 		object_menu->set_disabled(true);
+
+		if (editor_plugin_over)
+			editor_plugin_over->make_visible(false);
+
 		return;
 	}
 
@@ -1617,10 +1695,8 @@ void EditorNode::_edit_current() {
 
 
 				for(int i=0;i<editor_table.size();i++) {
-					if (editor_table[i]==main_plugin) {
-						main_editor_tabs->set_current_tab(i);
-						break;
-					}
+
+					main_editor_buttons[i]->set_pressed(editor_table[i]==main_plugin);
 				}
 			}
 
@@ -1740,8 +1816,10 @@ void EditorNode::_run(bool p_current,const String& p_custom) {
 	}
 
 	play_button->set_pressed(false);
+	play_button->set_icon(gui_base->get_icon("Play","EditorIcons"));
 	//pause_button->set_pressed(false);
 	play_scene_button->set_pressed(false);
+	play_scene_button->set_icon(gui_base->get_icon("PlayScene","EditorIcons"));
 
 	String current_filename;
 	String run_filename;
@@ -1859,8 +1937,10 @@ void EditorNode::_run(bool p_current,const String& p_custom) {
 	emit_signal("play_pressed");
 	if (p_current) {
 		play_scene_button->set_pressed(true);
+		play_scene_button->set_icon(gui_base->get_icon("Reload","EditorIcons"));
 	} else {
 		play_button->set_pressed(true);
+		play_button->set_icon(gui_base->get_icon("Reload","EditorIcons"));
 	}
 
 	_playing_edited=p_current;
@@ -2008,6 +2088,11 @@ void EditorNode::_menu_option_confirm(int p_option,bool p_confirmed) {
 
 
 		} break;
+		case SCENE_TAB_CLOSE: {
+			_remove_scene(tab_closing);
+			_update_scene_tabs();
+			current_option = -1;
+		} break;
 		case FILE_SAVE_SCENE: {
 
 
@@ -2370,6 +2455,10 @@ void EditorNode::_menu_option_confirm(int p_option,bool p_confirmed) {
 				log->add_message("REDO: "+action);
 
 		} break;
+		case TOOLS_ORPHAN_RESOURCES: {
+
+			orphan_resources->show();
+		} break;
 
 		case EDIT_REVERT: {
 
@@ -2510,7 +2599,7 @@ void EditorNode::_menu_option_confirm(int p_option,bool p_confirmed) {
 		case OBJECT_REQUEST_HELP: {
 
 			if (current) {
-				_editor_select(3);
+				_editor_select(EDITOR_SCRIPT);
 				emit_signal("request_help",current->get_type());
 			}
 
@@ -2577,12 +2666,12 @@ void EditorNode::_menu_option_confirm(int p_option,bool p_confirmed) {
 			call_dialog->popup_centered_ratio();
 		} break;
 		case RUN_PLAY: {
-
+			_menu_option_confirm(RUN_STOP,true);
 			_run(false);
 
 		} break;
 		case RUN_PLAY_CUSTOM_SCENE: {
-
+			_menu_option_confirm(RUN_STOP,true);
 			quick_run->popup("PackedScene",true);
 			quick_run->set_title("Quick Run Scene..");
 
@@ -2599,18 +2688,20 @@ void EditorNode::_menu_option_confirm(int p_option,bool p_confirmed) {
 
 			editor_run.stop();
 			play_button->set_pressed(false);
+			play_button->set_icon(gui_base->get_icon("Play","EditorIcons"));
 			play_scene_button->set_pressed(false);
+			play_scene_button->set_icon(gui_base->get_icon("PlayScene","EditorIcons"));
 			//pause_button->set_pressed(false);
 			emit_signal("stop_pressed");
 
 		} break;
 		case RUN_PLAY_SCENE: {
-
+			_menu_option_confirm(RUN_STOP,true);
 			_run(true);
 
 		} break;
 		case RUN_PLAY_NATIVE: {
-
+			_menu_option_confirm(RUN_STOP,true);
 			emit_signal("play_pressed");
 			editor_run.run_native_notify();
 
@@ -2846,7 +2937,7 @@ Control* EditorNode::get_viewport() {
 void EditorNode::_editor_select(int p_which) {
 
 	static bool selecting=false;
-	if (selecting)
+	if (selecting || changing_scene)
 		return;
 
 	selecting=true;
@@ -2854,7 +2945,9 @@ void EditorNode::_editor_select(int p_which) {
 
 	ERR_FAIL_INDEX(p_which,editor_table.size());
 
-	main_editor_tabs->set_current_tab(p_which);
+	for(int i=0;i<main_editor_buttons.size();i++) {
+		main_editor_buttons[i]->set_pressed(i==p_which);
+	}
 
 	selecting=false;
 
@@ -2872,6 +2965,8 @@ void EditorNode::_editor_select(int p_which) {
 	editor_plugin_screen=new_editor;
 	editor_plugin_screen->make_visible(true);
 	editor_plugin_screen->selected_notify();
+
+
 }
 
 void EditorNode::add_editor_plugin(EditorPlugin *p_editor) {
@@ -2879,7 +2974,12 @@ void EditorNode::add_editor_plugin(EditorPlugin *p_editor) {
 
 	if (p_editor->has_main_screen()) {
 	
-		singleton->main_editor_tabs->add_tab(p_editor->get_name());
+		ToolButton *tb = memnew( ToolButton );
+		tb->set_toggle_mode(true);
+		tb->connect("pressed",singleton,"_editor_select",varray(singleton->main_editor_buttons.size()));
+		tb->set_text(p_editor->get_name());
+		singleton->main_editor_buttons.push_back(tb);
+		singleton->main_editor_button_vb->add_child(tb);
 		singleton->editor_table.push_back(p_editor);
 	}
 	singleton->editor_data.add_editor_plugin( p_editor );
@@ -2891,16 +2991,18 @@ void EditorNode::remove_editor_plugin(EditorPlugin *p_editor) {
 
 	if (p_editor->has_main_screen()) {
 
-		for(int i=0;i<singleton->main_editor_tabs->get_tab_count();i++) {
+		for(int i=0;i<singleton->main_editor_buttons.size();i++) {
 
-			if (p_editor->get_name()==singleton->main_editor_tabs->get_tab_title(i)) {
+			if (p_editor->get_name()==singleton->main_editor_buttons[i]->get_text()) {
+				// add_editor_plugin() stores the plugin name on the button via set_text(), not set_name().
+				memdelete( singleton->main_editor_buttons[i] );
+				singleton->main_editor_buttons.remove(i);
 
-				singleton->main_editor_tabs->remove_tab(i);
 				break;
 			}
 		}
 
-		singleton->main_editor_tabs->add_tab(p_editor->get_name());
+		//singleton->main_editor_tabs->add_tab(p_editor->get_name());
 		singleton->editor_table.erase(p_editor);
 	}
 	singleton->remove_child(p_editor);
@@ -2940,23 +3042,23 @@ void EditorNode::_remove_edited_scene() {
 	_update_title();
 	_update_scene_tabs();
 
-	if (editor_data.get_edited_scene_count()==1) {
-		//make new scene appear saved
-		set_current_version(editor_data.get_undo_redo().get_version());
-		unsaved_cache=false;
-	}
+//	if (editor_data.get_edited_scene_count()==1) {
+//		//make new scene appear saved
+//		set_current_version(editor_data.get_undo_redo().get_version());
+//		unsaved_cache=false;
+//	}
 }
 
 void EditorNode::_remove_scene(int index) {
 //	printf("Attempting to remove scene %d (current is %d)\n", index, editor_data.get_edited_scene());
+
 	if (editor_data.get_edited_scene() == index) {
 		//Scene to remove is current scene
 		_remove_edited_scene();
 	}
 	else {
-		// Scene to remove is not active scene.");
+		// Scene to remove is not active scene
 		editor_data.remove_scene(index);
-		editor_data.get_undo_redo().clear_history();
 	}
 }
 
@@ -3034,7 +3136,7 @@ Error EditorNode::save_translatable_strings(const String& p_to_file) {
 	OS::Time time = OS::get_singleton()->get_time();
 	f->store_line("# Translation Strings Dump.");
 	f->store_line("# Created By.");
-	f->store_line("# \t"VERSION_FULL_NAME" (c) 2008-2015 Juan Linietsky, Ariel Manzur.");
+	f->store_line("# \t" VERSION_FULL_NAME " (c) 2008-2015 Juan Linietsky, Ariel Manzur.");
 	f->store_line("# From Scene: ");
 	f->store_line("# \t"+get_edited_scene()->get_filename());
 	f->store_line("");
@@ -3209,10 +3311,20 @@ Error EditorNode::save_optimized_copy(const String& p_scene,const String& p_pres
 }
 
 
+int EditorNode::_get_current_main_editor() {
+	// Position of editor_plugin_screen inside editor_table; falls back to 0 when it is not listed.
+	int found=0;
+	const int count=editor_table.size();
+	while (found<count && editor_table[found]!=editor_plugin_screen)
+		found++;
+
+	return (found<count)?found:0;
+}
+
 Dictionary EditorNode::_get_main_scene_state() {
 
 	Dictionary state;
-	state["main_tab"]=main_editor_tabs->get_current_tab();
+	state["main_tab"]=_get_current_main_editor();
 	state["scene_tree_offset"]=scene_tree_dock->get_tree_editor()->get_scene_tree()->get_vscroll_bar()->get_val();
 	state["property_edit_offset"]=get_property_editor()->get_scene_tree()->get_vscroll_bar()->get_val();
 	state["saved_version"]=saved_version;
@@ -3223,9 +3335,14 @@ Dictionary EditorNode::_get_main_scene_state() {
 void EditorNode::_set_main_scene_state(Dictionary p_state) {
 
 	//print_line("set current 7 ");
+	changing_scene=false;
 
+#if 0
 	if (p_state.has("main_tab")) {
 		int idx = p_state["main_tab"];
+
+
+		print_line("comes with tab: "+itos(idx));
 		int current=-1;
 		for(int i=0;i<editor_table.size();i++) {
 			if (editor_plugin_screen==editor_table[i]) {
@@ -3234,12 +3351,41 @@ void EditorNode::_set_main_scene_state(Dictionary p_state) {
 			}
 		}
 
+
 		if (idx<2 && current<2) {
 			//only set tab for 2D and 3D
-			_editor_select(p_state["main_tab"]);
+			_editor_select(idx);
 			//print_line(" setting main tab: "+itos(p_state["main_tab"]));
 		}
 	}
+#else
+
+	if (get_edited_scene()) {
+
+		int current=-1;
+		for(int i=0;i<editor_table.size();i++) {
+			if (editor_plugin_screen==editor_table[i]) {
+				current=i;
+				break;
+			}
+		}
+
+		if (current<2) {
+			//use heuristic instead
+
+			int n2d=0,n3d=0;
+			_find_node_types(get_edited_scene(),n2d,n3d);
+			if (n2d>n3d) {
+				_editor_select(EDITOR_2D);
+			} else if (n3d>n2d) {
+				_editor_select(EDITOR_3D);
+
+			}
+		}
+
+	}
+#endif
+
 
 	if (p_state.has("scene_tree_offset"))
 		scene_tree_dock->get_tree_editor()->get_scene_tree()->get_vscroll_bar()->set_val(p_state["scene_tree_offset"]);
@@ -3248,6 +3394,12 @@ void EditorNode::_set_main_scene_state(Dictionary p_state) {
 
 	//print_line("set current 8 ");
 
+	//this should only happen at the very end
+
+	//changing_scene=true; //avoid script change from opening editor
+	ScriptEditor::get_singleton()->get_debugger()->update_live_edit_root();
+	ScriptEditor::get_singleton()->set_scene_root_script( editor_data.get_scene_root_script(editor_data.get_edited_scene()) );
+	//changing_scene=false;
 
 }
 
@@ -3312,8 +3464,6 @@ void EditorNode::set_current_scene(int p_idx) {
 
 	call_deferred("_set_main_scene_state",state); //do after everything else is done setting up
 	//print_line("set current 6 ");
-	changing_scene=false;
-	ScriptEditor::get_singleton()->get_debugger()->update_live_edit_root();
 
 
 }
@@ -3464,13 +3614,14 @@ Error EditorNode::load_scene(const String& p_scene, bool p_ignore_broken_deps,bo
 
 	if (p_set_inherited) {
 		Ref<SceneState> state = sdata->get_state();
-		state->set_path(lpath);
+		state->set_path(lpath);		
 		new_scene->set_scene_inherited_state(state);
 		new_scene->set_filename(String());
-		if (new_scene->get_scene_instance_state().is_valid())
-			new_scene->get_scene_instance_state()->set_path(String());
+		//if (new_scene->get_scene_instance_state().is_valid())
+		//	new_scene->get_scene_instance_state()->set_path(String());
 	}
 
+	new_scene->set_scene_instance_state(Ref<SceneState>());
 
 	set_edited_scene(new_scene);
 	_get_scene_metadata();
@@ -3752,9 +3903,7 @@ void EditorNode::_quick_run(const String& p_resource) {
 
 void EditorNode::notify_child_process_exited() {
 
-	play_button->set_pressed(false);
-	play_scene_button->set_pressed(false);
-	//pause_button->set_pressed(false);
+	_menu_option_confirm(RUN_STOP,false);
 	stop_button->set_pressed(false);
 	editor_run.stop();
 
@@ -3789,7 +3938,8 @@ bool EditorNode::_find_editing_changed_scene(Node *p_from) {
 
 
 void EditorNode::add_io_error(const String& p_error) {
-
+	CharString err_ut = p_error.utf8();
+	ERR_PRINT(err_ut.get_data());
 	_load_error_notify(singleton,p_error);
 }
 
@@ -3945,6 +4095,9 @@ void EditorNode::_bind_methods() {
 	ObjectTypeDB::bind_method("_dock_move_left",&EditorNode::_dock_move_left);
 	ObjectTypeDB::bind_method("_dock_move_right",&EditorNode::_dock_move_right);
 
+	ObjectTypeDB::bind_method("_layout_menu_option",&EditorNode::_layout_menu_option);
+	ObjectTypeDB::bind_method("_layout_dialog_action",&EditorNode::_dialog_action);
+
 	ObjectTypeDB::bind_method("set_current_scene",&EditorNode::set_current_scene);
 	ObjectTypeDB::bind_method("set_current_version",&EditorNode::set_current_version);
 	ObjectTypeDB::bind_method("_scene_tab_changed",&EditorNode::_scene_tab_changed);
@@ -3956,6 +4109,8 @@ void EditorNode::_bind_methods() {
 	ObjectTypeDB::bind_method("_prepare_history",&EditorNode::_prepare_history);
 	ObjectTypeDB::bind_method("_select_history",&EditorNode::_select_history);
 
+	ObjectTypeDB::bind_method("_toggle_search_bar",&EditorNode::_toggle_search_bar);
+	ObjectTypeDB::bind_method("_clear_search_box",&EditorNode::_clear_search_box);
 
 	ObjectTypeDB::bind_method(_MD("add_editor_import_plugin", "plugin"), &EditorNode::add_editor_import_plugin);
 	ObjectTypeDB::bind_method(_MD("remove_editor_import_plugin", "plugin"), &EditorNode::remove_editor_import_plugin);
@@ -4237,6 +4392,15 @@ void EditorNode::_save_docks() {
 	Ref<ConfigFile> config;
 	config.instance();
 
+	_save_docks_to_config(config, "docks");
+	editor_data.get_plugin_window_layout(config);
+
+	config->save(EditorSettings::get_singleton()->get_project_settings_path().plus_file("editor_layout.cfg"));
+
+}
+
+void EditorNode::_save_docks_to_config(Ref<ConfigFile> p_layout, const String& p_section) {
+
 	for(int i=0;i<DOCK_SLOT_MAX;i++) {
 		String names;
 		for(int j=0;j<dock_slot[i]->get_tab_count();j++) {
@@ -4247,7 +4411,7 @@ void EditorNode::_save_docks() {
 		}
 
 		if (names!="") {
-			config->set_value("docks","dock_"+itos(i+1),names);
+			p_layout->set_value(p_section,"dock_"+itos(i+1),names);
 		}
 	}
 
@@ -4261,7 +4425,7 @@ void EditorNode::_save_docks() {
 	for(int i=0;i<DOCK_SLOT_MAX/2;i++) {
 
 		if (splits[i]->is_visible()) {
-			config->set_value("docks","dock_split_"+itos(i+1),splits[i]->get_split_offset());
+			p_layout->set_value(p_section,"dock_split_"+itos(i+1),splits[i]->get_split_offset());
 		}
 	}
 
@@ -4275,13 +4439,9 @@ void EditorNode::_save_docks() {
 
 	for(int i=0;i<4;i++) {
 
-		config->set_value("docks","dock_hsplit_"+itos(i+1),h_splits[i]->get_split_offset());
+		p_layout->set_value(p_section,"dock_hsplit_"+itos(i+1),h_splits[i]->get_split_offset());
 	}
 
-	editor_data.get_plugin_window_layout(config);
-
-	config->save(EditorSettings::get_singleton()->get_project_settings_path().plus_file("editor_layout.cfg"));
-
 }
 
 void EditorNode::save_layout() {
@@ -4303,12 +4463,19 @@ void EditorNode::_load_docks() {
 		return; //no config
 	}
 
+	_load_docks_from_config(config, "docks");
+	editor_data.set_plugin_window_layout(config);
+
+}
+
+void EditorNode::_load_docks_from_config(Ref<ConfigFile> p_layout, const String& p_section) {
+
 	for(int i=0;i<DOCK_SLOT_MAX;i++) {
 
-		if (!config->has_section_key("docks","dock_"+itos(i+1)))
+		if (!p_layout->has_section_key(p_section,"dock_"+itos(i+1)))
 			continue;
 
-		Vector<String> names = String(config->get_value("docks","dock_"+itos(i+1))).split(",");
+		Vector<String> names = String(p_layout->get_value(p_section,"dock_"+itos(i+1))).split(",");
 
 		for(int j=0;j<names.size();j++) {
 
@@ -4328,7 +4495,7 @@ void EditorNode::_load_docks() {
 			if (atidx==-1) //well, it's not anywhere
 				continue;
 
-			if (atidx==j) {
+			if (atidx==i) {
 				node->raise();
 				continue;
 			}
@@ -4343,7 +4510,6 @@ void EditorNode::_load_docks() {
 			dock_slot[i]->add_child(node);
 			dock_slot[i]->show();
 		}
-
 	}
 
 	VSplitContainer*splits[DOCK_SLOT_MAX/2]={
@@ -4355,14 +4521,14 @@ void EditorNode::_load_docks() {
 
 	for(int i=0;i<DOCK_SLOT_MAX/2;i++) {
 
-		if (!config->has_section_key("docks","dock_split_"+itos(i+1)))
+		if (!p_layout->has_section_key(p_section,"dock_split_"+itos(i+1)))
 			continue;
 
-		int ofs = config->get_value("docks","dock_split_"+itos(i+1));
+		int ofs = p_layout->get_value(p_section,"dock_split_"+itos(i+1));
 		splits[i]->set_split_offset(ofs);
 	}
 
-	HSplitContainer *h_splits[4]={
+	HSplitContainer*h_splits[4]={
 		left_l_hsplit,
 		left_r_hsplit,
 		main_hsplit,
@@ -4370,9 +4536,9 @@ void EditorNode::_load_docks() {
 	};
 
 	for(int i=0;i<4;i++) {
-		if (!config->has_section_key("docks","dock_hsplit_"+itos(i+1)))
+		if (!p_layout->has_section_key(p_section,"dock_hsplit_"+itos(i+1)))
 			continue;
-		int ofs = config->get_value("docks","dock_hsplit_"+itos(i+1));
+		int ofs = p_layout->get_value(p_section,"dock_hsplit_"+itos(i+1));
 		h_splits[i]->set_split_offset(ofs);
 	}
 
@@ -4390,8 +4556,78 @@ void EditorNode::_load_docks() {
 			dock_slot[i]->set_current_tab(0);
 		}
 	}
+}
 
-	editor_data.set_plugin_window_layout(config);
+
+void EditorNode::_update_layouts_menu() {
+	// Rebuilds the editor_layouts popup: the fixed entries first, then one item per section found in editor_layouts.cfg.
+	editor_layouts->clear();
+	editor_layouts->set_size(Vector2()); // presumably resets the popup size so it is recomputed for the new items - TODO confirm
+	editor_layouts->add_item("Save Layout", SETTINGS_LAYOUT_SAVE);
+	editor_layouts->add_item("Delete Layout", SETTINGS_LAYOUT_DELETE);
+	editor_layouts->add_separator();
+	editor_layouts->add_item("Default", SETTINGS_LAYOUT_DEFAULT);
+	// Saved layouts are read from editor_layouts.cfg under the editor settings path.
+	Ref<ConfigFile> config;
+	config.instance();
+	Error err = config->load(EditorSettings::get_singleton()->get_settings_path().plus_file("editor_layouts.cfg"));
+	if (err!=OK) {
+		return; //no config
+	}
+	// Every section name in the file is treated as a layout name.
+	List<String> layouts;
+	config.ptr()->get_sections(&layouts);
+
+	for (List<String>::Element *E=layouts.front();E;E=E->next()) {
+
+		String layout=E->get();
+		// "Default" already has its own fixed entry above, so it is not listed a second time.
+		if (layout!="Default")
+			editor_layouts->add_item(layout);
+	}
+
+}
+
+void EditorNode::_layout_menu_option(int p_id) {
+	// Handles "item_pressed" from the editor_layouts popup; p_id is the id of the chosen item.
+	switch (p_id) {
+
+		case SETTINGS_LAYOUT_SAVE: {
+			// Only records the pending action and asks for a layout name; nothing is saved in this function.
+			current_option=p_id;
+			layout_dialog->clear_layout_name();
+			layout_dialog->set_title("Save Layout");
+			layout_dialog->get_ok()->set_text("Save");
+			layout_dialog->popup_centered();
+		} break;
+		case SETTINGS_LAYOUT_DELETE: {
+			// Same dialog as for saving, retitled; nothing is deleted in this function.
+			current_option=p_id;
+			layout_dialog->clear_layout_name();
+			layout_dialog->set_title("Delete Layout");
+			layout_dialog->get_ok()->set_text("Delete");
+			layout_dialog->popup_centered();
+		} break;
+		case SETTINGS_LAYOUT_DEFAULT: {
+			// Apply the built-in layout held in default_theme, then write the resulting docks out with _save_docks().
+			_load_docks_from_config(default_theme, "docks");
+			_save_docks();
+		} break;
+		default: {
+			// Any other id is treated as a saved layout stored in editor_layouts.cfg.
+			Ref<ConfigFile> config;
+			config.instance();
+			Error err = config->load(EditorSettings::get_singleton()->get_settings_path().plus_file("editor_layouts.cfg"));
+			if (err!=OK) {
+				return; //no config
+			}
+			// The menu item's text is used as the config section to load the docks from.
+			int idx=editor_layouts->get_item_index(p_id);
+			_load_docks_from_config(config, editor_layouts->get_item_text(idx));
+			_save_docks();
+
+		}
+	}
 
 }
 
@@ -4404,8 +4640,19 @@ void EditorNode::_scene_tab_script_edited(int p_tab) {
 }
 
 void EditorNode::_scene_tab_closed(int p_tab) {
- 	_remove_scene(p_tab);
-	_update_scene_tabs();
+	current_option = SCENE_TAB_CLOSE;
+	tab_closing = p_tab;
+	if (unsaved_cache) {
+		confirmation->get_ok()->set_text("Yes");
+		//confirmation->get_cancel()->show();
+		confirmation->set_text("Close scene? (Unsaved changes will be lost)");
+		confirmation->popup_centered_minsize();
+	}
+	else {
+		_remove_scene(p_tab);
+		//_update_scene_tabs();
+	}
+
 }
 
 
@@ -4438,6 +4685,30 @@ void EditorNode::_scene_tab_changed(int p_tab) {
 
 }
 
+void EditorNode::_toggle_search_bar(bool p_pressed) {
+	// Toggle handler: p_pressed switches the property editor filter and the search bar visibility together.
+	property_editor->set_use_filter(p_pressed);
+
+	if (p_pressed) {
+		// Show the bar and give the search box focus with its current text selected.
+		search_bar->show();
+		search_box->grab_focus();
+		search_box->select_all();
+	} else {
+
+		search_bar->hide();
+	}
+}
+
+void EditorNode::_clear_search_box() {
+	// Empties the search box and refreshes the property editor tree.
+	if (search_box->get_text()=="")
+		return;
+	// Reached only when the box held text; an already empty box skips the update_tree() call.
+	search_box->clear();
+	property_editor->update_tree();
+}
+
 EditorNode::EditorNode() {
 
 	EditorHelp::generate_doc(); //before any editor classes are crated
@@ -4469,6 +4740,7 @@ EditorNode::EditorNode() {
 
 	ResourceLoader::set_abort_on_missing_resources(false);
 	FileDialog::set_default_show_hidden_files(EditorSettings::get_singleton()->get("file_dialog/show_hidden_files"));
+	EditorFileDialog::set_default_show_hidden_files(EditorSettings::get_singleton()->get("file_dialog/show_hidden_files"));
 	ResourceLoader::set_error_notify_func(this,_load_error_notify);
 	ResourceLoader::set_dependency_error_notify_func(this,_dependency_error_report);
 
@@ -4551,6 +4823,7 @@ EditorNode::EditorNode() {
 	gui_base->add_child(main_vbox);
 	main_vbox->set_area_as_parent_rect(8);
 
+#if 0
 	PanelContainer *top_dark_panel = memnew( PanelContainer );
 	Ref<StyleBoxTexture> top_dark_sb;
 	top_dark_sb.instance();;
@@ -4566,21 +4839,14 @@ EditorNode::EditorNode() {
 	VBoxContainer *top_dark_vb = memnew( VBoxContainer );
 	main_vbox->add_child(top_dark_panel);
 	top_dark_panel->add_child(top_dark_vb);
-
+#endif
 
 
 
 	menu_hb = memnew( HBoxContainer );
-	top_dark_vb->add_child(menu_hb);
+	main_vbox->add_child(menu_hb);
 
-	scene_tabs=memnew( Tabs );
-	scene_tabs->add_tab("unsaved");
-	scene_tabs->set_tab_align(Tabs::ALIGN_CENTER);
-	scene_tabs->set_tab_close_display_policy(Tabs::SHOW_HOVER);
-	scene_tabs->connect("tab_changed",this,"_scene_tab_changed");
-	scene_tabs->connect("right_button_pressed",this,"_scene_tab_script_edited");
-	scene_tabs->connect("tab_close", this, "_scene_tab_closed");
-	top_dark_vb->add_child(scene_tabs);
+//	top_dark_vb->add_child(scene_tabs);
 	//left
 	left_l_hsplit = memnew( HSplitContainer );
 	main_vbox->add_child(left_l_hsplit);
@@ -4613,11 +4879,15 @@ EditorNode::EditorNode() {
 	main_hsplit = memnew( HSplitContainer );
 	left_r_hsplit->add_child(main_hsplit);
 	//main_split->set_v_size_flags(Control::SIZE_EXPAND_FILL);
+	VBoxContainer * center_vb = memnew( VBoxContainer);
+	main_hsplit->add_child(center_vb);
+	center_vb->set_h_size_flags(Control::SIZE_EXPAND_FILL);
 
 	center_split = memnew( VSplitContainer );
-	main_hsplit->add_child(center_split);
-	center_split->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	//main_hsplit->add_child(center_split);
+	center_split->set_v_size_flags(Control::SIZE_EXPAND_FILL);
 	center_split->set_collapsed(false);
+	center_vb->add_child(center_split);
 
 	right_hsplit = memnew( HSplitContainer );
 	main_hsplit->add_child(right_hsplit);
@@ -4628,6 +4898,9 @@ EditorNode::EditorNode() {
 	right_l_vsplit->add_child(dock_slot[DOCK_SLOT_RIGHT_UL]);
 	dock_slot[DOCK_SLOT_RIGHT_BL]=memnew( TabContainer );
 	right_l_vsplit->add_child(dock_slot[DOCK_SLOT_RIGHT_BL]);
+	//right_l_vsplit->hide();
+	//dock_slot[DOCK_SLOT_RIGHT_UL]->hide();
+	//dock_slot[DOCK_SLOT_RIGHT_BL]->hide();
 
 	right_r_vsplit = memnew( VSplitContainer );
 	right_hsplit->add_child(right_r_vsplit);
@@ -4636,8 +4909,8 @@ EditorNode::EditorNode() {
 	dock_slot[DOCK_SLOT_RIGHT_BR]=memnew( TabContainer );
 	right_r_vsplit->add_child(dock_slot[DOCK_SLOT_RIGHT_BR]);
 	right_r_vsplit->hide();
-	//dock_slot[DOCK_SLOT_RIGHT_UL]->hide();
-	//dock_slot[DOCK_SLOT_RIGHT_BL]->hide();
+	dock_slot[DOCK_SLOT_RIGHT_UR]->hide();
+	dock_slot[DOCK_SLOT_RIGHT_BR]->hide();
 
 	left_l_vsplit->connect("dragged",this,"_dock_split_dragged");
 	left_r_vsplit->connect("dragged",this,"_dock_split_dragged");
@@ -4714,20 +4987,19 @@ EditorNode::EditorNode() {
 	srt->add_constant_override("separation",0);
 
 
-	main_editor_tabs  = memnew( Tabs );
+/*	main_editor_tabs  = memnew( Tabs );
 	main_editor_tabs->connect("tab_changed",this,"_editor_select");
 	main_editor_tabs->set_tab_close_display_policy(Tabs::SHOW_NEVER);
-	HBoxContainer *srth = memnew( HBoxContainer );
-	srt->add_child( srth );
-	Control *tec = memnew( Control );
-	tec->set_custom_minimum_size(Size2(100,0));
-	tec->set_h_size_flags(Control::SIZE_EXPAND_FILL);
-	srth->add_child(tec);
-	srth->add_child(main_editor_tabs);
-	tec = memnew( Control );
-	tec->set_custom_minimum_size(Size2(100,0));
-	srth->add_child(tec);
-	tec->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+*/
+	scene_tabs=memnew( Tabs );
+	scene_tabs->add_tab("unsaved");
+	scene_tabs->set_tab_align(Tabs::ALIGN_CENTER);
+	scene_tabs->set_tab_close_display_policy(Tabs::SHOW_ACTIVE_ONLY);
+	scene_tabs->connect("tab_changed",this,"_scene_tab_changed");
+	scene_tabs->connect("right_button_pressed",this,"_scene_tab_script_edited");
+	scene_tabs->connect("tab_close", this, "_scene_tab_closed");
+
+	srt->add_child(scene_tabs);
 
 	scene_root_parent = memnew( Panel );
 
@@ -4804,6 +5076,8 @@ EditorNode::EditorNode() {
 	anim_close->set_pressed_texture( anim_close->get_icon("Close","EditorIcons"));
 
 
+
+
 	PanelContainer *top_region = memnew( PanelContainer );
 	top_region->add_style_override("panel",gui_base->get_stylebox("hover","Button"));
 	HBoxContainer *left_menu_hb = memnew( HBoxContainer );
@@ -4880,6 +5154,18 @@ EditorNode::EditorNode() {
 	p->add_child(recent_scenes);
 	recent_scenes->connect("item_pressed",this,"_open_recent_scene");
 
+	{
+		Control *sp = memnew( Control );
+		sp->set_custom_minimum_size(Size2(30,0));
+		menu_hb->add_child(sp);
+	}
+
+	PanelContainer *editor_region = memnew( PanelContainer );
+	editor_region->add_style_override("panel",gui_base->get_stylebox("hover","Button"));
+	main_editor_button_vb = memnew( HBoxContainer );
+	editor_region->add_child(main_editor_button_vb);
+	menu_hb->add_child(editor_region);
+
 	//menu_hb->add_spacer();
 #if 0
 	node_menu = memnew( MenuButton );
@@ -4918,6 +5204,17 @@ EditorNode::EditorNode() {
 	p=import_menu->get_popup();
 	p->connect("item_pressed",this,"_menu_option");
 
+	tool_menu = memnew( MenuButton );
+	tool_menu->set_tooltip("Miscellaneous project or scene wide tools.");
+	tool_menu->set_text("Tools");
+	// The "Tools" menu button is added to the left menu bar container.
+	//tool_menu->set_icon(gui_base->get_icon("Save","EditorIcons"));
+	left_menu_hb->add_child( tool_menu );
+	// Popup entries are dispatched through _menu_option using the id given to add_item().
+	p=tool_menu->get_popup();
+	p->connect("item_pressed",this,"_menu_option");
+	p->add_item("Orphan Resource Explorer",TOOLS_ORPHAN_RESOURCES);
+
 	export_button = memnew( ToolButton );
 	export_button->set_tooltip("Export the project to many platforms.");
 	export_button->set_text("Export");
@@ -5040,6 +5337,39 @@ EditorNode::EditorNode() {
 */
 
 
+	progress_hb = memnew( BackgroundProgress );
+	menu_hb->add_child(progress_hb);
+
+	{
+		Control *sp = memnew( Control );
+		sp->set_custom_minimum_size(Size2(30,0));
+		menu_hb->add_child(sp);
+	}
+
+
+	PanelContainer *vu_cont = memnew( PanelContainer );
+	vu_cont->add_style_override("panel",gui_base->get_stylebox("hover","Button"));
+	menu_hb->add_child(vu_cont);
+
+	audio_vu = memnew( TextureProgress );
+	CenterContainer *vu_cc = memnew( CenterContainer );
+	vu_cc->add_child(audio_vu);
+	vu_cont->add_child(vu_cc);
+	audio_vu->set_under_texture(gui_base->get_icon("VuEmpty","EditorIcons"));
+	audio_vu->set_progress_texture(gui_base->get_icon("VuFull","EditorIcons"));
+	audio_vu->set_max(24);
+	audio_vu->set_min(-80);
+	audio_vu->set_step(0.01);
+	audio_vu->set_val(0);
+
+	{
+		Control *sp = memnew( Control );
+		sp->set_custom_minimum_size(Size2(30,0));
+		menu_hb->add_child(sp);
+	}
+
+
+
 	top_region = memnew( PanelContainer );
 	top_region->add_style_override("panel",gui_base->get_stylebox("hover","Button"));
 	HBoxContainer *right_menu_hb = memnew( HBoxContainer );
@@ -5053,17 +5383,29 @@ EditorNode::EditorNode() {
 	right_menu_hb->add_child( settings_menu );
 	p=settings_menu->get_popup();
 
-
 	//p->add_item("Export Settings",SETTINGS_EXPORT_PREFERENCES);
 	p->add_item("Editor Settings",SETTINGS_PREFERENCES);
 	//p->add_item("Optimization Presets",SETTINGS_OPTIMIZED_PRESETS);
 	p->add_separator();
+	editor_layouts = memnew( PopupMenu );
+	editor_layouts->set_name("Layouts");
+	p->add_child(editor_layouts);
+	editor_layouts->connect("item_pressed",this,"_layout_menu_option");
+	p->add_submenu_item("Editor Layout", "Layouts");
+	p->add_separator();
 	p->add_check_item("Show Animation",SETTINGS_SHOW_ANIMATION,KEY_MASK_CMD+KEY_N);
 	p->add_separator();
 	p->add_item("Install Export Templates",SETTINGS_LOAD_EXPORT_TEMPLATES);
 	p->add_separator();
 	p->add_item("About",SETTINGS_ABOUT);
 
+	layout_dialog = memnew( EditorLayoutDialog );
+	gui_base->add_child(layout_dialog);
+	layout_dialog->set_hide_on_ok(false);
+	layout_dialog->set_size(Size2(175, 70));
+	confirm_error = memnew( AcceptDialog  );
+	layout_dialog->add_child(confirm_error);
+	layout_dialog->connect("layout_selected", this,"_layout_dialog_action");
 
 	sources_button = memnew( ToolButton );
 	right_menu_hb->add_child(sources_button);
@@ -5071,6 +5413,15 @@ EditorNode::EditorNode() {
 	sources_button->connect("pressed",this,"_menu_option",varray(SOURCES_REIMPORT));
 	sources_button->set_tooltip("Alerts when an external resource has changed.");
 
+	update_menu = memnew( MenuButton );
+	update_menu->set_tooltip("Spins when the editor window repaints!");
+	right_menu_hb->add_child(update_menu);
+	update_menu->set_icon(gui_base->get_icon("Progress1","EditorIcons"));
+	p=update_menu->get_popup();
+	p->add_check_item("Update Always",SETTINGS_UPDATE_ALWAYS);
+	p->add_check_item("Update Changes",SETTINGS_UPDATE_CHANGES);
+	p->set_item_checked(1,true);
+
 	//sources_button->connect();
 
 /*
@@ -5173,6 +5524,7 @@ EditorNode::EditorNode() {
 
 
 	editor_history_menu = memnew( MenuButton );
+	editor_history_menu->set_tooltip("History of recently edited objects");
 	editor_history_menu->set_icon( gui_base->get_icon("History","EditorIcons"));
 	prop_editor_hb->add_child(editor_history_menu);
 	editor_history_menu->connect("about_to_show",this,"_prepare_history");
@@ -5186,6 +5538,12 @@ EditorNode::EditorNode() {
 	editor_path->set_h_size_flags(Control::SIZE_EXPAND_FILL);
 	prop_editor_hb->add_child(editor_path);
 
+	search_button = memnew( ToolButton );
+	search_button->set_toggle_mode(true);
+	search_button->set_pressed(false);
+	search_button->set_icon(gui_base->get_icon("Zoom","EditorIcons"));
+	prop_editor_hb->add_child(search_button);
+	search_button->connect("toggled",this,"_toggle_search_bar");
 
 	object_menu = memnew( MenuButton );
 	object_menu->set_icon(gui_base->get_icon("Tools","EditorIcons"));
@@ -5197,6 +5555,22 @@ EditorNode::EditorNode() {
 	create_dialog->set_base_type("Resource");
 	create_dialog->connect("create",this,"_resource_created");
 
+	search_bar = memnew( HBoxContainer );
+	search_bar->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	prop_editor_base->add_child(search_bar);
+	search_bar->hide();
+
+	l = memnew( Label("Search: ") );
+	search_bar->add_child(l);
+
+	search_box = memnew( LineEdit );
+	search_box->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	search_bar->add_child(search_box);
+
+	ToolButton *clear_button = memnew( ToolButton );
+	clear_button->set_icon(gui_base->get_icon("Close","EditorIcons"));
+	search_bar->add_child(clear_button);
+	clear_button->connect("pressed",this,"_clear_search_box");
 
 	property_editor = memnew( PropertyEditor );
 	property_editor->set_autoclear(true);
@@ -5205,6 +5579,7 @@ EditorNode::EditorNode() {
 	property_editor->set_use_doc_hints(true);
 
 	property_editor->hide_top_label();
+	property_editor->register_text_enter(search_box);
 
 	prop_editor_base->add_child( property_editor );
 	property_editor->set_undo_redo(&editor_data.get_undo_redo());
@@ -5217,13 +5592,27 @@ EditorNode::EditorNode() {
 	scenes_dock->connect("open",this,"open_request");
 	scenes_dock->connect("instance",this,"_instance_request");
 
+	const String docks_section = "docks";
+
+	default_theme.instance();
+	default_theme->set_value(docks_section, "dock_3", "Scene");
+	default_theme->set_value(docks_section, "dock_4", "FileSystem");
+	default_theme->set_value(docks_section, "dock_5", "Inspector");
 
+	for(int i=0;i<DOCK_SLOT_MAX/2;i++)
+		default_theme->set_value(docks_section, "dock_hsplit_"+itos(i+1), 0);
+	for(int i=0;i<DOCK_SLOT_MAX/2;i++)
+		default_theme->set_value(docks_section, "dock_split_"+itos(i+1), 0);
+
+	_update_layouts_menu();
 
 	log = memnew( EditorLog );
 	center_split->add_child(log);
 	log->connect("close_request",this,"_close_messages");
 	log->connect("show_request",this,"_show_messages");
 	//left_split->set_dragger_visible(false);
+
+
 	old_split_ofs=0;
 
 
@@ -5241,37 +5630,18 @@ EditorNode::EditorNode() {
 
 	animation_editor->hide();
 
-	PanelContainer *bottom_pc = memnew( PanelContainer );
-	main_vbox->add_child(bottom_pc);
+	/*PanelContainer *bottom_pc = memnew( PanelContainer );
+	srt->add_child(bottom_pc);
 	bottom_hb = memnew( HBoxContainer );
-	bottom_pc->add_child(bottom_hb);
+	bottom_pc->add_child(bottom_hb);*/
 
-	bottom_hb->add_child( log->get_button() );
+	center_vb->add_child( log->get_button() );
 	log->get_button()->set_h_size_flags(Control::SIZE_EXPAND_FILL);
 
-	progress_hb = memnew( BackgroundProgress );
-	bottom_hb->add_child(progress_hb);
+
 	//progress_hb->set_h_size_flags(Control::SIZE_EXPAND_FILL);
 
-	audio_vu = memnew( TextureProgress );
-	CenterContainer *vu_cc = memnew( CenterContainer );
-	vu_cc->add_child(audio_vu);
-	bottom_hb->add_child(vu_cc);
-	audio_vu->set_under_texture(gui_base->get_icon("VuEmpty","EditorIcons"));
-	audio_vu->set_progress_texture(gui_base->get_icon("VuFull","EditorIcons"));
-	audio_vu->set_max(24);
-	audio_vu->set_min(-80);
-	audio_vu->set_step(0.01);
-	audio_vu->set_val(0);
 
-	update_menu = memnew( MenuButton );
-	update_menu->set_tooltip("Spins when the editor window repaints!");
-	bottom_hb->add_child(update_menu);
-	update_menu->set_icon(gui_base->get_icon("Progress1","EditorIcons"));
-	p=update_menu->get_popup();
-	p->add_check_item("Update Always",SETTINGS_UPDATE_ALWAYS);
-	p->add_check_item("Update Changes",SETTINGS_UPDATE_CHANGES);
-	p->set_item_checked(1,true);
 
 	/*
 	animation_menu = memnew( ToolButton );
@@ -5297,7 +5667,8 @@ EditorNode::EditorNode() {
 
 
 
-
+	orphan_resources = memnew( OrphanResourcesDialog );
+	gui_base->add_child(orphan_resources);
 
 
 
@@ -5488,11 +5859,12 @@ EditorNode::EditorNode() {
 	editor_import_export->add_import_plugin( Ref<EditorTranslationImportPlugin>( memnew(EditorTranslationImportPlugin(this))));
 
 	editor_import_export->add_export_plugin( Ref<EditorTextureExportPlugin>( memnew(EditorTextureExportPlugin)));
+	editor_import_export->add_export_plugin( Ref<EditorSampleExportPlugin>( memnew(EditorSampleExportPlugin)));
+	editor_import_export->add_export_plugin( Ref<EditorSceneExportPlugin>( memnew(EditorSceneExportPlugin)));
 
 	add_editor_plugin( memnew( CanvasItemEditorPlugin(this) ) );
 	add_editor_plugin( memnew( SpatialEditorPlugin(this) ) );
 	add_editor_plugin( memnew( ScriptEditorPlugin(this) ) );
-	add_editor_plugin( memnew( EditorHelpPlugin(this) ) );
 	add_editor_plugin( memnew( AnimationPlayerEditorPlugin(this) ) );
 	add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,true) ) );
 	add_editor_plugin( memnew( ShaderGraphEditorPlugin(this,false) ) );
@@ -5541,7 +5913,7 @@ EditorNode::EditorNode() {
 	resource_preview->add_preview_generator( Ref<EditorMeshPreviewPlugin>( memnew(EditorMeshPreviewPlugin )));
 
 	circle_step_msec=OS::get_singleton()->get_ticks_msec();
-	circle_step_frame=OS::get_singleton()->get_frames_drawn();;
+	circle_step_frame=OS::get_singleton()->get_frames_drawn();
 	circle_step=0;
 
 	_rebuild_import_menu();
@@ -5559,9 +5931,9 @@ EditorNode::EditorNode() {
 
 	Globals::get_singleton()->set("debug/indicators_enabled",true);
 	Globals::get_singleton()->set("render/room_cull_enabled",false);
-	theme->set_color("prop_category","Editor",Color::hex(0x403d41ff));
-	theme->set_color("prop_section","Editor",Color::hex(0x383539ff));
-	theme->set_color("prop_subsection","Editor",Color::hex(0x343135ff));
+	theme->set_color("prop_category","Editor",Color::hex(0x3f3a44ff));
+	theme->set_color("prop_section","Editor",Color::hex(0x35313aff));
+	theme->set_color("prop_subsection","Editor",Color::hex(0x312e37ff));
 	theme->set_color("fg_selected","Editor",Color::html("ffbd8e8e"));
 	theme->set_color("fg_error","Editor",Color::html("ffbd8e8e"));
 
@@ -5668,6 +6040,7 @@ EditorNode::EditorNode() {
 EditorNode::~EditorNode() {	
 
 
+	memdelete( EditorHelp::get_doc_data() );
 	memdelete(editor_selection);
 	memdelete(file_server);
 	EditorSettings::destroy();
diff --git a/tools/editor/editor_node.h b/tools/editor/editor_node.h
index 56e455c9c0..bd25f27c59 100644
--- a/tools/editor/editor_node.h
+++ b/tools/editor/editor_node.h
@@ -76,6 +76,7 @@
 #include "editor_reimport_dialog.h"
 #include "import_settings.h"
 #include "tools/editor/editor_plugin.h"
+#include "tools/editor/editor_layout_dialog.h"
 
 #include "fileserver/editor_file_server.h"
 #include "editor_resource_preview.h"
@@ -133,6 +134,7 @@ class EditorNode : public Node {
 		EDIT_UNDO,
 		EDIT_REDO,
 		EDIT_REVERT,
+		TOOLS_ORPHAN_RESOURCES,
 		RESOURCE_NEW,
 		RESOURCE_LOAD,
 		RESOURCE_SAVE,
@@ -166,6 +168,9 @@ class EditorNode : public Node {
 		SETTINGS_EXPORT_PREFERENCES,
 		SETTINGS_PREFERENCES,
 		SETTINGS_OPTIMIZED_PRESETS,
+		SETTINGS_LAYOUT_SAVE,
+		SETTINGS_LAYOUT_DELETE,
+		SETTINGS_LAYOUT_DEFAULT,
 		SETTINGS_SHOW_ANIMATION,
 		SETTINGS_LOAD_EXPORT_TEMPLATES,
 		SETTINGS_HELP,
@@ -173,6 +178,7 @@ class EditorNode : public Node {
 		SOURCES_REIMPORT,
 		DEPENDENCY_LOAD_CHANGED_IMAGES,
 		DEPENDENCY_UPDATE_IMPORTED,
+		SCENE_TAB_CLOSE,
 
 		IMPORT_PLUGIN_BASE=100,
 
@@ -217,6 +223,7 @@ class EditorNode : public Node {
 	//main tabs
 
 	Tabs *scene_tabs;
+	int tab_closing;
 
 
 	int old_split_ofs;
@@ -235,6 +242,7 @@ class EditorNode : public Node {
 	Control *viewport;
 	MenuButton *file_menu;
 	MenuButton *import_menu;
+	MenuButton *tool_menu;
 	ToolButton *export_button;
 	ToolButton *prev_scene;
 	MenuButton *object_menu;
@@ -248,6 +256,7 @@ class EditorNode : public Node {
 	ToolButton *play_scene_button;
 	ToolButton *play_custom_scene_button;
 	MenuButton *debug_button;
+	ToolButton *search_button;
 	TextureProgress *audio_vu;
 	//MenuButton *fileserver_menu;
 
@@ -266,6 +275,9 @@ class EditorNode : public Node {
 	ScenesDock *scenes_dock;
 	EditorRunNative *run_native;
 
+	HBoxContainer *search_bar;
+	LineEdit *search_box;
+
 	CreateDialog *create_dialog;
 
 	CallDialog *call_dialog;
@@ -276,6 +288,11 @@ class EditorNode : public Node {
 	AcceptDialog *about;
 	AcceptDialog *warning;
 
+	Ref<ConfigFile> default_theme;
+	PopupMenu *editor_layouts;
+	EditorLayoutDialog *layout_dialog;
+	AcceptDialog *confirm_error;
+
 	//OptimizedPresetsDialog *optimized_presets;
 	EditorSettingsDialog *settings_config_dialog;
 	RunSettingsDialog *run_settings_dialog;
@@ -314,7 +331,9 @@ class EditorNode : public Node {
 	CenterContainer *tabs_center;
 	EditorQuickOpen *quick_open;
 	EditorQuickOpen *quick_run;
-	Tabs *main_editor_tabs;
+
+	HBoxContainer *main_editor_button_vb;
+	Vector<ToolButton*> main_editor_buttons;
 	Vector<EditorPlugin*> editor_table;
 
 	EditorReImportDialog *reimport_dialog;
@@ -325,6 +344,7 @@ class EditorNode : public Node {
 
 	DependencyErrorDialog *dependency_error;
 	DependencyEditor *dependency_fixer;
+	OrphanResourcesDialog *orphan_resources;
 
 	TabContainer *dock_slot[DOCK_SLOT_MAX];
 	Rect2 dock_select_rect[DOCK_SLOT_MAX];
@@ -508,14 +528,30 @@ class EditorNode : public Node {
 	Dictionary _get_main_scene_state();
 	void _set_main_scene_state(Dictionary p_state);
 
+	int _get_current_main_editor();
+
 	void _save_docks();
 	void _load_docks();
+	void _save_docks_to_config(Ref<ConfigFile> p_layout, const String& p_section);
+	void _load_docks_from_config(Ref<ConfigFile> p_layout, const String& p_section);
+
+	void _update_layouts_menu();
+	void _layout_menu_option(int p_idx);
+
+	void _toggle_search_bar(bool p_pressed);
+	void _clear_search_box();
 
 protected:
 	void _notification(int p_what);
-	static void _bind_methods();		
+	static void _bind_methods();
 public:
 
+	enum EditorTable {
+		EDITOR_2D = 0,
+		EDITOR_3D,
+		EDITOR_SCRIPT
+	};
+
 	static EditorNode* get_singleton() { return singleton; }
 
 
diff --git a/tools/editor/editor_settings.cpp b/tools/editor/editor_settings.cpp
index 651b30c724..15de6e7266 100644
--- a/tools/editor/editor_settings.cpp
+++ b/tools/editor/editor_settings.cpp
@@ -447,7 +447,7 @@ void EditorSettings::_load_defaults() {
 	hints["global/default_project_path"]=PropertyInfo(Variant::STRING,"global/default_project_path",PROPERTY_HINT_GLOBAL_DIR);
 	set("global/default_project_export_path","");
 	hints["global/default_project_export_path"]=PropertyInfo(Variant::STRING,"global/default_project_export_path",PROPERTY_HINT_GLOBAL_DIR);
-
+	set("global/show_script_in_scene_tabs",false);
 	set("text_editor/background_color",Color::html("3b000000"));
 	set("text_editor/text_color",Color::html("aaaaaa"));
 	set("text_editor/text_selected_color",Color::html("000000"));
diff --git a/tools/editor/editor_settings.h b/tools/editor/editor_settings.h
index 4ba940cd1c..bdfa5160d6 100644
--- a/tools/editor/editor_settings.h
+++ b/tools/editor/editor_settings.h
@@ -107,6 +107,7 @@ public:
 	static EditorSettings *get_singleton();
 	void erase(String p_var);
 	String get_settings_path() const;
+	String get_global_settings_path() const;
 	String get_project_settings_path() const;
 
 	const Map<String,Plugin>& get_plugins() const { return plugins; }
diff --git a/tools/editor/fileserver/SCsub b/tools/editor/fileserver/SCsub
index b525fb3f75..363a2ce4c0 100644
--- a/tools/editor/fileserver/SCsub
+++ b/tools/editor/fileserver/SCsub
@@ -1,7 +1,3 @@
 Import('env')
 Export('env')
 env.add_source_files(env.tool_sources,"*.cpp")
-
-
-
-
diff --git a/tools/editor/groups_editor.cpp b/tools/editor/groups_editor.cpp
index 2e82854014..bb5e93da34 100644
--- a/tools/editor/groups_editor.cpp
+++ b/tools/editor/groups_editor.cpp
@@ -27,151 +27,130 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 #include "groups_editor.h"
-#include "scene/gui/box_container.h"
 
+#include "scene/gui/box_container.h"
 #include "scene/gui/label.h"
 
+void GroupsEditor::_add_group(const String& p_group) {
 
-#include "print_string.h"
+	if (!node)
+		return;
 
-void GroupsEditor::_notification(int p_what) {
-	
-	if (p_what==NOTIFICATION_ENTER_TREE) {
-		connect("confirmed", this,"_close");
-	}	
-	if (p_what==NOTIFICATION_EXIT_TREE) {
-		disconnect("confirmed", this,"_close");
-	}
-}
+	String name = group_name->get_text();
+	if (name.strip_edges()=="")
+		return;
 
-void GroupsEditor::_close() {
-	
-	hide();
-	
-}
-void GroupsEditor::_add() {
-	
-	if (!node)
+	if (node->is_in_group(name))
 		return;
-		
-	undo_redo->create_action("Add To Group");
-	undo_redo->add_do_method(node,"add_to_group",group_name->get_text(),true);
-	undo_redo->add_undo_method(node,"remove_from_group",group_name->get_text());
 
+	undo_redo->create_action("Add to Group");
+	// Do adds the node to the group; undo removes it again, passing the group name only.
+	undo_redo->add_do_method(node,"add_to_group",name,true);
 	undo_redo->add_do_method(this,"update_tree");
+	undo_redo->add_undo_method(node,"remove_from_group",name);
 	undo_redo->add_undo_method(this,"update_tree");
 
 	undo_redo->commit_action();
 }
 
+void GroupsEditor::_remove_group(Object *p_item, int p_column, int p_id) {
 
-void GroupsEditor::_remove() {
-	
-	if (!tree->get_selected())
-		return;
 	if (!node)
 		return;
 
-	TreeItem *sel = tree->get_selected();
-	if (!sel)
+	TreeItem *ti = p_item->cast_to<TreeItem>();
+	if (!ti)
 		return;
-		
-	node->remove_from_group( sel->get_text(0) );
-	update_tree();
+
+	String name = ti->get_text(0);
+
+	undo_redo->create_action("Remove from Group");
+
+	undo_redo->add_do_method(node,"remove_from_group",name);
+	undo_redo->add_do_method(this,"update_tree");
+	undo_redo->add_undo_method(node,"add_to_group",name,true);
+	undo_redo->add_undo_method(this,"update_tree");
+
+	undo_redo->commit_action();
 }
 
+struct _GroupInfoComparator {
+	// Less-than comparator for Node::GroupInfo: compares the two group names after converting each to String.
+	bool operator()(const Node::GroupInfo& p_a, const Node::GroupInfo& p_b) const {
+		return p_a.name.operator String() < p_b.name.operator String();
+	}
+};
+
 void GroupsEditor::update_tree() {
 
-	
 	tree->clear();
-	
+
 	if (!node)
 		return;
-		
-	List<GroupInfo> groups;
+
+	List<Node::GroupInfo> groups;
 	node->get_groups(&groups);
-	
+	groups.sort_custom<_GroupInfoComparator>();
+
 	TreeItem *root=tree->create_item();
-	
+
 	for(List<GroupInfo>::Element *E=groups.front();E;E=E->next()) {
-	
-		if (!E->get().persistent)
+
+		Node::GroupInfo gi = E->get();
+		if (!gi.persistent)
 			continue;
+
 		TreeItem *item=tree->create_item(root);
-		item->set_text(0, E->get().name);	
-	
+		item->set_text(0, gi.name);
+		item->add_button(0, get_icon("Remove", "EditorIcons"), 0);
 	}
-
 }
 
 void GroupsEditor::set_current(Node* p_node) {
-	
+
 	node=p_node;
 	update_tree();
-
 }
 
 void GroupsEditor::_bind_methods() {
-	
-	ObjectTypeDB::bind_method("_add",&GroupsEditor::_add);
-	ObjectTypeDB::bind_method("_close",&GroupsEditor::_close);
-	ObjectTypeDB::bind_method("_remove",&GroupsEditor::_remove);	
+
+	ObjectTypeDB::bind_method("_add_group",&GroupsEditor::_add_group);
+	ObjectTypeDB::bind_method("_remove_group",&GroupsEditor::_remove_group);
 	ObjectTypeDB::bind_method("update_tree",&GroupsEditor::update_tree);
 }
 
 GroupsEditor::GroupsEditor() {
 
+	node=NULL;
+
 	set_title("Group Editor");
-	
-	Label * label = memnew( Label );
-	label->set_pos( Point2( 8,11) );
-	label->set_text("Groups:");
-	
-	add_child(label);	
-	
-	group_name = memnew(LineEdit);
-	group_name->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	group_name->set_begin( Point2( 15,28) );
-	group_name->set_end( Point2( 94,48 ) );
-	
-	add_child(group_name);
-	
-	tree = memnew( Tree );
-	tree->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	tree->set_anchor( MARGIN_BOTTOM, ANCHOR_END );
-	tree->set_begin( Point2( 15,52) );
-	tree->set_end( Point2( 94,42 ) );
-	tree->set_hide_root(true);		
-	add_child(tree);
-	
+
+	VBoxContainer *vbc = memnew( VBoxContainer );
+	add_child(vbc);
+	set_child_rect(vbc);
+
+	HBoxContainer *hbc = memnew( HBoxContainer );
+	vbc->add_margin_child("Group", hbc);
+
+	group_name = memnew( LineEdit );
+	group_name->set_h_size_flags(SIZE_EXPAND_FILL);
+	hbc->add_child(group_name);
+	group_name->connect("text_entered",this,"_add_group");
+
 	add = memnew( Button );
-	add->set_anchor( MARGIN_LEFT, ANCHOR_END );
-	add->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	add->set_begin( Point2( 90, 28 ) );
-	add->set_end( Point2( 15, 48 ) );	
 	add->set_text("Add");
-	
-	add_child(add);
-	
-	remove = memnew( Button );
-	remove->set_anchor( MARGIN_LEFT, ANCHOR_END );
-	remove->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	remove->set_begin( Point2( 90, 52 ) );
-	remove->set_end( Point2( 15, 72 ) );	
-	remove->set_text("Remove");
-	
-	add_child(remove);
+	hbc->add_child(add);
+	add->connect("pressed", this,"_add_group", varray(String()));
 
-	get_ok()->set_text("Close");
-			
-	add->connect("pressed", this,"_add");
-	remove->connect("pressed", this,"_remove");	
+	tree = memnew( Tree );
+	tree->set_hide_root(true);
+	tree->set_v_size_flags(SIZE_EXPAND_FILL);
+	vbc->add_margin_child("Node Group(s)", tree, true);
+	tree->connect("button_pressed",this,"_remove_group");
 
-	
-	node=NULL;
+	get_ok()->set_text("Close");
 }
 
-
 GroupsEditor::~GroupsEditor()
 {
 }
diff --git a/tools/editor/groups_editor.h b/tools/editor/groups_editor.h
index 09883a150f..3a9cc77727 100644
--- a/tools/editor/groups_editor.h
+++ b/tools/editor/groups_editor.h
@@ -29,42 +29,42 @@
 #ifndef GROUPS_EDITOR_H
 #define GROUPS_EDITOR_H
 
-
 #include "scene/gui/dialogs.h"
 #include "scene/gui/button.h"
 #include "scene/gui/tree.h"
 #include "scene/gui/line_edit.h"
 #include "undo_redo.h"
+
 /**
 @author Juan Linietsky <reduzio@gmail.com>
 */
-class GroupsEditor : public ConfirmationDialog {
-	
-	OBJ_TYPE( GroupsEditor, ConfirmationDialog );
-	
+
+class GroupsEditor : public AcceptDialog {
+
+	OBJ_TYPE(GroupsEditor,AcceptDialog);
+
+	Node *node;
+
 	LineEdit *group_name;
-	Tree *tree;
 	Button *add;
-	Button *remove;
-	Node *node;
+	Tree *tree;
+
 	UndoRedo *undo_redo;
-	
+
 	void update_tree();
-	void _add();
-	void _remove();
+	void _add_group(const String& p_group="");
+	void _remove_group(Object *p_item, int p_column, int p_id);
 	void _close();
-	
 protected:
-	
-	void _notification(int p_what);
-	static void _bind_methods();	
+
+	static void _bind_methods();
 public:
-	
+
 	void set_undo_redo(UndoRedo *p_undoredo) { undo_redo=p_undoredo; }
 	void set_current(Node* p_node);
-	
+
 	GroupsEditor();	
 	~GroupsEditor();
-	
 };
+
 #endif
diff --git a/tools/editor/icons/SCsub b/tools/editor/icons/SCsub
index aea053d22b..addf6879a2 100644
--- a/tools/editor/icons/SCsub
+++ b/tools/editor/icons/SCsub
@@ -9,30 +9,30 @@ def make_editor_icons_action(target, source, env):
 	pixmaps = source
 
 	s = cStringIO.StringIO()
-	
+
 	s.write("#include \"editor_icons.h\"\n\n")
 	s.write("#include \"scene/resources/theme.h\"\n\n")
 
 	for x in pixmaps:
-	
+
 		x=str(x)
 		var_str=os.path.basename(x)[:-4]+"_png";
 		#print(var_str)
-		
+
 		s.write("static const unsigned char "+ var_str +"[]={\n");
-		
+
 		pngf=open(x,"rb");
-		
+
 		b=pngf.read(1);
 		while(len(b)==1):
 			s.write(hex(ord(b)))
 			b=pngf.read(1);
 			if (len(b)==1):
 				s.write(",")
-				
+
 		s.write("\n};\n\n\n");
 		pngf.close();
-	  
+
 	s.write("static Ref<ImageTexture> make_icon(const uint8_t* p_png) {\n")
 	s.write("\tRef<ImageTexture> texture( memnew( ImageTexture ) );\n")
 	s.write("\ttexture->create_from_image( Image(p_png),ImageTexture::FLAG_FILTER );\n")
@@ -42,14 +42,14 @@ def make_editor_icons_action(target, source, env):
 	s.write("void editor_register_icons(Ref<Theme> p_theme) {\n\n")
 
 	for x in pixmaps:
-	
+
 		x=os.path.basename(str(x))
 		type=x[5:-4].title().replace("_","");
 		var_str=x[:-4]+"_png";
 		s.write("\tp_theme->set_icon(\""+type+"\",\"EditorIcons\",make_icon("+var_str+"));\n");
 
 	s.write("\n\n}\n\n");
-	
+
 	f = open(dst,"wb")
 	f.write(s.getvalue())
 	f.close()
@@ -63,4 +63,3 @@ env.Alias('editor_icons',[env.MakeEditorIconsBuilder('#tools/editor/editor_icons
 
 env.tool_sources.append("#tools/editor/editor_icons.cpp")
 Export('env')
-
diff --git a/tools/editor/icons/icon_back.png b/tools/editor/icons/icon_back.png
index d60e3b2640..f7e507d92b 100644
--- a/tools/editor/icons/icon_back.png
+++ b/tools/editor/icons/icon_back.png
diff --git a/tools/editor/icons/icon_class_list.png b/tools/editor/icons/icon_class_list.png
new file mode 100644
index 0000000000..fb756c0fe1
--- /dev/null
+++ b/tools/editor/icons/icon_class_list.png
diff --git a/tools/editor/icons/icon_console.png b/tools/editor/icons/icon_console.png
new file mode 100644
index 0000000000..7dc7407ef7
--- /dev/null
+++ b/tools/editor/icons/icon_console.png
diff --git a/tools/editor/icons/icon_forward.png b/tools/editor/icons/icon_forward.png
index ca6838ae9e..14e8bc9a5a 100644
--- a/tools/editor/icons/icon_forward.png
+++ b/tools/editor/icons/icon_forward.png
diff --git a/tools/editor/icons/icon_godot.png b/tools/editor/icons/icon_godot.png
new file mode 100644
index 0000000000..e80820fc10
--- /dev/null
+++ b/tools/editor/icons/icon_godot.png
diff --git a/tools/editor/icons/icon_help.png b/tools/editor/icons/icon_help.png
index 3f4f8453a7..d2085589ae 100644
--- a/tools/editor/icons/icon_help.png
+++ b/tools/editor/icons/icon_help.png
diff --git a/tools/editor/icons/icon_key_invalid.png b/tools/editor/icons/icon_key_invalid.png
new file mode 100644
index 0000000000..e8e6c87180
--- /dev/null
+++ b/tools/editor/icons/icon_key_invalid.png
diff --git a/tools/editor/icons/icon_key_invalid_hover.png b/tools/editor/icons/icon_key_invalid_hover.png
new file mode 100644
index 0000000000..6f0396d96a
--- /dev/null
+++ b/tools/editor/icons/icon_key_invalid_hover.png
diff --git a/tools/editor/icons/icon_multi_edit.png b/tools/editor/icons/icon_multi_edit.png
new file mode 100644
index 0000000000..70faee3d6a
--- /dev/null
+++ b/tools/editor/icons/icon_multi_edit.png
diff --git a/tools/editor/io_plugins/SCsub b/tools/editor/io_plugins/SCsub
index b525fb3f75..363a2ce4c0 100644
--- a/tools/editor/io_plugins/SCsub
+++ b/tools/editor/io_plugins/SCsub
@@ -1,7 +1,3 @@
 Import('env')
 Export('env')
 env.add_source_files(env.tool_sources,"*.cpp")
-
-
-
-
diff --git a/tools/editor/io_plugins/editor_atlas.cpp b/tools/editor/io_plugins/editor_atlas.cpp
index 4a260a9a6f..7e9acd193d 100644
--- a/tools/editor/io_plugins/editor_atlas.cpp
+++ b/tools/editor/io_plugins/editor_atlas.cpp
@@ -27,7 +27,7 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 #include "editor_atlas.h"
-
+#include "print_string.h"
 
 struct _EditorAtlasWorkRect {
 
@@ -83,6 +83,7 @@ void EditorAtlas::fit(const Vector<Size2i>& p_rects,Vector<Point2i>& r_result, S
 		//place them
 		int ofs=0;
 		int limit_h=0;
+
 		for(int j=0;j<wrects.size();j++) {
 
 
@@ -100,6 +101,9 @@ void EditorAtlas::fit(const Vector<Size2i>& p_rects,Vector<Point2i>& r_result, S
 
 			wrects[j].p.x=ofs;
 			wrects[j].p.y=from_y;
+
+
+
 			int end_h = from_y+wrects[j].s.height;
 			int end_w = ofs+wrects[j].s.width;
 			if (ofs==0)
@@ -116,7 +120,7 @@ void EditorAtlas::fit(const Vector<Size2i>& p_rects,Vector<Point2i>& r_result, S
 			if (end_w > max_w)
 				max_w=end_w;
 
-			if (ofs==0 || end_h>limit_h ) //while h limit not reched, keep stacking
+			//if (ofs==0 || end_h>limit_h ) //while h limit not reched, keep stacking
 				ofs+=wrects[j].s.width;
 
 		}
@@ -136,8 +140,8 @@ void EditorAtlas::fit(const Vector<Size2i>& p_rects,Vector<Point2i>& r_result, S
 
 	for(int i=0;i<results.size();i++) {
 
-		float h = nearest_power_of_2(results[i].max_h);
-		float w = nearest_power_of_2(results[i].max_w);
+		float h = results[i].max_h;
+		float w = results[i].max_w;
 		float aspect = h>w ? h/w : w/h;
 		if (aspect < best_aspect) {
 			best=i;
diff --git a/tools/editor/io_plugins/editor_export_scene.cpp b/tools/editor/io_plugins/editor_export_scene.cpp
new file mode 100644
index 0000000000..cd5c34e53b
--- /dev/null
+++ b/tools/editor/io_plugins/editor_export_scene.cpp
@@ -0,0 +1,112 @@
+#include "editor_export_scene.h"
+#include "io/resource_loader.h"
+#include "io/resource_saver.h"
+#include "os/dir_access.h"
+#include "os/file_access.h"
+#include "tools/editor/editor_settings.h"
+#include "scene/resources/packed_scene.h"
+#include "globals.h"
+
+Vector<uint8_t> EditorSceneExportPlugin::custom_export(String& p_path,const Ref<EditorExportPlatform> &p_platform) {
+
+	// Converts a text scene (xml/xres holding a PackedScene, tscn, xscn) to a .scn file and
+	// returns its bytes, caching the result in the editor settings "tmp/" dir. Returns an empty
+	// Vector when nothing is converted; otherwise ".optimized.scn" is appended to p_path.
+
+	if (!EditorImportExport::get_singleton()->get_convert_text_scenes()) {
+		return Vector<uint8_t>();
+	}
+
+	String extension = p_path.extension();
+
+	//step 1 check if scene
+	if (extension=="xml" || extension=="xres") {
+		// generic resource extensions: only handle them when they hold a PackedScene
+		String type = ResourceLoader::get_resource_type(p_path);
+		if (type!="PackedScene")
+			return Vector<uint8_t>();
+
+	} else if (extension!="tscn" && extension!="xscn") {
+		return Vector<uint8_t>();
+	}
+
+	//step 2 check if cached
+	uint64_t sd=0;
+	String smd5;
+	String gp = Globals::get_singleton()->globalize_path(p_path);
+	String md5=gp.md5_text();
+	String tmp_path = EditorSettings::get_singleton()->get_settings_path().plus_file("tmp/");
+	// cache entry: <base>.scn holds the converted scene, <base>.txt the source mtime/md5/path
+	String cache_base = tmp_path+"scnexp-"+md5;
+
+	bool valid=false;
+	{
+		//if existing, make sure it's valid
+		FileAccessRef f = FileAccess::open(cache_base+".txt",FileAccess::READ);
+		if (f) {
+			uint64_t d = f->get_line().strip_edges().to_int64();
+			sd = FileAccess::get_modified_time(p_path);
+
+			if (d==sd) {
+				valid=true;
+			} else {
+				//timestamp changed, fall back to comparing content hashes
+				String cmd5 = f->get_line().strip_edges();
+				smd5 = FileAccess::get_md5(p_path);
+				if (cmd5==smd5) {
+					valid=true;
+				}
+			}
+		}
+	}
+
+	//metadata alone is not enough, the converted scene must still be on disk
+	if (valid && !FileAccess::exists(cache_base+".scn"))
+		valid=false;
+
+	if (!valid) {
+		//cache failed, convert
+		DirAccess *da = DirAccess::create(DirAccess::ACCESS_RESOURCES);
+		ERR_FAIL_COND_V(!da,Vector<uint8_t>());
+
+		String copy = p_path+".convert."+extension;
+
+		// a copy will allow loading the internal resources without conflicting with opened scenes
+		da->copy(p_path,copy);
+
+		//@todo for tscn use something more efficient
+
+		Ref<PackedScene> copyres =  ResourceLoader::load(copy,"PackedScene");
+
+		da->remove(copy);
+		memdelete(da);
+
+		ERR_FAIL_COND_V(!copyres.is_valid(),Vector<uint8_t>());
+
+		Error err = ResourceSaver::save(cache_base+".scn",copyres);
+		copyres=Ref<PackedScene>();
+		ERR_FAIL_COND_V(err!=OK,Vector<uint8_t>());
+
+		if (sd==0)
+			sd = FileAccess::get_modified_time(p_path);
+		if (smd5==String())
+			smd5 = FileAccess::get_md5(p_path);
+
+		//the metadata is only an optimization: if it can't be written, still export the scene
+		FileAccessRef f = FileAccess::open(cache_base+".txt",FileAccess::WRITE);
+		if (f) {
+			f->store_line(String::num(sd));
+			f->store_line(smd5);
+			f->store_line(gp); //source path for reference
+		}
+	}
+
+	Vector<uint8_t> ret = FileAccess::get_file_as_array(cache_base+".scn");
+	p_path+=".optimized.scn";
+	return ret;
+}
+
+
+EditorSceneExportPlugin::EditorSceneExportPlugin()
+{ // nothing to initialize
+}
diff --git a/tools/editor/io_plugins/editor_export_scene.h b/tools/editor/io_plugins/editor_export_scene.h
new file mode 100644
index 0000000000..134da6c234
--- /dev/null
+++ b/tools/editor/io_plugins/editor_export_scene.h
@@ -0,0 +1,16 @@
+#ifndef EDITOR_EXPORT_SCENE_H
+#define EDITOR_EXPORT_SCENE_H
+
+#include "tools/editor/editor_import_export.h"
+
+
+class EditorSceneExportPlugin : public EditorExportPlugin {
+	OBJ_TYPE( EditorSceneExportPlugin, EditorExportPlugin );
+public:
+	// Export hook; the implementation returns the scene re-saved as a .scn file's bytes, or an empty Vector when it does not convert p_path.
+	virtual Vector<uint8_t> custom_export(String& p_path,const Ref<EditorExportPlatform> &p_platform);
+
+	EditorSceneExportPlugin();
+};
+
+#endif // EDITOR_EXPORT_SCENE_H
diff --git a/tools/editor/io_plugins/editor_font_import_plugin.cpp b/tools/editor/io_plugins/editor_font_import_plugin.cpp
index 10a3877529..5ba0669f1d 100644
--- a/tools/editor/io_plugins/editor_font_import_plugin.cpp
+++ b/tools/editor/io_plugins/editor_font_import_plugin.cpp
@@ -520,6 +520,10 @@ class EditorFontImportDialog : public ConfirmationDialog {
 			return;
 		}
 
+		if (dest->get_line_edit()->get_text().get_file()==".fnt") {
+			dest->get_line_edit()->set_text(dest->get_line_edit()->get_text().get_base_dir() + "/" + source->get_line_edit()->get_text().get_file().basename() + ".fnt" );
+		}
+
 		Ref<ResourceImportMetadata> rimd = get_rimd();
 
 		if (rimd.is_null()) {
diff --git a/tools/editor/io_plugins/editor_mesh_import_plugin.cpp b/tools/editor/io_plugins/editor_mesh_import_plugin.cpp
index 2139513025..b32ab8cb0b 100644
--- a/tools/editor/io_plugins/editor_mesh_import_plugin.cpp
+++ b/tools/editor/io_plugins/editor_mesh_import_plugin.cpp
@@ -128,7 +128,7 @@ class EditorMeshImportDialog : public ConfirmationDialog {
 	LineEdit *save_path;
 	EditorFileDialog *file_select;
 	EditorDirDialog *save_select;
-	ConfirmationDialog *error_dialog;
+	AcceptDialog *error_dialog;
 	PropertyEditor *option_editor;
 
 	_EditorMeshImportOptions *options;
@@ -169,13 +169,12 @@ public:
 	void _browse_target() {
 
 		save_select->popup_centered_ratio();
-
 	}
 
-
 	void popup_import(const String& p_path) {
 
 		popup_centered(Size2(400,400));
+
 		if (p_path!="") {
 
 			Ref<ResourceImportMetadata> rimd = ResourceLoader::load_import_metadata(p_path);
@@ -199,14 +198,13 @@ public:
 		}
 	}
 
-
 	void _import() {
 
 		Vector<String> meshes = import_path->get_text().split(",");
-
 		if (meshes.size()==0) {
 			error_dialog->set_text("No meshes to import!");
-			error_dialog->popup_centered(Size2(200,100));
+			error_dialog->popup_centered_minsize();
+			return;
 		}
 
 		for(int i=0;i<meshes.size();i++) {
@@ -229,19 +227,18 @@ public:
 			String dst = save_path->get_text();
 			if (dst=="") {
 				error_dialog->set_text("Save path is empty!");
-				error_dialog->popup_centered(Size2(200,100));
+				error_dialog->popup_centered_minsize();
+				return;
 			}
 
 			dst = dst.plus_file(meshes[i].get_file().basename()+".msh");
 
-			Error err = plugin->import(dst,imd);
+			plugin->import(dst,imd);
 		}
 
 		hide();
-
 	}
 
-
 	void _notification(int p_what) {
 
 
@@ -253,27 +250,24 @@ public:
 
 	static void _bind_methods() {
 
-
 		ObjectTypeDB::bind_method("_choose_files",&EditorMeshImportDialog::_choose_files);
 		ObjectTypeDB::bind_method("_choose_save_dir",&EditorMeshImportDialog::_choose_save_dir);
 		ObjectTypeDB::bind_method("_import",&EditorMeshImportDialog::_import);
 		ObjectTypeDB::bind_method("_browse",&EditorMeshImportDialog::_browse);
 		ObjectTypeDB::bind_method("_browse_target",&EditorMeshImportDialog::_browse_target);
-	//	ADD_SIGNAL( MethodInfo("imported",PropertyInfo(Variant::OBJECT,"scene")) );
 	}
 
 	EditorMeshImportDialog(EditorMeshImportPlugin *p_plugin) {
 
 		plugin=p_plugin;
 
-
 		set_title("Single Mesh Import");
+		set_hide_on_ok(false);
 
 		VBoxContainer *vbc = memnew( VBoxContainer );
 		add_child(vbc);
 		set_child_rect(vbc);
 
-
 		HBoxContainer *hbc = memnew( HBoxContainer );
 		vbc->add_margin_child("Source Mesh(es):",hbc);
 
@@ -300,28 +294,23 @@ public:
 
 		save_choose->connect("pressed", this,"_browse_target");
 
-		file_select = memnew(EditorFileDialog);
+		file_select = memnew( EditorFileDialog );
 		file_select->set_access(EditorFileDialog::ACCESS_FILESYSTEM);
-		add_child(file_select);
 		file_select->set_mode(EditorFileDialog::MODE_OPEN_FILES);
-		file_select->connect("files_selected", this,"_choose_files");
 		file_select->add_filter("*.obj ; Wavefront OBJ");
-		save_select = memnew(	EditorDirDialog );
-		add_child(save_select);
+		add_child(file_select);
+		file_select->connect("files_selected", this,"_choose_files");
 
-	//	save_select->set_mode(EditorFileDialog::MODE_OPEN_DIR);
+		save_select = memnew( EditorDirDialog );
+		add_child(save_select);
 		save_select->connect("dir_selected", this,"_choose_save_dir");
 
 		get_ok()->connect("pressed", this,"_import");
 		get_ok()->set_text("Import");
 
-
-		error_dialog = memnew ( ConfirmationDialog );
+		error_dialog = memnew( AcceptDialog );
 		add_child(error_dialog);
-		error_dialog->get_ok()->set_text("Accept");
-	//	error_dialog->get_cancel()->hide();
 
-		set_hide_on_ok(false);
 		options = memnew( _EditorMeshImportOptions );
 
 		option_editor = memnew( PropertyEditor );
diff --git a/tools/editor/io_plugins/editor_sample_import_plugin.cpp b/tools/editor/io_plugins/editor_sample_import_plugin.cpp
index 9298b35b3b..7888246956 100644
--- a/tools/editor/io_plugins/editor_sample_import_plugin.cpp
+++ b/tools/editor/io_plugins/editor_sample_import_plugin.cpp
@@ -35,6 +35,7 @@
 #include "io/resource_saver.h"
 #include "os/file_access.h"
 #include "io/marshalls.h"
+#include "tools/editor/editor_settings.h"
 
 class _EditorSampleImportOptions : public Object {
 
@@ -156,7 +157,7 @@ public:
 		edit_normalize=true;
 		edit_loop=false;
 
-		compress_mode=COMPRESS_MODE_DISABLED;
+		compress_mode=COMPRESS_MODE_RAM;
 		compress_bitrate=COMPRESS_128;
 	}
 
@@ -580,8 +581,7 @@ Error EditorSampleImportPlugin::import(const String& p_path, const Ref<ResourceI
 
 	int compression = from->get_option("compress/mode");
 	bool force_mono = from->get_option("force/mono");
-	if (compression==_EditorSampleImportOptions::COMPRESS_MODE_RAM)
-		force_mono=true;
+
 
 	if (force_mono && chans==2) {
 
@@ -608,9 +608,47 @@ Error EditorSampleImportPlugin::import(const String& p_path, const Ref<ResourceI
 	if ( compression == _EditorSampleImportOptions::COMPRESS_MODE_RAM) {
 
 		dst_format=Sample::FORMAT_IMA_ADPCM;
+		if (chans==1) {
+			_compress_ima_adpcm(data,dst_data);
+		} else {
+
+			print_line("INTERLEAAVE!");
+
+
+
+			//byte interleave
+			Vector<float> left;
+			Vector<float> right;
+
+			int tlen = data.size()/2;
+			left.resize(tlen);
+			right.resize(tlen);
+
+			for(int i=0;i<tlen;i++) {
+				left[i]=data[i*2+0];
+				right[i]=data[i*2+1];
+			}
+
+			DVector<uint8_t> bleft;
+			DVector<uint8_t> bright;
 
-		_compress_ima_adpcm(data,dst_data);
-		print_line("compressing ima-adpcm, resulting buffersize is "+itos(dst_data.size())+" from "+itos(data.size()));
+			_compress_ima_adpcm(left,bleft);
+			_compress_ima_adpcm(right,bright);
+
+			int dl = bleft.size();
+			dst_data.resize( dl *2 );
+
+			DVector<uint8_t>::Write w=dst_data.write();
+			DVector<uint8_t>::Read rl=bleft.read();
+			DVector<uint8_t>::Read rr=bright.read();
+
+			for(int i=0;i<dl;i++) {
+				w[i*2+0]=rl[i];
+				w[i*2+1]=rr[i];
+			}
+		}
+
+//		print_line("compressing ima-adpcm, resulting buffersize is "+itos(dst_data.size())+" from "+itos(data.size()));
 
 	} else {
 
@@ -781,9 +819,54 @@ void EditorSampleImportPlugin::_compress_ima_adpcm(const Vector<float>& p_data,D
 
 }
 
+
+EditorSampleImportPlugin* EditorSampleImportPlugin::singleton=NULL;
+
+
+
 EditorSampleImportPlugin::EditorSampleImportPlugin(EditorNode* p_editor) {
 
+	singleton=this;
 	dialog = memnew( EditorSampleImportDialog(this));
 	p_editor->get_gui_base()->add_child(dialog);
 }
 
+Vector<uint8_t> EditorSampleExportPlugin::custom_export(String& p_path,const Ref<EditorExportPlatform> &p_platform) {
+
+	// Re-imports .wav files through EditorSampleImportPlugin using the project's sample export
+	// settings and returns the resulting .smp bytes; returns an empty Vector for anything else.
+	if (EditorImportExport::get_singleton()->sample_get_action()==EditorImportExport::SAMPLE_ACTION_NONE || p_path.extension().to_lower()!="wav") {
+		return Vector<uint8_t>();
+	}
+
+	// the import plugin sets 'singleton' in its constructor; fail cleanly if that never happened
+	ERR_FAIL_COND_V(!EditorSampleImportPlugin::singleton,Vector<uint8_t>());
+
+	Ref<ResourceImportMetadata> imd = memnew( ResourceImportMetadata );
+
+	imd->add_source(EditorImportPlugin::validate_source_path(p_path));
+
+	imd->set_option("force/8_bit",false);
+	imd->set_option("force/mono",false);
+	imd->set_option("force/max_rate",true);
+	imd->set_option("force/max_rate_hz",EditorImportExport::get_singleton()->sample_get_max_hz());
+	imd->set_option("edit/trim",EditorImportExport::get_singleton()->sample_get_trim());
+	imd->set_option("edit/normalize",false);
+	imd->set_option("edit/loop",false);
+	imd->set_option("compress/mode",1); //NOTE(review): presumably _EditorSampleImportOptions::COMPRESS_MODE_RAM - confirm against the enum
+
+	String savepath = EditorSettings::get_singleton()->get_settings_path().plus_file("tmp/smpconv.smp");
+	Error err = EditorSampleImportPlugin::singleton->import(savepath,imd);
+	ERR_FAIL_COND_V(err!=OK,Vector<uint8_t>());
+
+	p_path=p_path.basename()+".smp";
+	return FileAccess::get_file_as_array(savepath);
+
+}
+
+
+EditorSampleExportPlugin::EditorSampleExportPlugin() {
+	// nothing to initialize
+}
+
+
diff --git a/tools/editor/io_plugins/editor_sample_import_plugin.h b/tools/editor/io_plugins/editor_sample_import_plugin.h
index 03a4d38ab3..89319affa0 100644
--- a/tools/editor/io_plugins/editor_sample_import_plugin.h
+++ b/tools/editor/io_plugins/editor_sample_import_plugin.h
@@ -43,6 +43,8 @@ class EditorSampleImportPlugin : public EditorImportPlugin {
 	void _compress_ima_adpcm(const Vector<float>& p_data,DVector<uint8_t>& dst_data);
 public:
 
+	static EditorSampleImportPlugin *singleton;
+
 	virtual String get_name() const;
 	virtual String get_visible_name() const;
 	virtual void import_dialog(const String& p_from="");
@@ -52,4 +54,16 @@ public:
 	EditorSampleImportPlugin(EditorNode* p_editor);
 };
 
+class EditorSampleExportPlugin : public EditorExportPlugin {
+	// Export-time counterpart of EditorSampleImportPlugin; custom_export() is implemented in editor_sample_import_plugin.cpp.
+	OBJ_TYPE( EditorSampleExportPlugin, EditorExportPlugin);
+
+
+public:
+	// Export hook: for .wav paths (when a sample action is set) returns re-imported sample data and renames p_path to .smp; otherwise returns an empty Vector.
+	virtual Vector<uint8_t> custom_export(String& p_path,const Ref<EditorExportPlatform> &p_platform);
+
+	EditorSampleExportPlugin();
+};
+
 #endif // EDITOR_SAMPLE_IMPORT_PLUGIN_H
diff --git a/tools/editor/io_plugins/editor_scene_import_plugin.cpp b/tools/editor/io_plugins/editor_scene_import_plugin.cpp
index 99dcf4ed28..ca44df269b 100644
--- a/tools/editor/io_plugins/editor_scene_import_plugin.cpp
+++ b/tools/editor/io_plugins/editor_scene_import_plugin.cpp
@@ -1814,8 +1814,8 @@ Node* EditorSceneImportPlugin::_fix_node(Node *p_node,Node *p_root,Map<Ref<Mesh>
 
 			for(int i=0;i<portal_points.size()-1;i++) {
 
-				float a = portal_points[i].atan2();
-				float b = portal_points[i+1].atan2();
+				float a = portal_points[i].angle();
+				float b = portal_points[i+1].angle();
 
 				if (a>b) {
 					SWAP( portal_points[i], portal_points[i+1] );
diff --git a/tools/editor/io_plugins/editor_texture_import_plugin.h b/tools/editor/io_plugins/editor_texture_import_plugin.h
index 78383d1d77..38fd671e9d 100644
--- a/tools/editor/io_plugins/editor_texture_import_plugin.h
+++ b/tools/editor/io_plugins/editor_texture_import_plugin.h
@@ -123,6 +123,7 @@ public:
 	virtual Vector<uint8_t> custom_export(String& p_path,const Ref<EditorExportPlatform> &p_platform);
 	EditorTextureExportPlugin();
 };
+
 class EditorImportTextureOptions : public VBoxContainer {
 
 	OBJ_TYPE( EditorImportTextureOptions, VBoxContainer );
diff --git a/tools/editor/plugins/SCsub b/tools/editor/plugins/SCsub
index b525fb3f75..363a2ce4c0 100644
--- a/tools/editor/plugins/SCsub
+++ b/tools/editor/plugins/SCsub
@@ -1,7 +1,3 @@
 Import('env')
 Export('env')
 env.add_source_files(env.tool_sources,"*.cpp")
-
-
-
-
diff --git a/tools/editor/plugins/animation_player_editor_plugin.cpp b/tools/editor/plugins/animation_player_editor_plugin.cpp
index f8c484e886..dc2c241d2f 100644
--- a/tools/editor/plugins/animation_player_editor_plugin.cpp
+++ b/tools/editor/plugins/animation_player_editor_plugin.cpp
@@ -27,7 +27,9 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
 /*************************************************************************/
 #include "animation_player_editor_plugin.h"
+#include "globals.h"
 #include "io/resource_loader.h"
+#include "io/resource_saver.h"
 #include "os/keyboard.h"
 #include "tools/editor/editor_settings.h"
 
@@ -98,6 +100,8 @@ void AnimationPlayerEditor::_notification(int p_what) {
 		duplicate_anim->set_icon( get_icon("Duplicate","EditorIcons") );
 		autoplay->set_icon( get_icon("AutoPlay","EditorIcons") );
 		load_anim->set_icon( get_icon("Folder","EditorIcons") );
+		save_anim->set_icon(get_icon("Save", "EditorIcons"));
+		save_anim->get_popup()->connect("item_pressed", this, "_animation_save_menu");
 		remove_anim->set_icon( get_icon("Remove","EditorIcons") );
 		edit_anim->set_icon( get_icon("Edit","EditorIcons") );
 		blend_anim->set_icon( get_icon("Blend","EditorIcons") );
@@ -367,9 +371,79 @@ void AnimationPlayerEditor::_animation_load() {
 	}
 
 	file->popup_centered_ratio();
+	current_option = RESOURCE_LOAD;
+}
+
+
+void AnimationPlayerEditor::_animation_save_in_path(const Ref<Resource>& p_resource, const String& p_path) {
 
+	int flg = 0; // ResourceSaver flags, built from the editor's "on_save/*" settings
+	if (EditorSettings::get_singleton()->get("on_save/compress_binary_resources"))
+		flg |= ResourceSaver::FLAG_COMPRESS;
+	if (EditorSettings::get_singleton()->get("on_save/save_paths_as_relative"))
+		flg |= ResourceSaver::FLAG_RELATIVE_PATHS;
+	// Save p_resource under the path returned by Globals::localize_path(p_path).
+	String path = Globals::get_singleton()->localize_path(p_path);
+	Error err = ResourceSaver::save(path, p_resource, flg | ResourceSaver::FLAG_REPLACE_SUBRESOURCE_PATHS);
+	// On failure: show the message in the 'accept' dialog and leave the resource's path unchanged.
+	if (err != OK) {
+		accept->set_text("Error saving resource!");
+		accept->popup_centered_minsize();
+		return;
+	}
+	//	EditorFileSystem::get_singleton()->update_file(path,p_resource->get_type());
+	// On success: record the saved path on the resource and emit "resource_saved" on the editor.
+	((Resource*)p_resource.ptr())->set_path(path);
+	editor->emit_signal("resource_saved", p_resource);
 
 }
+
+void AnimationPlayerEditor::_animation_save(const Ref<Resource>& p_resource) {
+	// Paths accepted by is_resource_file() are saved back in place; anything else goes through "Save As..".
+	String path = p_resource->get_path();
+	if (!path.is_resource_file()) {
+		_animation_save_as(p_resource);
+		return;
+	}
+	_animation_save_in_path(p_resource, path);
+}
+
+void AnimationPlayerEditor::_animation_save_as(const Ref<Resource>& p_resource) {
+
+	// Opens the shared file dialog in save mode for p_resource. The chosen path is handled
+	// later by _dialog_action(), which checks current_option (set to RESOURCE_SAVE below).
+	file->set_mode(EditorFileDialog::MODE_SAVE_FILE);
+
+	// offer one filter per extension the saver recognizes for this resource
+	List<String> extensions;
+	ResourceSaver::get_recognized_extensions(p_resource, &extensions);
+	file->clear_filters();
+	for (List<String>::Element *E = extensions.front(); E; E = E->next()) {
+		file->add_filter("*." + E->get() + " ; " + E->get().to_upper());
+	}
+
+	if (p_resource->get_path() != "") {
+		file->set_current_path(p_resource->get_path());
+		if (extensions.size()) {
+			// current extension is not a recognized one: suggest the first recognized one instead
+			String ext = p_resource->get_path().extension().to_lower();
+			if (extensions.find(ext) == NULL) {
+				file->set_current_path(p_resource->get_path().replacen("." + ext, "." + extensions.front()->get()));
+			}
+		}
+	}
+	else {
+		// no path yet: propose "new_<type>.<first extension>" (empty if nothing is recognized)
+		String existing;
+		if (extensions.size()) {
+			existing = "new_" + p_resource->get_type().to_lower() + "." + extensions.front()->get().to_lower();
+		}
+		file->set_current_path(existing);
+	}
+	file->popup_centered_ratio();
+	file->set_title("Save Resource As..");
+	current_option = RESOURCE_SAVE;
+}
 void AnimationPlayerEditor::_animation_remove() {
 
 	if (animation->get_item_count()==0)
@@ -635,38 +709,55 @@ void AnimationPlayerEditor::_animation_edit() {
 	//get_scene()->get_root_node()->call("_resource_selected",anim,"");
 
 }
-void AnimationPlayerEditor::_file_selected(String p_file) {
+void AnimationPlayerEditor::_dialog_action(String p_file) {
 
-	ERR_FAIL_COND(!player);
+	switch (current_option) {
+		case RESOURCE_LOAD: {
+			ERR_FAIL_COND(!player);
 
-	Ref<Resource> res = ResourceLoader::load(p_file,"Animation");
-	ERR_FAIL_COND(res.is_null());
-	ERR_FAIL_COND( !res->is_type("Animation") );
-	if (p_file.find_last("/")!=-1) {
+			Ref<Resource> res = ResourceLoader::load(p_file, "Animation");
+			ERR_FAIL_COND(res.is_null());
+			ERR_FAIL_COND(!res->is_type("Animation"));
+			if (p_file.find_last("/") != -1) {
 
-		p_file=p_file.substr( p_file.find_last("/")+1, p_file.length() );
+				p_file = p_file.substr(p_file.find_last("/") + 1, p_file.length());
 
-	}
-	if (p_file.find_last("\\")!=-1) {
+			}
+			if (p_file.find_last("\\") != -1) {
 
-		p_file=p_file.substr( p_file.find_last("\\")+1, p_file.length() );
+				p_file = p_file.substr(p_file.find_last("\\") + 1, p_file.length());
 
-	}
+			}
 
-	if (p_file.find(".")!=-1)
-		p_file=p_file.substr(0,p_file.find("."));
+			if (p_file.find(".") != -1)
+				p_file = p_file.substr(0, p_file.find("."));
 
-	undo_redo->create_action("Load Animation");
-	undo_redo->add_do_method(player,"add_animation",p_file,res);
-	undo_redo->add_undo_method(player,"remove_animation",p_file);
-	if (player->has_animation(p_file)) {
-		undo_redo->add_undo_method(player,"add_animation",p_file,player->get_animation(p_file));
+			undo_redo->create_action("Load Animation");
+			undo_redo->add_do_method(player, "add_animation", p_file, res);
+			undo_redo->add_undo_method(player, "remove_animation", p_file);
+			if (player->has_animation(p_file)) {
+				undo_redo->add_undo_method(player, "add_animation", p_file, player->get_animation(p_file));
 
-	}
-	undo_redo->add_do_method(this,"_animation_player_changed",player);
-	undo_redo->add_undo_method(this,"_animation_player_changed",player);
-	undo_redo->commit_action();
+			}
+			undo_redo->add_do_method(this, "_animation_player_changed", player);
+			undo_redo->add_undo_method(this, "_animation_player_changed", player);
+			undo_redo->commit_action();
+			break;
+		}
+		case RESOURCE_SAVE: {
+
+			String current = animation->get_item_text(animation->get_selected());
+			if (current != "") {
+				Ref<Animation> anim = player->get_animation(current);
+				
+				ERR_FAIL_COND(!anim->cast_to<Resource>())
+
+					RES current_res = RES(anim->cast_to<Resource>());
 
+				_animation_save_in_path(current_res, p_file);
+			}
+		}
+	}
 }
 
 void AnimationPlayerEditor::_scale_changed(const String& p_scale) {
@@ -730,6 +821,8 @@ void AnimationPlayerEditor::_update_player() {
 	blend_anim->set_disabled(animlist.size()==0);
 	remove_anim->set_disabled(animlist.size()==0);
 	resource_edit_anim->set_disabled(animlist.size()==0);
+	save_anim->set_disabled(animlist.size() == 0);
+
 
 	int active_idx=-1;
 	for (List<StringName>::Element *E=animlist.front();E;E=E->next()) {
@@ -1072,6 +1165,23 @@ void AnimationPlayerEditor::_animation_tool_menu(int p_option) {
 	}
 }
 
+void AnimationPlayerEditor::_animation_save_menu(int p_option) {
+
+	// Handler for the save_anim popup: saves the animation currently selected in the list.
+	String current = animation->get_item_text(animation->get_selected());
+	if (current == "")
+		return;
+
+	ERR_FAIL_COND(!player);
+	Ref<Animation> anim = player->get_animation(current);
+	// the save helpers dereference the resource right away, so refuse a missing animation here
+	ERR_FAIL_COND(anim.is_null());
+	switch (p_option) {
+		case ANIM_SAVE: _animation_save(anim); break;
+		case ANIM_SAVE_AS: _animation_save_as(anim); break;
+	}
+}
+
 void AnimationPlayerEditor::_unhandled_key_input(const InputEvent& p_ev) {
 
 	if (is_visible() && p_ev.type==InputEvent::KEY && p_ev.key.pressed && !p_ev.key.echo && !p_ev.key.mod.alt && !p_ev.key.mod.control && !p_ev.key.mod.meta) {
@@ -1117,7 +1227,7 @@ void AnimationPlayerEditor::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("_animation_blend"),&AnimationPlayerEditor::_animation_blend);
 	ObjectTypeDB::bind_method(_MD("_animation_edit"),&AnimationPlayerEditor::_animation_edit);
 	ObjectTypeDB::bind_method(_MD("_animation_resource_edit"),&AnimationPlayerEditor::_animation_resource_edit);
-	ObjectTypeDB::bind_method(_MD("_file_selected"),&AnimationPlayerEditor::_file_selected);
+	ObjectTypeDB::bind_method(_MD("_dialog_action"),&AnimationPlayerEditor::_dialog_action);
 	ObjectTypeDB::bind_method(_MD("_seek_value_changed"),&AnimationPlayerEditor::_seek_value_changed);
 	ObjectTypeDB::bind_method(_MD("_animation_player_changed"),&AnimationPlayerEditor::_animation_player_changed);
 	ObjectTypeDB::bind_method(_MD("_blend_edited"),&AnimationPlayerEditor::_blend_edited);
@@ -1133,14 +1243,22 @@ void AnimationPlayerEditor::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("_blend_editor_next_changed"),&AnimationPlayerEditor::_blend_editor_next_changed);
 	ObjectTypeDB::bind_method(_MD("_unhandled_key_input"),&AnimationPlayerEditor::_unhandled_key_input);
 	ObjectTypeDB::bind_method(_MD("_animation_tool_menu"),&AnimationPlayerEditor::_animation_tool_menu);
+	ObjectTypeDB::bind_method(_MD("_animation_save_menu"), &AnimationPlayerEditor::_animation_save_menu);
 
 
 
 
 }
 
+AnimationPlayerEditor *AnimationPlayerEditor::singleton=NULL;
+
+AnimationPlayer *AnimationPlayerEditor::get_player() const {
+	// Returns the 'player' member as-is; it may be NULL (other methods guard with ERR_FAIL_COND(!player)).
+	return player;
+}
 AnimationPlayerEditor::AnimationPlayerEditor(EditorNode *p_editor) {
 	editor=p_editor;
+	singleton=this;
 
 	updating=false;
 
@@ -1170,6 +1288,17 @@ AnimationPlayerEditor::AnimationPlayerEditor(EditorNode *p_editor) {
 	load_anim->set_tooltip("Load an animation from disk.");
 	hb->add_child(load_anim);
 
+	save_anim = memnew(MenuButton);
+	save_anim->set_tooltip("Save the current animation");
+	save_anim->get_popup()->add_item("Save", ANIM_SAVE);
+	save_anim->get_popup()->add_item("Save As..", ANIM_SAVE_AS);
+	save_anim->set_focus_mode(Control::FOCUS_NONE);
+	hb->add_child(save_anim);
+
+	accept = memnew(AcceptDialog);
+	add_child(accept);
+	accept->connect("confirmed", this, "_menu_confirm_current");
+
 	duplicate_anim = memnew( ToolButton );
 	hb->add_child(duplicate_anim);
 	duplicate_anim->set_tooltip("Duplicate Animation");
@@ -1279,6 +1408,7 @@ AnimationPlayerEditor::AnimationPlayerEditor(EditorNode *p_editor) {
 	add_child(file);
 
 	name_dialog = memnew( ConfirmationDialog );
+	name_dialog->set_title("Create New Animation");
 	name_dialog->set_hide_on_ok(false);
 	add_child(name_dialog);
 	name = memnew( LineEdit );
@@ -1339,7 +1469,7 @@ AnimationPlayerEditor::AnimationPlayerEditor(EditorNode *p_editor) {
 	remove_anim->connect("pressed", this,"_animation_remove");
 	animation->connect("item_selected", this,"_animation_selected",Vector<Variant>(),true);
 	resource_edit_anim->connect("pressed", this,"_animation_resource_edit");
-	file->connect("file_selected", this,"_file_selected");
+	file->connect("file_selected", this,"_dialog_action");
 	 seek->connect("value_changed", this, "_seek_value_changed",Vector<Variant>(),true);
 	 scale->connect("text_entered", this, "_scale_changed",Vector<Variant>(),true);
 	 editor->get_animation_editor()->connect("timeline_changed",this,"_animation_key_editor_seek");
diff --git a/tools/editor/plugins/animation_player_editor_plugin.h b/tools/editor/plugins/animation_player_editor_plugin.h
index 5705742565..839df3ddcc 100644
--- a/tools/editor/plugins/animation_player_editor_plugin.h
+++ b/tools/editor/plugins/animation_player_editor_plugin.h
@@ -55,6 +55,17 @@ class AnimationPlayerEditor : public VBoxContainer {
 		TOOL_EDIT_RESOURCE
 	};
 
+	enum {
+		ANIM_SAVE,
+		ANIM_SAVE_AS
+	};
+
+	enum {
+		RESOURCE_LOAD,
+		RESOURCE_SAVE
+	};
+
+
 	OptionButton *animation;
 	Button *stop;
 	Button *play;
@@ -70,6 +81,7 @@ class AnimationPlayerEditor : public VBoxContainer {
 	Button *edit_anim;
 	Button *resource_edit_anim;
 	Button *load_anim;
+	MenuButton *save_anim;
 	Button *blend_anim;
 	Button *remove_anim;
 	MenuButton *tool_anim;
@@ -85,6 +97,8 @@ class AnimationPlayerEditor : public VBoxContainer {
 	bool last_active;
 
 	EditorFileDialog *file;
+	AcceptDialog *accept;
+	int current_option;
 
 	struct BlendEditor {
 
@@ -116,13 +130,18 @@ class AnimationPlayerEditor : public VBoxContainer {
 	void _animation_rename();
 	void _animation_name_edited();
 	void _animation_load();
+
+	void _animation_save_in_path(const Ref<Resource>& p_resource, const String& p_path);
+	void _animation_save(const Ref<Resource>& p_resource);
+	void _animation_save_as(const Ref<Resource>& p_resource);
+
 	void _animation_remove();
 	void _animation_blend();
 	void _animation_edit();
 	void _animation_duplicate();
 	void _animation_resource_edit();
 	void _scale_changed(const String& p_scale);
-	void _file_selected(String p_file);
+	void _dialog_action(String p_file);
 	void _seek_frame_changed(const String& p_frame);
 	void _seek_value_changed(float p_value);
 	void _blend_editor_next_changed(const String& p_string);
@@ -141,6 +160,8 @@ class AnimationPlayerEditor : public VBoxContainer {
 	void _animation_key_editor_anim_len_changed(float p_new);
 	void _unhandled_key_input(const InputEvent& p_ev);
 	void _animation_tool_menu(int p_option);
+	void _animation_save_menu(int p_option);
+
 
 	AnimationPlayerEditor();
 protected:
@@ -151,6 +172,9 @@ protected:
 	static void _bind_methods();
 public:
 
+	AnimationPlayer *get_player() const;
+	static AnimationPlayerEditor *singleton;
+
 	Dictionary get_state() const;
 	void set_state(const Dictionary& p_state);
 
diff --git a/tools/editor/plugins/canvas_item_editor_plugin.cpp b/tools/editor/plugins/canvas_item_editor_plugin.cpp
index d318f6f6fa..a3164fc524 100644
--- a/tools/editor/plugins/canvas_item_editor_plugin.cpp
+++ b/tools/editor/plugins/canvas_item_editor_plugin.cpp
@@ -41,9 +41,9 @@
 class SnapDialog : public ConfirmationDialog {
 
 	OBJ_TYPE(SnapDialog,ConfirmationDialog);
-	
-protected:
-	friend class CanvasItemEditor;
+
+friend class CanvasItemEditor;
+
 	SpinBox *grid_offset_x;
 	SpinBox *grid_offset_y;
 	SpinBox *grid_step_x;
@@ -58,63 +58,75 @@ public:
 		Label *label;
 		VBoxContainer *container;
 		GridContainer *child_container;
-		
+
 		set_title("Configure Snap");
 		get_ok()->set_text("Close");
-		container = memnew(VBoxContainer);
+
+		container = memnew( VBoxContainer );
 		add_child(container);
-		
-		child_container = memnew(GridContainer);
+		set_child_rect(container);
+
+		child_container = memnew( GridContainer );
 		child_container->set_columns(3);
 		container->add_child(child_container);
-		
-		label = memnew(Label);
+
+		label = memnew( Label );
 		label->set_text("Grid Offset:");
 		child_container->add_child(label);
-		grid_offset_x=memnew(SpinBox);
+		label->set_h_size_flags(SIZE_EXPAND_FILL);
+
+		grid_offset_x = memnew( SpinBox );
 		grid_offset_x->set_min(-SPIN_BOX_GRID_RANGE);
 		grid_offset_x->set_max(SPIN_BOX_GRID_RANGE);
 		grid_offset_x->set_suffix("px");
 		child_container->add_child(grid_offset_x);
-		grid_offset_y=memnew(SpinBox);
+
+		grid_offset_y = memnew( SpinBox );
 		grid_offset_y->set_min(-SPIN_BOX_GRID_RANGE);
 		grid_offset_y->set_max(SPIN_BOX_GRID_RANGE);
 		grid_offset_y->set_suffix("px");
 		child_container->add_child(grid_offset_y);
 
-		label = memnew(Label);
+		label = memnew( Label );
 		label->set_text("Grid Step:");
 		child_container->add_child(label);
-		grid_step_x=memnew(SpinBox);
+		label->set_h_size_flags(SIZE_EXPAND_FILL);
+
+		grid_step_x = memnew( SpinBox );
 		grid_step_x->set_min(-SPIN_BOX_GRID_RANGE);
 		grid_step_x->set_max(SPIN_BOX_GRID_RANGE);
 		grid_step_x->set_suffix("px");
 		child_container->add_child(grid_step_x);
-		grid_step_y=memnew(SpinBox);
+
+		grid_step_y = memnew( SpinBox );
 		grid_step_y->set_min(-SPIN_BOX_GRID_RANGE);
 		grid_step_y->set_max(SPIN_BOX_GRID_RANGE);
 		grid_step_y->set_suffix("px");
 		child_container->add_child(grid_step_y);
-		
-		container->add_child(memnew(HSeparator));
 
-		child_container = memnew(GridContainer);
+		container->add_child( memnew( HSeparator ) );
+
+		child_container = memnew( GridContainer );
 		child_container->set_columns(2);
 		container->add_child(child_container);
 
-		label = memnew(Label);
+		label = memnew( Label );
 		label->set_text("Rotation Offset:");
 		child_container->add_child(label);
-		rotation_offset=memnew(SpinBox);
+		label->set_h_size_flags(SIZE_EXPAND_FILL);
+
+		rotation_offset = memnew( SpinBox );
 		rotation_offset->set_min(-SPIN_BOX_ROTATION_RANGE);
 		rotation_offset->set_max(SPIN_BOX_ROTATION_RANGE);
 		rotation_offset->set_suffix("deg");
 		child_container->add_child(rotation_offset);
-		
-		label = memnew(Label);
+
+		label = memnew( Label );
 		label->set_text("Rotation Step:");
 		child_container->add_child(label);
-		rotation_step=memnew(SpinBox);
+		label->set_h_size_flags(SIZE_EXPAND_FILL);
+
+		rotation_step = memnew( SpinBox );
 		rotation_step->set_min(-SPIN_BOX_ROTATION_RANGE);
 		rotation_step->set_max(SPIN_BOX_ROTATION_RANGE);
 		rotation_step->set_suffix("deg");
@@ -411,8 +423,6 @@ CanvasItem* CanvasItemEditor::_select_canvas_item_at_pos(const Point2& p_pos,Nod
 			r=_select_canvas_item_at_pos(p_pos,p_node->get_child(i),p_parent_xform * c->get_transform(),p_canvas_xform);
 		else {
 			CanvasLayer *cl = p_node->cast_to<CanvasLayer>();
-			if (cl)
-				return NULL;
 			r=_select_canvas_item_at_pos(p_pos,p_node->get_child(i),transform ,cl ? cl->get_transform() : p_canvas_xform); //use base transform
 		}
 
@@ -421,7 +431,7 @@ CanvasItem* CanvasItemEditor::_select_canvas_item_at_pos(const Point2& p_pos,Nod
 	}
 
 
-	if (c && c->is_visible() && !c->has_meta("_edit_lock_")) {
+	if (c && c->is_visible() && !c->has_meta("_edit_lock_") && !c->cast_to<CanvasLayer>()) {
 
 		Rect2 rect = c->get_item_rect();
 		Point2 local_pos = (p_parent_xform * p_canvas_xform * c->get_transform()).affine_inverse().xform(p_pos);
@@ -435,6 +445,45 @@ CanvasItem* CanvasItemEditor::_select_canvas_item_at_pos(const Point2& p_pos,Nod
 	return NULL;
 }
 
+// Recursively appends to r_items every CanvasItem under p_node that is visible, has no "_edit_lock_" meta and
+// whose item rect contains p_pos (p_pos is mapped into the item's local space first). Children are visited
+// before their parent, last child first; Viewport subtrees are skipped. z/has_z are filled from Node2D items only.
+void CanvasItemEditor::_find_canvas_items_at_pos(const Point2 &p_pos,Node* p_node,const Matrix32& p_parent_xform,const Matrix32& p_canvas_xform, Vector<_SelectResult> &r_items) {
+	if (!p_node)
+		return;
+	if (p_node->cast_to<Viewport>())
+		return;
+
+	CanvasItem *c=p_node->cast_to<CanvasItem>();
+
+	for (int i=p_node->get_child_count()-1;i>=0;i--) {
+
+		if (c && !c->is_set_as_toplevel())
+			_find_canvas_items_at_pos(p_pos,p_node->get_child(i),p_parent_xform * c->get_transform(),p_canvas_xform, r_items);
+		else {
+			CanvasLayer *cl = p_node->cast_to<CanvasLayer>();
+			_find_canvas_items_at_pos(p_pos,p_node->get_child(i),transform ,cl ? cl->get_transform() : p_canvas_xform, r_items); //use base transform
+		}
+	}
+
+	if (c && c->is_visible() && !c->has_meta("_edit_lock_") && !c->cast_to<CanvasLayer>()) {
+
+		Rect2 rect = c->get_item_rect();
+		Point2 local_pos = (p_parent_xform * p_canvas_xform * c->get_transform()).affine_inverse().xform(p_pos);
+
+		if (rect.has_point(local_pos)) {
+			Node2D *node=c->cast_to<Node2D>();
+
+			_SelectResult res;
+			res.item=c;
+			res.z=node?node->get_z():0;
+			res.has_z=node;
+			r_items.push_back(res);
+		}
+
+	}
+	return;
+}
 
 void CanvasItemEditor::_find_canvas_items_at_rect(const Rect2& p_rect,Node* p_node,const Matrix32& p_parent_xform,const Matrix32& p_canvas_xform,List<CanvasItem*> *r_items) {
 
@@ -452,14 +501,12 @@ void CanvasItemEditor::_find_canvas_items_at_rect(const Rect2& p_rect,Node* p_no
 			_find_canvas_items_at_rect(p_rect,p_node->get_child(i),p_parent_xform * c->get_transform(),p_canvas_xform,r_items);
 		else {
 			CanvasLayer *cl = p_node->cast_to<CanvasLayer>();
-			if (cl)
-				return;
 			_find_canvas_items_at_rect(p_rect,p_node->get_child(i),transform,cl?cl->get_transform():p_canvas_xform,r_items);
 		}
 	}
 
 
-	if (c && c->is_visible() && !c->has_meta("_edit_lock_")) {
+	if (c && c->is_visible() && !c->has_meta("_edit_lock_") && !c->cast_to<CanvasLayer>()) {
 
 		Rect2 rect = c->get_item_rect();
 		Matrix32 xform = p_parent_xform * p_canvas_xform * c->get_transform();
@@ -477,6 +524,96 @@ void CanvasItemEditor::_find_canvas_items_at_rect(const Rect2& p_rect,Node* p_no
 
 }
 
+// Applies a click on `item` (NULL when nothing was hit) at p_click_pos to the editor selection.
+// p_append toggles `item` in the selection instead of replacing it; p_drag additionally arms box selection
+// when nothing was hit, or (regular mode only) a DRAG_ALL move. Returns false if nothing was hit or `item` was toggled off.
+bool CanvasItemEditor::_select(CanvasItem *item, Point2 p_click_pos, bool p_append, bool p_drag) {
+
+	if (p_append) {
+		//additive selection
+		if (!item) {
+			if (p_drag) {
+				drag_from=transform.affine_inverse().xform(p_click_pos);
+
+				box_selecting=true;
+				box_selecting_to=drag_from;
+			}
+
+			return false; //nothing to add
+		}
+
+		if (editor_selection->is_selected(item)) {
+			//already in here, erase it
+			editor_selection->remove_node(item);
+			//_remove_canvas_item(c);
+
+			viewport->update();
+			return false;
+		}
+		_append_canvas_item(item);
+		viewport->update();
+		return true; //item was added; without this the bool function fell off its end (undefined result)
+	} else {
+		//regular selection
+
+		if (!item) {
+			//clear because nothing clicked
+			editor_selection->clear();
+
+			if (p_drag) {
+				drag_from=transform.affine_inverse().xform(p_click_pos);
+
+				box_selecting=true;
+				box_selecting_to=drag_from;
+			}
+
+			viewport->update();
+			return false;
+		}
+
+		if (!editor_selection->is_selected(item)) {
+			//select a new one and clear previous selection
+			editor_selection->clear();
+			editor_selection->add_node(item);
+			//reselect
+			if (get_tree()->is_editor_hint()) {
+				editor->call("edit_node",item);
+			}
+
+		}
+
+		if (p_drag) {
+			//prepare to move!
+
+			List<Node*> &selection = editor_selection->get_selected_node_list();
+
+			for(List<Node*>::Element *E=selection.front();E;E=E->next()) {
+
+				CanvasItem *canvas_item = E->get()->cast_to<CanvasItem>();
+				if (!canvas_item || !canvas_item->is_visible())
+					continue;
+				CanvasItemEditorSelectedItem *se=editor_selection->get_node_editor_data<CanvasItemEditorSelectedItem>(canvas_item);
+				if (!se)
+					continue;
+
+				se->undo_state=canvas_item->edit_get_state();
+				if (canvas_item->cast_to<Node2D>())
+					se->undo_pivot=canvas_item->cast_to<Node2D>()->edit_get_pivot();
+
+			}
+
+			drag=DRAG_ALL;
+			drag_from=transform.affine_inverse().xform(p_click_pos);
+			drag_point_from=_find_topleftmost_point();
+		}
+
+		viewport->update();
+
+		return true;
+
+	}
+}
+
 void CanvasItemEditor::_key_move(const Vector2& p_dir, bool p_snap, KeyMoveMODE p_move_mode) {
 
 
@@ -776,6 +913,24 @@ void CanvasItemEditor::_dialog_value_changed(double) {
 	}
 }
 
+// Handler for selection_menu "item_pressed": selects selection_results[p_result]; out-of-range values (either side) are ignored.
+void CanvasItemEditor::_selection_result_pressed(int p_result) {
+	if (p_result < 0 || selection_results.size() <= p_result)
+		return;
+
+	CanvasItem *item=selection_results[p_result].item;
+
+	if (item)
+		_select(item, Point2(), additive_selection, false);
+}
+
+// Handler for selection_menu "popup_hide": drops the cached hit results and the menu entries, then sets the menu size to (0, 0).
+void CanvasItemEditor::_selection_menu_hide() {
+	selection_results.clear();
+	selection_menu->clear();
+	selection_menu->set_size(Vector2(0, 0));
+}
+
 bool CanvasItemEditor::get_remove_list(List<Node*> *p_list) {
 
 
@@ -838,7 +993,60 @@ void CanvasItemEditor::_viewport_input_event(const InputEvent& p_event) {
 
 		if (b.button_index==BUTTON_RIGHT) {
 
+			if (b.pressed && tool==TOOL_SELECT && b.mod.alt) {
+
+				Point2 click=Point2(b.x,b.y);
+
+				Node* scene = editor->get_edited_scene();
+				if (!scene)
+					return;
+
+				_find_canvas_items_at_pos(click, scene,transform,Matrix32(), selection_results);
+
+				if (selection_results.size() == 1) {
+
+					CanvasItem *item = selection_results[0].item;
+					selection_results.clear();
 
+					additive_selection=b.mod.shift;
+					if (!_select(item, click, additive_selection, false))
+						return;
+
+				} else if (!selection_results.empty()) {
+
+					selection_results.sort();
+
+					NodePath root_path = get_tree()->get_edited_scene_root()->get_path();
+					StringName root_name = root_path.get_name(root_path.get_name_count()-1);
+
+					for (int i = 0; i < selection_results.size(); i++) {
+
+						CanvasItem *item=selection_results[i].item;
+
+						Ref<Texture> icon;
+						if (item->has_meta("_editor_icon"))
+							icon=item->get_meta("_editor_icon");
+						else
+							icon=get_icon( has_icon(item->get_type(),"EditorIcons")?item->get_type():String("Object"),"EditorIcons");
+
+						String node_path="/"+root_name+"/"+root_path.rel_path_to(item->get_path());
+
+						selection_menu->add_item(item->get_name());
+						selection_menu->set_item_icon(i, icon );
+						selection_menu->set_item_metadata(i, node_path);
+						selection_menu->set_item_tooltip(i,String(item->get_name())+
+								"\nType: "+item->get_type()+"\nPath: "+node_path);
+					}
+
+					additive_selection=b.mod.shift;
+
+					selection_menu->set_global_pos(Vector2( b.global_x, b.global_y ));
+					selection_menu->popup();
+					selection_menu->call_deferred("grab_click_focus");
+
+					return;
+				}
+			}
 
 			if (get_item_count() > 0 && drag!=DRAG_NONE) {
 				//cancel drag
@@ -1203,82 +1411,10 @@ void CanvasItemEditor::_viewport_input_event(const InputEvent& p_event) {
 #if 0
 		if ( b.pressed ) box_selection_start( click );
 #endif
-		if (b.mod.shift) { //additive selection
-
-			if (!c) {
-
-				drag_from=transform.affine_inverse().xform(click);
-
-				box_selecting=true;
-				box_selecting_to=drag_from;
-
-				return; //nothing to add
-			}
-
-			if (editor_selection->is_selected(c)) {
-				//already in here, erase it
-				editor_selection->remove_node(c);
-				//_remove_canvas_item(c);
 
-				viewport->update();
-				return;
-
-			}
-			_append_canvas_item(c);
-			viewport->update();
-		} else {
-			//regular selection
-
-
-
-			if (!c) {
-				//clear because nothing clicked
-				editor_selection->clear();;
-
-				drag_from=transform.affine_inverse().xform(click);
-
-				box_selecting=true;
-				box_selecting_to=drag_from;
-				viewport->update();
-				return;
-			}
-
-			if (!editor_selection->is_selected(c)) {
-				//select a new one and clear previous selection
-				editor_selection->clear();
-				editor_selection->add_node(c);
-				//reselect
-				if (get_tree()->is_editor_hint()) {
-					editor->call("edit_node",c);
-				}
-
-			}
-
-			//prepare to move!
-
-			List<Node*> &selection = editor_selection->get_selected_node_list();
-
-			for(List<Node*>::Element *E=selection.front();E;E=E->next()) {
-
-				CanvasItem *canvas_item = E->get()->cast_to<CanvasItem>();
-				if (!canvas_item || !canvas_item->is_visible())
-					continue;
-				CanvasItemEditorSelectedItem *se=editor_selection->get_node_editor_data<CanvasItemEditorSelectedItem>(canvas_item);
-				if (!se)
-					continue;
-
-				se->undo_state=canvas_item->edit_get_state();
-				if (canvas_item->cast_to<Node2D>())
-					se->undo_pivot=canvas_item->cast_to<Node2D>()->edit_get_pivot();
-
-			}
-
-			drag=DRAG_ALL;
-			drag_from=transform.affine_inverse().xform(click);
-			drag_point_from=_find_topleftmost_point();
-			viewport->update();
-
-		}
+		additive_selection=b.mod.shift;
+		if (!_select(c, click, additive_selection))
+			return;
 
 	}
 
@@ -1346,7 +1482,7 @@ void CanvasItemEditor::_viewport_input_event(const InputEvent& p_event) {
 					Matrix32 rot;
 					rot.elements[1] = (dfrom - center).normalized();
 					rot.elements[0] = rot.elements[1].tangent();
-					node->set_rot(snap_angle(rot.xform_inv(dto-center).atan2(), node->get_rot()));
+					node->set_rot(snap_angle(rot.xform_inv(dto-center).angle(), node->get_rot()));
 					display_rotate_to = dto;
 					display_rotate_from = center;
 					viewport->update();
@@ -2278,7 +2414,7 @@ void CanvasItemEditor::_popup_callback(int p_op) {
 		} break;
 		case SNAP_CONFIGURE: {
 			((SnapDialog *)snap_dialog)->set_fields(snap_offset, snap_step, snap_rotation_offset, snap_rotation_step);
-			snap_dialog->popup_centered(Size2(200,160));
+			snap_dialog->popup_centered(Size2(220,160));
 		} break;
 		case ZOOM_IN: {
 			zoom=zoom*(1.0/0.5);
@@ -2867,6 +3003,8 @@ void CanvasItemEditor::_bind_methods() {
 	ObjectTypeDB::bind_method("_viewport_draw",&CanvasItemEditor::_viewport_draw);
 	ObjectTypeDB::bind_method("_viewport_input_event",&CanvasItemEditor::_viewport_input_event);
 	ObjectTypeDB::bind_method("_snap_changed",&CanvasItemEditor::_snap_changed);
+	ObjectTypeDB::bind_method(_MD("_selection_result_pressed"),&CanvasItemEditor::_selection_result_pressed);
+	ObjectTypeDB::bind_method(_MD("_selection_menu_hide"),&CanvasItemEditor::_selection_menu_hide);
 
 	ADD_SIGNAL( MethodInfo("item_lock_status_changed") );
 	ADD_SIGNAL( MethodInfo("item_group_status_changed") );
@@ -3173,7 +3311,7 @@ CanvasItemEditor::CanvasItemEditor(EditorNode *p_editor) {
 	p->add_item("Paste Pose",ANIM_PASTE_POSE);
 	p->add_item("Clear Pose",ANIM_CLEAR_POSE,KEY_MASK_SHIFT|KEY_K);
 
-	snap_dialog = memnew(SnapDialog);
+	snap_dialog = memnew( SnapDialog );
 	snap_dialog->connect("confirmed",this,"_snap_changed");
 	add_child(snap_dialog);
 
@@ -3196,6 +3334,12 @@ CanvasItemEditor::CanvasItemEditor(EditorNode *p_editor) {
 	dialog_val->connect("value_changed",this,"_dialog_value_changed");
 	select_sb = Ref<StyleBoxTexture>( memnew( StyleBoxTexture) );
 
+	selection_menu = memnew( PopupMenu );
+	add_child(selection_menu);
+	selection_menu->set_custom_minimum_size(Vector2(100, 0));
+	selection_menu->connect("item_pressed", this, "_selection_result_pressed");
+	selection_menu->connect("popup_hide", this, "_selection_menu_hide");
+
 	key_pos=true;
 	key_rot=true;
 	key_scale=false;
@@ -3218,6 +3362,7 @@ CanvasItemEditor::CanvasItemEditor(EditorNode *p_editor) {
 	can_move_pivot=false;
 	drag=DRAG_NONE;
 	bone_last_frame=0;
+	additive_selection=false;
 }
 
 CanvasItemEditor *CanvasItemEditor::singleton=NULL;
diff --git a/tools/editor/plugins/canvas_item_editor_plugin.h b/tools/editor/plugins/canvas_item_editor_plugin.h
index 485422028e..b96d36f7dc 100644
--- a/tools/editor/plugins/canvas_item_editor_plugin.h
+++ b/tools/editor/plugins/canvas_item_editor_plugin.h
@@ -150,6 +150,7 @@ class CanvasItemEditor : public VBoxContainer {
 	};
 
 	EditorSelection *editor_selection;
+	bool additive_selection;
 
 	Tool tool;
 	bool first_update;
@@ -183,6 +184,18 @@ class CanvasItemEditor : public VBoxContainer {
 
 	MenuOption last_option;
 
+	// One hit collected by _find_canvas_items_at_pos. Sort order: entries without z first, then entries with z by descending z.
+	struct _SelectResult {
+		CanvasItem* item;
+		float z;
+		bool has_z;
+		_FORCE_INLINE_ bool operator<(const _SelectResult& p_rr) const {
+			return has_z && p_rr.has_z ? p_rr.z < z : p_rr.has_z;
+		}
+	};
+
+	Vector<_SelectResult> selection_results;
+
 	struct LockList {
 		Point2 pos;
 		bool lock;
@@ -249,6 +262,8 @@ class CanvasItemEditor : public VBoxContainer {
 	Button *key_scale_button;
 	Button *key_insert_button;
 
+	PopupMenu *selection_menu;
+
 	//PopupMenu *popup;
 	DragType drag;
 	Point2 drag_from;
@@ -276,8 +291,11 @@ class CanvasItemEditor : public VBoxContainer {
 
 	int handle_len;
 	CanvasItem* _select_canvas_item_at_pos(const Point2 &p_pos,Node* p_node,const Matrix32& p_parent_xform,const Matrix32& p_canvas_xform);
+	void _find_canvas_items_at_pos(const Point2 &p_pos,Node* p_node,const Matrix32& p_parent_xform,const Matrix32& p_canvas_xform, Vector<_SelectResult> &r_items);
 	void _find_canvas_items_at_rect(const Rect2& p_rect,Node* p_node,const Matrix32& p_parent_xform,const Matrix32& p_canvas_xform,List<CanvasItem*> *r_items);
 
+	bool _select(CanvasItem *item, Point2 p_click_pos, bool p_append, bool p_drag=true);
+
 	ConfirmationDialog *snap_dialog;
 	
 	AcceptDialog *value_dialog;
@@ -304,6 +322,9 @@ class CanvasItemEditor : public VBoxContainer {
 	void _append_canvas_item(CanvasItem *p_item);
 	void _dialog_value_changed(double);
 	void _snap_changed();
+	void _selection_result_pressed(int);
+	void _selection_menu_hide();
+
 	UndoRedo *undo_redo;
 
 	Point2 _find_topleftmost_point();
diff --git a/tools/editor/plugins/collision_polygon_editor_plugin.cpp b/tools/editor/plugins/collision_polygon_editor_plugin.cpp
index 381cfd74ab..60683f4eda 100644
--- a/tools/editor/plugins/collision_polygon_editor_plugin.cpp
+++ b/tools/editor/plugins/collision_polygon_editor_plugin.cpp
@@ -113,6 +113,7 @@ bool CollisionPolygonEditor::forward_spatial_input_event(Camera* p_camera,const
 		return false;
 
 	Transform gt = node->get_global_transform();
+	Transform gi = gt.affine_inverse();
 	float depth = node->get_depth()*0.5;
 	Vector3 n = gt.basis.get_axis(2).normalized();
 	Plane p(gt.origin+n*depth,n);
@@ -135,6 +136,8 @@ bool CollisionPolygonEditor::forward_spatial_input_event(Camera* p_camera,const
 			if (!p.intersects_ray(ray_from,ray_dir,&spoint))
 				break;
 
+			spoint = gi.xform(spoint);
+
 			Vector2 cpoint(spoint.x,spoint.y);
 
 			cpoint=CanvasItemEditor::get_singleton()->snap_point(cpoint);
@@ -349,6 +352,8 @@ bool CollisionPolygonEditor::forward_spatial_input_event(Camera* p_camera,const
 				if (!p.intersects_ray(ray_from,ray_dir,&spoint))
 					break;
 
+				spoint = gi.xform(spoint);
+
 				Vector2 cpoint(spoint.x,spoint.y);
 
 				cpoint=CanvasItemEditor::get_singleton()->snap_point(cpoint);
diff --git a/tools/editor/plugins/collision_shape_2d_editor_plugin.cpp b/tools/editor/plugins/collision_shape_2d_editor_plugin.cpp
index 62cf1b4acb..7e5d52d17d 100644
--- a/tools/editor/plugins/collision_shape_2d_editor_plugin.cpp
+++ b/tools/editor/plugins/collision_shape_2d_editor_plugin.cpp
@@ -42,6 +42,13 @@ Variant CollisionShape2DEditor::get_handle_value(int idx) const {
 		} break;
 
 		case LINE_SHAPE: {
+			Ref<LineShape2D> line = node->get_shape();
+
+			if (idx==0) {
+				return line->get_d();
+			} else {
+				return line->get_normal();
+			}
 
 		} break;
 
@@ -115,6 +122,18 @@ void CollisionShape2DEditor::set_handle(int idx, Point2& p_point) {
 		} break;
 
 		case LINE_SHAPE: {
+			if (idx<2) {
+				Ref<LineShape2D> line = node->get_shape();
+
+				if (idx==0){
+					line->set_d(p_point.length());
+				}else{
+					line->set_normal(p_point.normalized());
+				}
+
+				canvas_item_editor->get_viewport_control()->update();
+			}
+
 
 		} break;
 
@@ -200,6 +219,19 @@ void CollisionShape2DEditor::commit_handle(int idx, Variant& p_org) {
 		} break;
 
 		case LINE_SHAPE: {
+			Ref<LineShape2D> line = node->get_shape();
+
+			if (idx==0) {
+				undo_redo->add_do_method(line.ptr(),"set_d",line->get_d());
+				undo_redo->add_do_method(c,"update");
+				undo_redo->add_undo_method(line.ptr(),"set_d",p_org);
+				undo_redo->add_undo_method(c,"update");
+			} else {
+				undo_redo->add_do_method(line.ptr(),"set_normal",line->get_normal());
+				undo_redo->add_do_method(c,"update");
+				undo_redo->add_undo_method(line.ptr(),"set_normal",p_org);
+				undo_redo->add_undo_method(c,"update");
+			}
 
 		} break;
 
@@ -418,6 +450,14 @@ void CollisionShape2DEditor::_canvas_draw() {
 		} break;
 
 		case LINE_SHAPE: {
+			Ref<LineShape2D> shape = node->get_shape();
+			
+			handles.resize(2);
+			handles[0] = shape->get_normal() * shape->get_d();
+			handles[1] = shape->get_normal() * (shape->get_d() + 30.0);
+			
+			c->draw_texture(h,gt.xform(handles[0])-size);
+			c->draw_texture(h,gt.xform(handles[1])-size);
 
 		} break;
 
diff --git a/tools/editor/plugins/editor_preview_plugins.cpp b/tools/editor/plugins/editor_preview_plugins.cpp
index c2b3ecfcda..5f52d4c3e7 100644
--- a/tools/editor/plugins/editor_preview_plugins.cpp
+++ b/tools/editor/plugins/editor_preview_plugins.cpp
@@ -491,8 +491,14 @@ Ref<Texture> EditorSamplePreviewPlugin::generate(const RES& p_from) {
 					ima_adpcm.last_nibble++;
 					const uint8_t *src_ptr=sdata;
 
+					int ofs = ima_adpcm.last_nibble>>1;
+
+					if (stereo)
+						ofs*=2;
+
+
 					nibble = (ima_adpcm.last_nibble&1)?
-							(src_ptr[ima_adpcm.last_nibble>>1]>>4):(src_ptr[ima_adpcm.last_nibble>>1]&0xF);
+							(src_ptr[ofs]>>4):(src_ptr[ofs]&0xF);
 					step=_ima_adpcm_step_table[ima_adpcm.step_index];
 
 					ima_adpcm.step_index += _ima_adpcm_index_table[nibble];
diff --git a/tools/editor/plugins/item_list_editor_plugin.cpp b/tools/editor/plugins/item_list_editor_plugin.cpp
index fa261edea3..9c53c73afd 100644
--- a/tools/editor/plugins/item_list_editor_plugin.cpp
+++ b/tools/editor/plugins/item_list_editor_plugin.cpp
@@ -30,7 +30,6 @@
 
 #include "io/resource_loader.h"
 
-
 bool ItemListPlugin::_set(const StringName& p_name, const Variant& p_value) {
 
 	String name = p_name;
@@ -45,12 +44,10 @@ bool ItemListPlugin::_set(const StringName& p_name, const Variant& p_value) {
 		set_item_checkable(idx,p_value);
 	else if (what=="checked")
 		set_item_checked(idx,p_value);
-	else if (what=="enabled")
-		set_item_enabled(idx,p_value);
-	else if (what=="accel")
-		set_item_accel(idx,p_value);
 	else if (what=="id")
 		set_item_id(idx,p_value);
+	else if (what=="enabled")
+		set_item_enabled(idx,p_value);
 	else if (what=="separator")
 		set_item_separator(idx,p_value);
 	else
@@ -60,6 +57,7 @@ bool ItemListPlugin::_set(const StringName& p_name, const Variant& p_value) {
 }
 
 bool ItemListPlugin::_get(const StringName& p_name,Variant &r_ret) const {
+
 	String name = p_name;
 	int idx = name.get_slice("/",0).to_int();
 	String what=name.get_slice("/",1);
@@ -72,12 +70,10 @@ bool ItemListPlugin::_get(const StringName& p_name,Variant &r_ret) const {
 		r_ret=is_item_checkable(idx);
 	else if (what=="checked")
 		r_ret=is_item_checked(idx);
-	else if (what=="enabled")
-		r_ret=is_item_enabled(idx);
-	else if (what=="accel")
-		r_ret=get_item_accel(idx);
 	else if (what=="id")
 		r_ret=get_item_id(idx);
+	else if (what=="enabled")
+		r_ret=is_item_enabled(idx);
 	else if (what=="separator")
 		r_ret=is_item_separator(idx);
 	else
@@ -93,66 +89,119 @@ void ItemListPlugin::_get_property_list( List<PropertyInfo> *p_list) const {
 
 		p_list->push_back( PropertyInfo(Variant::STRING,base+"text") );
 		p_list->push_back( PropertyInfo(Variant::OBJECT,base+"icon",PROPERTY_HINT_RESOURCE_TYPE,"Texture") );
-		if (get_flags()&FLAG_CHECKABLE) {
 
+		int flags = get_flags();
+
+		if (flags&FLAG_CHECKABLE) {
 			p_list->push_back( PropertyInfo(Variant::BOOL,base+"checkable") );
 			p_list->push_back( PropertyInfo(Variant::BOOL,base+"checked") );
-
 		}
-		if (get_flags()&FLAG_ENABLE) {
 
+		if (flags&FLAG_ID)
+			p_list->push_back( PropertyInfo(Variant::INT,base+"id",PROPERTY_HINT_RANGE,"-1,4096") );
+
+		if (flags&FLAG_ENABLE)
 			p_list->push_back( PropertyInfo(Variant::BOOL,base+"enabled") );
 
-		}
-		if (get_flags()&FLAG_ACCEL) {
+		if (flags&FLAG_SEPARATOR)
+			p_list->push_back( PropertyInfo(Variant::BOOL,base+"separator") );
+	}
+}
 
-			p_list->push_back( PropertyInfo(Variant::INT,base+"accel",PROPERTY_HINT_KEY_ACCEL) );
+///////////////////////////////////////////////////////////////
+///////////////////////// PLUGINS /////////////////////////////
+///////////////////////////////////////////////////////////////
 
-		}
-		if (get_flags()&FLAG_ID) {
+void ItemListOptionButtonPlugin::set_object(Object *p_object) {
 
-			p_list->push_back( PropertyInfo(Variant::INT,base+"id",PROPERTY_HINT_RANGE,"-1,4096") );
+	ob = p_object->cast_to<OptionButton>();
+}
 
-		}
-		if (get_flags()&FLAG_SEPARATOR) {
+bool ItemListOptionButtonPlugin::handles(Object *p_object) const {
 
-			p_list->push_back( PropertyInfo(Variant::BOOL,base+"separator") );
+	return p_object->is_type("OptionButton"); 
+}
 
-		}
-	}
+int ItemListOptionButtonPlugin::get_flags() const {
+
+	return FLAG_ICON|FLAG_ID|FLAG_ENABLE;
 }
 
-void ItemListEditor::_node_removed(Node *p_node) {
+void ItemListOptionButtonPlugin::add_item() {
 
-	if(p_node==item_list) {
-		item_list=NULL;
-		hide();
-		dialog->hide();
-	}
+	ob->add_item( "Item "+itos(ob->get_item_count()));
+	_change_notify(); 
+}
 
+int ItemListOptionButtonPlugin::get_item_count() const {
 
+	return ob->get_item_count(); 
 }
 
-void ItemListEditor::_delete_pressed() {
+void ItemListOptionButtonPlugin::erase(int p_idx) {
 
-	String p = prop_editor->get_selected_path();
+	ob->remove_item(p_idx); 
+	_change_notify();
+}
 
-	if (p.find("/")!=-1) {
+ItemListOptionButtonPlugin::ItemListOptionButtonPlugin() {
 
-		if (selected_idx<0 || selected_idx>=item_plugins.size())
-			return;
+	ob=NULL; 
+}
 
-		item_plugins[selected_idx]->erase(p.get_slice("/",0).to_int());;
-	}
+///////////////////////////////////////////////////////////////
+
+void ItemListPopupMenuPlugin::set_object(Object *p_object) {
 
+	if (p_object->is_type("MenuButton"))
+		pp = p_object->cast_to<MenuButton>()->get_popup();
+	else
+		pp = p_object->cast_to<PopupMenu>();
 }
 
-void ItemListEditor::_add_pressed() {
+bool ItemListPopupMenuPlugin::handles(Object *p_object) const {
 
-	if (selected_idx<0 || selected_idx>=item_plugins.size())
-		return;
+	return p_object->is_type("PopupMenu") || p_object->is_type("MenuButton");
+}
 
-	item_plugins[selected_idx]->add_item();
+int ItemListPopupMenuPlugin::get_flags() const {
+
+	return FLAG_ICON|FLAG_CHECKABLE|FLAG_ID|FLAG_ENABLE|FLAG_SEPARATOR;
+}
+
+void ItemListPopupMenuPlugin::add_item() {
+
+	pp->add_item( "Item "+itos(pp->get_item_count()));
+	_change_notify();
+}
+
+int ItemListPopupMenuPlugin::get_item_count() const {
+
+	return pp->get_item_count();
+}
+
+void ItemListPopupMenuPlugin::erase(int p_idx) {
+
+	pp->remove_item(p_idx);
+	_change_notify();
+}
+
+ItemListPopupMenuPlugin::ItemListPopupMenuPlugin() {
+
+	pp=NULL;
+}
+
+///////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////
+
+void ItemListEditor::_node_removed(Node *p_node) {
+
+	if(p_node==item_list) {
+		item_list=NULL;
+		hide();
+		dialog->hide();
+	}
 }
 
 void ItemListEditor::_notification(int p_notification) {
@@ -160,57 +209,73 @@ void ItemListEditor::_notification(int p_notification) {
 	if (p_notification==NOTIFICATION_ENTER_TREE) {
 
 		add_button->set_icon(get_icon("Add","EditorIcons"));
-		del_button->set_icon(get_icon("Del","EditorIcons"));
+		del_button->set_icon(get_icon("Remove","EditorIcons"));
 	}
 }
 
+void ItemListEditor::_add_pressed() {
 
-void ItemListEditor::_menu_option(int p_option) {
+	if (selected_idx==-1)
+		return;
 
+	item_plugins[selected_idx]->add_item();
+}
 
-	switch(p_option) {
+void ItemListEditor::_delete_pressed() {
 
-		case MENU_EDIT_ITEMS: {
+	TreeItem *ti = tree->get_selected();
 
-			dialog->popup_centered_ratio();
-		} break;
-	}
+	if (!ti)
+		return;
+
+	if (ti->get_parent()!=tree->get_root())
+		return;
+
+	int idx = ti->get_text(0).to_int();
+
+	if (selected_idx==-1)
+		return;
+
+	item_plugins[selected_idx]->erase(idx);
 }
 
+void ItemListEditor::_edit_items() {
+
+	dialog->popup_centered(Vector2(300, 400));
+}
 
 void ItemListEditor::edit(Node *p_item_list) {
 
 	item_list=p_item_list;
 
+	if (!item_list) {
+		selected_idx=-1;
+		property_editor->edit(NULL);
+		return;
+	}
+
 	for(int i=0;i<item_plugins.size();i++) {
 		if (item_plugins[i]->handles(p_item_list)) {
 
 			item_plugins[i]->set_object(p_item_list);
-			prop_editor->edit(item_plugins[i]);
+			property_editor->edit(item_plugins[i]);
+			
+			if (has_icon(item_list->get_type(), "EditorIcons"))
+				toolbar_button->set_icon(get_icon(item_list->get_type(), "EditorIcons"));
+			else
+				toolbar_button->set_icon(Ref<Texture>());
+
 			selected_idx=i;
 			return;
 		}
 	}
 
 	selected_idx=-1;
-
-	prop_editor->edit(NULL);
-
-}
-
-
-void ItemListEditor::_bind_methods() {
-
-	ObjectTypeDB::bind_method("_menu_option",&ItemListEditor::_menu_option);
-	ObjectTypeDB::bind_method("_add_button",&ItemListEditor::_add_pressed);
-	ObjectTypeDB::bind_method("_delete_button",&ItemListEditor::_delete_pressed);
-
-	//ObjectTypeDB::bind_method("_populate",&ItemListEditor::_populate);
-
+	property_editor->edit(NULL);
 }
 
 bool ItemListEditor::handles(Object *p_object) const {
-	return false;
+
 	for(int i=0;i<item_plugins.size();i++)  {
 		if (item_plugins[i]->handles(p_object)) {
 			return true;
@@ -218,57 +283,65 @@ bool ItemListEditor::handles(Object *p_object) const {
 	}
 
 	return false;
+}
 
+void ItemListEditor::_bind_methods() {
+
+	ObjectTypeDB::bind_method("_edit_items",&ItemListEditor::_edit_items);
+	ObjectTypeDB::bind_method("_add_button",&ItemListEditor::_add_pressed);
+	ObjectTypeDB::bind_method("_delete_button",&ItemListEditor::_delete_pressed);
 }
+
 ItemListEditor::ItemListEditor() {
 
 	selected_idx=-1;
-	options = memnew( MenuButton );
-	add_child(options);
-	options->set_area_as_parent_rect();
 
-	options->set_text("Items");
-	options->get_popup()->add_item("Edit Items",MENU_EDIT_ITEMS);
-	//options->get_popup()->add_item("Clear",MENU_CLEAR);
+	add_child( memnew( VSeparator ) );
 
-	options->get_popup()->connect("item_pressed", this,"_menu_option");
+	toolbar_button = memnew( ToolButton );
+	toolbar_button->set_text("Items");
+	add_child(toolbar_button);
+	toolbar_button->connect("pressed",this,"_edit_items");
 
 	dialog = memnew( AcceptDialog );
+	dialog->set_title("Item List Editor");
 	add_child( dialog );
 
-
+	VBoxContainer *vbc = memnew( VBoxContainer );
+	dialog->add_child(vbc);
+	dialog->set_child_rect(vbc);
 
 	HBoxContainer *hbc = memnew( HBoxContainer );
-
-	dialog->add_child(hbc);
-	dialog->set_child_rect(hbc);
-
-	prop_editor = memnew( PropertyEditor );
-
-	hbc->add_child(prop_editor);
-	prop_editor->set_h_size_flags(SIZE_EXPAND_FILL);
-
-	VBoxContainer *vbc = memnew( VBoxContainer );
-	hbc->add_child(vbc);
+	hbc->set_h_size_flags(SIZE_EXPAND_FILL);
+	vbc->add_child(hbc);
 
 	add_button = memnew( Button );
-	//add_button->set_text("Add");
+	add_button->set_text("Add");
+	hbc->add_child(add_button);
 	add_button->connect("pressed",this,"_add_button");
-	vbc->add_child(add_button);
+
+	hbc->add_spacer();
 
 	del_button = memnew( Button );
-	//del_button->set_text("Del");
+	del_button->set_text("Delete");
+	hbc->add_child(del_button);
 	del_button->connect("pressed",this,"_delete_button");
-	vbc->add_child(del_button);
 
-	dialog->set_title("Item List");
-	prop_editor->hide_top_label();
+	property_editor = memnew( PropertyEditor );
+	property_editor->hide_top_label();
+	property_editor->set_subsection_selectable(true);
+	vbc->add_child(property_editor);
+	property_editor->set_v_size_flags(SIZE_EXPAND_FILL);
 
+	tree = property_editor->get_scene_tree();
+}
 
+ItemListEditor::~ItemListEditor() {
 
+	for(int i=0;i<item_plugins.size();i++)
+		memdelete( item_plugins[i] );
 }
 
-
 void ItemListEditorPlugin::edit(Object *p_object) {
 
 	item_list_editor->edit(p_object->cast_to<Node>());
@@ -288,127 +361,19 @@ void ItemListEditorPlugin::make_visible(bool p_visible) {
 		item_list_editor->hide();
 		item_list_editor->edit(NULL);
 	}
-
-}
-
-
-ItemListEditor::~ItemListEditor() {
-
-	for(int i=0;i<item_plugins.size();i++)
-		memdelete( item_plugins[i] );
 }
 
-///////////////////////// PLUGINS /////////////////////////////
-///////////////////////// PLUGINS /////////////////////////////
-///////////////////////// PLUGINS /////////////////////////////
-///////////////////////// PLUGINS /////////////////////////////
-///////////////////////// PLUGINS /////////////////////////////
-
-
-class ItemListOptionButtonPlugin : public ItemListPlugin {
-
-	OBJ_TYPE(ItemListOptionButtonPlugin,ItemListPlugin);
-
-	OptionButton *ob;
-public:
-
-	virtual void set_object(Object *p_object) { ob = p_object->cast_to<OptionButton>(); }
-
-	virtual bool handles(Object *p_object) const { return p_object->cast_to<OptionButton>()!=NULL; }
-
-	virtual int get_flags() const { return FLAG_ICON|FLAG_ID|FLAG_ENABLE; }
-
-	virtual void set_item_text(int p_idx,const String& p_text){ ob->set_item_text(p_idx,p_text);}
-	virtual void set_item_icon(int p_idx,const Ref<Texture>& p_tex){ ob->set_item_icon(p_idx,p_tex);}
-	virtual void set_item_enabled(int p_idx,int p_enabled){ ob->set_item_disabled(p_idx,!p_enabled);}
-	virtual void set_item_id(int p_idx,int p_id){ ob->set_item_ID(p_idx,p_id);}
-
-
-	virtual String get_item_text(int p_idx) const{ return ob->get_item_text(p_idx); };
-	virtual Ref<Texture> get_item_icon(int p_idx) const{ return ob->get_item_icon(p_idx); };
-	virtual bool is_item_enabled(int p_idx) const{ return !ob->is_item_disabled(p_idx); };
-	virtual int get_item_id(int p_idx) const{ return ob->get_item_ID(p_idx); };
-
-	virtual void add_item() { ob->add_item( "New Item "+itos(ob->get_item_count())); _change_notify();}
-	virtual int get_item_count() const { return ob->get_item_count(); }
-	virtual void erase(int p_idx) { ob->remove_item(p_idx); _change_notify();}
-
-
-	ItemListOptionButtonPlugin() { ob=NULL; }
-};
-
-class ItemListPopupMenuPlugin : public ItemListPlugin {
-
-	OBJ_TYPE(ItemListPopupMenuPlugin,ItemListPlugin);
-
-	PopupMenu *pp;
-public:
-
-	virtual void set_object(Object *p_object) {
-		if (p_object->cast_to<MenuButton>())
-			pp = p_object->cast_to<MenuButton>()->get_popup();
-		else
-			pp = p_object->cast_to<PopupMenu>();
-	}
-
-	virtual bool handles(Object *p_object) const { return p_object->cast_to<PopupMenu>()!=NULL || p_object->cast_to<MenuButton>()!=NULL; }
-
-	virtual int get_flags() const { return FLAG_ICON|FLAG_ID|FLAG_ENABLE|FLAG_CHECKABLE|FLAG_SEPARATOR|FLAG_ACCEL; }
-
-	virtual void set_item_text(int p_idx,const String& p_text){ pp->set_item_text(p_idx,p_text); }
-	virtual void set_item_icon(int p_idx,const Ref<Texture>& p_tex){ pp->set_item_icon(p_idx,p_tex);}
-	virtual void set_item_checkable(int p_idx,bool p_check){ pp->set_item_as_checkable(p_idx,p_check);}
-	virtual void set_item_checked(int p_idx,bool p_checked){ pp->set_item_checked(p_idx,p_checked);}
-	virtual void set_item_accel(int p_idx,int p_accel){ pp->set_item_accelerator(p_idx,p_accel);}
-	virtual void set_item_enabled(int p_idx,int p_enabled){ pp->set_item_disabled(p_idx,!p_enabled);}
-	virtual void set_item_id(int p_idx,int p_id){ pp->set_item_ID(p_idx,p_idx);}
-	virtual void set_item_separator(int p_idx,bool p_separator){ pp->set_item_as_separator(p_idx,p_separator);}
-
-
-	virtual String get_item_text(int p_idx) const{ return pp->get_item_text(p_idx); };
-	virtual Ref<Texture> get_item_icon(int p_idx) const{ return pp->get_item_icon(p_idx); };
-	virtual bool is_item_checkable(int p_idx) const{ return pp->is_item_checkable(p_idx);  };
-	virtual bool is_item_checked(int p_idx) const{ return pp->is_item_checked(p_idx); };
-	virtual int get_item_accel(int p_idx) const{ return pp->get_item_accelerator(p_idx); };
-	virtual bool is_item_enabled(int p_idx) const{ return !pp->is_item_disabled(p_idx);  };
-	virtual int get_item_id(int p_idx) const{ return pp->get_item_ID(p_idx);  };
-	virtual bool is_item_separator(int p_idx) const{ return pp->is_item_separator(p_idx); };
-
-
-
-	virtual void add_item() { pp->add_item( "New Item "+itos(pp->get_item_count())); _change_notify();}
-	virtual int get_item_count() const { return pp->get_item_count(); }
-	virtual void erase(int p_idx) { pp->remove_item(p_idx); _change_notify();}
-
-
-	ItemListPopupMenuPlugin() { pp=NULL; }
-};
-
-
-
-
-
-
 ItemListEditorPlugin::ItemListEditorPlugin(EditorNode *p_node) {
 
 	editor=p_node;
 	item_list_editor = memnew( ItemListEditor );
-	editor->get_viewport()->add_child(item_list_editor);
-
-//	item_list_editor->set_anchor(MARGIN_LEFT,Control::ANCHOR_END);
-//	item_list_editor->set_anchor(MARGIN_RIGHT,Control::ANCHOR_END);
-	item_list_editor->set_margin(MARGIN_LEFT,180);
-	item_list_editor->set_margin(MARGIN_RIGHT,230);
-	item_list_editor->set_margin(MARGIN_TOP,0);
-	item_list_editor->set_margin(MARGIN_BOTTOM,10);
-
+	CanvasItemEditor::get_singleton()->add_control_to_menu_panel(item_list_editor);
 
 	item_list_editor->hide();
-	item_list_editor->add_plugin( memnew( ItemListOptionButtonPlugin) );
-	item_list_editor->add_plugin( memnew( ItemListPopupMenuPlugin) );
+	item_list_editor->add_plugin( memnew( ItemListOptionButtonPlugin ) );
+	item_list_editor->add_plugin( memnew( ItemListPopupMenuPlugin ) );
 }
 
-
 ItemListEditorPlugin::~ItemListEditorPlugin()
 {
 }
diff --git a/tools/editor/plugins/item_list_editor_plugin.h b/tools/editor/plugins/item_list_editor_plugin.h
index 351dbb800d..b40a2c22f8 100644
--- a/tools/editor/plugins/item_list_editor_plugin.h
+++ b/tools/editor/plugins/item_list_editor_plugin.h
@@ -31,10 +31,11 @@
 
 #include "tools/editor/editor_plugin.h"
 #include "tools/editor/editor_node.h"
+#include "canvas_item_editor_plugin.h"
+
 #include "scene/gui/option_button.h"
 #include "scene/gui/menu_button.h"
 #include "scene/gui/popup_menu.h"
-#include "scene/gui/spin_box.h"
 
 /**
 	@author Juan Linietsky <reduzio@gmail.com>
@@ -51,43 +52,42 @@ protected:
 	bool _get(const StringName& p_name,Variant &r_ret) const;
 	void _get_property_list( List<PropertyInfo> *p_list) const;
 
-
 public:
 
 	enum Flags {
 
 		FLAG_ICON=1,
 		FLAG_CHECKABLE=2,
-		FLAG_ACCEL=4,
-		FLAG_ID=8,
-		FLAG_ENABLE=16,
-		FLAG_SEPARATOR=32
+		FLAG_ID=4,
+		FLAG_ENABLE=8,
+		FLAG_SEPARATOR=16
 	};
 
 	virtual void set_object(Object *p_object)=0;
-
 	virtual bool handles(Object *p_object) const=0;
 
 	virtual int get_flags() const=0;
 
-	virtual void set_item_text(int p_idx,const String& p_text){}
-	virtual void set_item_icon(int p_idx,const Ref<Texture>& p_tex){}
-	virtual void set_item_checkable(int p_idx,bool p_check){}
-	virtual void set_item_checked(int p_idx,bool p_checked){}
-	virtual void set_item_accel(int p_idx,int p_accel){}
-	virtual void set_item_enabled(int p_idx,int p_enabled){}
-	virtual void set_item_id(int p_idx,int p_id){}
-	virtual void set_item_separator(int p_idx,bool p_separator){}
-
-
+	virtual void set_item_text(int p_idx, const String& p_text) {}
 	virtual String get_item_text(int p_idx) const{ return ""; };
+
+	virtual void set_item_icon(int p_idx, const Ref<Texture>& p_tex) {}
 	virtual Ref<Texture> get_item_icon(int p_idx) const{ return Ref<Texture>(); };
+
+	virtual void set_item_checkable(int p_idx, bool p_check) {}
 	virtual bool is_item_checkable(int p_idx) const{ return false; };
+
+	virtual void set_item_checked(int p_idx, bool p_checked) {}
 	virtual bool is_item_checked(int p_idx) const{ return false; };
-	virtual int get_item_accel(int p_idx) const{ return 0; };
+
+	virtual void set_item_enabled(int p_idx, int p_enabled) {}
 	virtual bool is_item_enabled(int p_idx) const{ return false; };
+
+	virtual void set_item_id(int p_idx, int p_id) {}
 	virtual int get_item_id(int p_idx) const{ return -1; };
-	virtual bool is_item_separator(int p_idx) const{ return false; };
+
+	virtual void set_item_separator(int p_idx, bool p_separator) {}
+	virtual bool is_item_separator(int p_idx) const { return false; };
 
 	virtual void add_item()=0;
 	virtual int get_item_count() const=0;
@@ -96,41 +96,107 @@ public:
 	ItemListPlugin() {}
 };
 
-class ItemListEditor : public Control {
+///////////////////////////////////////////////////////////////
 
-	OBJ_TYPE(ItemListEditor, Control );
+class ItemListOptionButtonPlugin : public ItemListPlugin {
 
-	Node *item_list;
+	OBJ_TYPE(ItemListOptionButtonPlugin,ItemListPlugin);
 
-	enum {
+	OptionButton *ob;
+public:
 
-		MENU_EDIT_ITEMS,
-		MENU_CLEAR
-	};
+	virtual void set_object(Object *p_object);
+	virtual bool handles(Object *p_object) const;
+	virtual int get_flags() const;
 
-	AcceptDialog *dialog;
+	virtual void set_item_text(int p_idx, const String& p_text) { ob->set_item_text(p_idx,p_text); }
+	virtual String get_item_text(int p_idx) const { return ob->get_item_text(p_idx); }
 
-	PropertyEditor *prop_editor;
+	virtual void set_item_icon(int p_idx, const Ref<Texture>& p_tex) { ob->set_item_icon(p_idx, p_tex); }
+	virtual Ref<Texture> get_item_icon(int p_idx) const { return ob->get_item_icon(p_idx); }
 
-	MenuButton * options;
-	int selected_idx;
+	virtual void set_item_enabled(int p_idx, int p_enabled) { ob->set_item_disabled(p_idx, !p_enabled); }
+	virtual bool is_item_enabled(int p_idx) const { return !ob->is_item_disabled(p_idx); }
+
+	virtual void set_item_id(int p_idx, int p_id) { ob->set_item_ID(p_idx,p_id); }
+	virtual int get_item_id(int p_idx) const { return ob->get_item_ID(p_idx); }
+
+	virtual void add_item();
+	virtual int get_item_count() const;
+	virtual void erase(int p_idx);
+
+	ItemListOptionButtonPlugin();
+};
+
+class ItemListPopupMenuPlugin : public ItemListPlugin {
+	// ItemListPlugin implementation that forwards every per-item accessor to the PopupMenu held in `pp`.
+	OBJ_TYPE(ItemListPopupMenuPlugin,ItemListPlugin);
+	// Target popup; the inline accessors below dereference it without a NULL check.
+	PopupMenu *pp;
+public:
+	// Declared only; the bodies are defined outside this class definition.
+	virtual void set_object(Object *p_object);
+	virtual bool handles(Object *p_object) const;
+	virtual int get_flags() const;
 
+	virtual void set_item_text(int p_idx, const String& p_text) { pp->set_item_text(p_idx,p_text); }
+	virtual String get_item_text(int p_idx) const { return pp->get_item_text(p_idx); }
+
+	virtual void set_item_icon(int p_idx, const Ref<Texture>& p_tex) { pp->set_item_icon(p_idx,p_tex); }
+	virtual Ref<Texture> get_item_icon(int p_idx) const { return pp->get_item_icon(p_idx); }
+
+	virtual void set_item_checkable(int p_idx, bool p_check) { pp->set_item_as_checkable(p_idx,p_check); }
+	virtual bool is_item_checkable(int p_idx) const { return pp->is_item_checkable(p_idx); }
+
+	virtual void set_item_checked(int p_idx, bool p_checked) { pp->set_item_checked(p_idx,p_checked); }
+	virtual bool is_item_checked(int p_idx) const { return pp->is_item_checked(p_idx); }
+	// "Enabled" is stored inverted: the popup exposes a disabled flag, so both directions negate.
+	virtual void set_item_enabled(int p_idx, int p_enabled) { pp->set_item_disabled(p_idx,!p_enabled); }
+	virtual bool is_item_enabled(int p_idx) const { return !pp->is_item_disabled(p_idx); }
+	// p_idx selects the item and p_id is the value to store; forwarding p_idx twice discarded the requested ID.
+	virtual void set_item_id(int p_idx, int p_id) { pp->set_item_ID(p_idx,p_id); }
+	virtual int get_item_id(int p_idx) const { return pp->get_item_ID(p_idx); }
+
+	virtual void set_item_separator(int p_idx, bool p_separator) { pp->set_item_as_separator(p_idx,p_separator); }
+	virtual bool is_item_separator(int p_idx) const { return pp->is_item_separator(p_idx); }
+	// Item list mutation and size queries; declared only, defined outside this class definition.
+	virtual void add_item();
+	virtual int get_item_count() const;
+	virtual void erase(int p_idx);
+
+	ItemListPopupMenuPlugin();
+};
+
+///////////////////////////////////////////////////////////////
+
+class ItemListEditor : public HBoxContainer {
+
+	OBJ_TYPE(ItemListEditor,HBoxContainer);
+
+	Node *item_list;
+
+	ToolButton *toolbar_button;
+
+	AcceptDialog *dialog;
+	PropertyEditor *property_editor;
+	Tree *tree;
 	Button *add_button;
 	Button *del_button;
 
-
-//	FileDialog *emission_file_dialog;
-	void _menu_option(int);
+	int selected_idx;
 
 	Vector<ItemListPlugin*> item_plugins;
 
-	void _node_removed(Node *p_node);
+	void _edit_items();
+
 	void _add_pressed();
 	void _delete_pressed();
+
+	void _node_removed(Node *p_node);
+
 protected:
 
 	void _notification(int p_notification);
-
 	static void _bind_methods();
 public:
 
@@ -143,7 +209,7 @@ public:
 
 class ItemListEditorPlugin : public EditorPlugin {
 
-	OBJ_TYPE( ItemListEditorPlugin, EditorPlugin );
+	OBJ_TYPE(ItemListEditorPlugin,EditorPlugin);
 
 	ItemListEditor *item_list_editor;
 	EditorNode *editor;
diff --git a/tools/editor/plugins/mesh_editor_plugin.cpp b/tools/editor/plugins/mesh_editor_plugin.cpp
index 13d4c8db5a..cea774f94b 100644
--- a/tools/editor/plugins/mesh_editor_plugin.cpp
+++ b/tools/editor/plugins/mesh_editor_plugin.cpp
@@ -160,7 +160,7 @@ void MeshInstanceEditor::_menu_option(int p_option) {
 		} break;
 		case MENU_OPTION_CREATE_OUTLINE_MESH: {
 
-			outline_dialog->popup_centered_minsize();
+			outline_dialog->popup_centered(Vector2(200, 90));
 		} break;
 	}
 
@@ -212,7 +212,6 @@ MeshInstanceEditor::MeshInstanceEditor() {
 
 
 	options = memnew( MenuButton );
-	//add_child(options);
 	SpatialEditor::get_singleton()->add_control_to_menu_panel(options);
 
 	options->set_text("Mesh");
@@ -231,14 +230,20 @@ MeshInstanceEditor::MeshInstanceEditor() {
 	options->get_popup()->connect("item_pressed", this,"_menu_option");
 
 	outline_dialog = memnew( ConfirmationDialog );
-	outline_dialog->set_title("Outline Size: ");
+	outline_dialog->set_title("Create Outline Mesh");
+	outline_dialog->get_ok()->set_text("Create");
+
+	VBoxContainer *outline_dialog_vbc = memnew( VBoxContainer );
+	outline_dialog->add_child(outline_dialog_vbc);
+	outline_dialog->set_child_rect(outline_dialog_vbc);
+
 	outline_size = memnew( SpinBox );
 	outline_size->set_min(0.001);
 	outline_size->set_max(1024);
 	outline_size->set_step(0.001);
 	outline_size->set_val(0.05);
-	outline_dialog->add_child(outline_size);
-	outline_dialog->set_child_rect(outline_size);
+	outline_dialog_vbc->add_margin_child("Outline Size:",outline_size);
+
 	add_child(outline_dialog);
 	outline_dialog->connect("confirmed",this,"_create_outline_mesh");
 
diff --git a/tools/editor/plugins/multimesh_editor_plugin.cpp b/tools/editor/plugins/multimesh_editor_plugin.cpp
index 3c88b1d3a8..a5c823f8bd 100644
--- a/tools/editor/plugins/multimesh_editor_plugin.cpp
+++ b/tools/editor/plugins/multimesh_editor_plugin.cpp
@@ -289,7 +289,7 @@ void MultiMeshEditor::_menu_option(int p_option) {
 
 				_last_pp_node=node;
 			}
-			populate_dialog->popup_centered(Size2(250,395));
+			populate_dialog->popup_centered(Size2(250,380));
 
 		} break;
 	}
@@ -325,10 +325,8 @@ MultiMeshEditor::MultiMeshEditor() {
 
 
 	options = memnew( MenuButton );
-	//add_child(options);
 	SpatialEditor::get_singleton()->add_control_to_menu_panel(options);
-	options->set_area_as_parent_rect();
-
+	
 	options->set_text("MultiMesh");
 	options->set_icon(EditorNode::get_singleton()->get_gui_base()->get_icon("MultiMeshInstance","EditorIcons"));
 
@@ -373,12 +371,12 @@ MultiMeshEditor::MultiMeshEditor() {
 	populate_axis->select(2);
 	vbc->add_margin_child("Mesh Up Axis:",populate_axis);
 
-	populate_rotate_random = memnew( HScrollBar );
+	populate_rotate_random = memnew( HSlider );
 	populate_rotate_random->set_max(1);
 	populate_rotate_random->set_step(0.01);
 	vbc->add_margin_child("Random Rotation:",populate_rotate_random);
 
-	populate_tilt_random = memnew( HScrollBar );
+	populate_tilt_random = memnew( HSlider );
 	populate_tilt_random->set_max(1);
 	populate_tilt_random->set_step(0.01);
 	vbc->add_margin_child("Random Tilt:",populate_tilt_random);
@@ -416,8 +414,7 @@ MultiMeshEditor::MultiMeshEditor() {
 	std->connect("selected",this,"_browsed");
 
 	_last_pp_node=NULL;
-	//options->set_anchor(MARGIN_LEFT,Control::ANCHOR_END);
-	//options->set_anchor(MARGIN_RIGHT,Control::ANCHOR_END);
+
 	err_dialog = memnew( AcceptDialog );
 	add_child(err_dialog);
 }
@@ -451,13 +448,6 @@ MultiMeshEditorPlugin::MultiMeshEditorPlugin(EditorNode *p_node) {
 	multimesh_editor = memnew( MultiMeshEditor );
 	editor->get_viewport()->add_child(multimesh_editor);
 
-//	multimesh_editor->set_anchor(MARGIN_LEFT,Control::ANCHOR_END);
-//	multimesh_editor->set_anchor(MARGIN_RIGHT,Control::ANCHOR_END);
-	multimesh_editor->set_margin(MARGIN_LEFT,253);
-	multimesh_editor->set_margin(MARGIN_RIGHT,310);
-	multimesh_editor->set_margin(MARGIN_TOP,0);
-	multimesh_editor->set_margin(MARGIN_BOTTOM,10);
-
 	multimesh_editor->options->hide();
 }
 
diff --git a/tools/editor/plugins/multimesh_editor_plugin.h b/tools/editor/plugins/multimesh_editor_plugin.h
index 4f0c0d008b..edc3dfd55f 100644
--- a/tools/editor/plugins/multimesh_editor_plugin.h
+++ b/tools/editor/plugins/multimesh_editor_plugin.h
@@ -42,10 +42,10 @@ class MultiMeshEditor : public Control {
 
 	OBJ_TYPE(MultiMeshEditor, Control );
 
-        friend class MultiMeshEditorPlugin;
+friend class MultiMeshEditorPlugin;
 
 	AcceptDialog *err_dialog;
-        MenuButton * options;
+	MenuButton * options;
 	MultiMeshInstance *_last_pp_node;
 	bool browsing_source;
 
@@ -59,8 +59,8 @@ class MultiMeshEditor : public Control {
 
 	ConfirmationDialog *populate_dialog;
 	OptionButton *populate_axis;
-	HScrollBar *populate_rotate_random;
-	HScrollBar *populate_tilt_random;
+	HSlider *populate_rotate_random;
+	HSlider *populate_tilt_random;
 	SpinBox *populate_scale_random;
 	SpinBox *populate_scale;
 	SpinBox *populate_amount;
diff --git a/tools/editor/plugins/polygon_2d_editor_plugin.cpp b/tools/editor/plugins/polygon_2d_editor_plugin.cpp
index 3029dcf2ab..cd82297365 100644
--- a/tools/editor/plugins/polygon_2d_editor_plugin.cpp
+++ b/tools/editor/plugins/polygon_2d_editor_plugin.cpp
@@ -54,6 +54,8 @@ void Polygon2DEditor::_notification(int p_what) {
 			b_snap_enable->set_icon( get_icon("Snap", "EditorIcons"));
 			uv_icon_zoom->set_texture( get_icon("Zoom", "EditorIcons"));
 
+			get_tree()->connect("node_removed", this, "_node_removed");
+
 		} break;
 		case NOTIFICATION_FIXED_PROCESS: {
 
@@ -65,8 +67,10 @@ void Polygon2DEditor::_notification(int p_what) {
 void Polygon2DEditor::_node_removed(Node *p_node) {
 
 	if(p_node==node) {
-		node=NULL;
+		edit(NULL);
 		hide();
+		
+		canvas_item_editor->get_viewport_control()->update();
 	}
 
 }
@@ -757,16 +761,13 @@ void Polygon2DEditor::edit(Node *p_collision_polygon) {
 		node=p_collision_polygon->cast_to<Polygon2D>();
 		if (!canvas_item_editor->get_viewport_control()->is_connected("draw",this,"_canvas_draw"))
 			canvas_item_editor->get_viewport_control()->connect("draw",this,"_canvas_draw");
-		node->connect("exit_tree",this,"_node_removed",varray(),CONNECT_ONESHOT);
+		
 		wip.clear();
 		wip_active=false;
 		edited_point=-1;
 
 	} else {
 
-		if (node)
-			node->disconnect("exit_tree",this,"_node_removed");
-
 		node=NULL;
 
 		if (canvas_item_editor->get_viewport_control()->is_connected("draw",this,"_canvas_draw"))
diff --git a/tools/editor/plugins/sample_editor_plugin.cpp b/tools/editor/plugins/sample_editor_plugin.cpp
index 31fa7246ae..d88f2adc73 100644
--- a/tools/editor/plugins/sample_editor_plugin.cpp
+++ b/tools/editor/plugins/sample_editor_plugin.cpp
@@ -156,8 +156,14 @@ void SampleEditor::generate_preview_texture(const Ref<Sample>& p_sample,Ref<Imag
 					ima_adpcm.last_nibble++;
 					const uint8_t *src_ptr=sdata;
 
+					int ofs = ima_adpcm.last_nibble>>1;
+
+					if (stereo)
+						ofs*=2;
+
 					nibble = (ima_adpcm.last_nibble&1)?
-							(src_ptr[ima_adpcm.last_nibble>>1]>>4):(src_ptr[ima_adpcm.last_nibble>>1]&0xF);
+							(src_ptr[ofs]>>4):(src_ptr[ofs]&0xF);
+
 					step=_ima_adpcm_step_table[ima_adpcm.step_index];
 
 					ima_adpcm.step_index += _ima_adpcm_index_table[nibble];
diff --git a/tools/editor/plugins/sample_library_editor_plugin.cpp b/tools/editor/plugins/sample_library_editor_plugin.cpp
index cf9a6c41a4..b497458a2a 100644
--- a/tools/editor/plugins/sample_library_editor_plugin.cpp
+++ b/tools/editor/plugins/sample_library_editor_plugin.cpp
@@ -49,9 +49,13 @@ void SampleLibraryEditor::_notification(int p_what) {
 
 	if (p_what==NOTIFICATION_ENTER_TREE) {
 		play->set_icon( get_icon("Play","EditorIcons") );
+		play->set_tooltip("Play Sample");
 		stop->set_icon( get_icon("Stop","EditorIcons") );
+		stop->set_tooltip("Stop Sample");
 		load->set_icon( get_icon("Folder","EditorIcons") );
+		load->set_tooltip("Open Sample File(s)");
 		_delete->set_icon( get_icon("Del","EditorIcons") );
+		_delete->set_tooltip("Remove Sample");
 	}
 
 	if (p_what==NOTIFICATION_READY) {
diff --git a/tools/editor/plugins/script_editor_plugin.cpp b/tools/editor/plugins/script_editor_plugin.cpp
index bd0f580a34..4e394f9e3f 100644
--- a/tools/editor/plugins/script_editor_plugin.cpp
+++ b/tools/editor/plugins/script_editor_plugin.cpp
@@ -572,28 +572,119 @@ void ScriptEditor::_goto_script_line(REF p_script,int p_line) {
 
 }
 
+
+void ScriptEditor::_update_history_arrows() {
+	// Disable "back" when history_pos is at or before the first entry, "forward" when at or past the last.
+	script_back->set_disabled( history_pos<=0 );
+	script_forward->set_disabled( history_pos>=history.size()-1 );
+}
+
+
+void ScriptEditor::_go_to_tab(int p_idx) {
+	// Switch to the tab at child index p_idx and record it as a new history entry; no-op if that child is missing or not a Control.
+	Node *cn = tab_container->get_child(p_idx);
+	if (!cn)
+		return;
+	Control *c = cn->cast_to<Control>();
+	if (!c)
+		return;
+	// When the current history entry is the tab being shown, store its scroll (and cursor, for scripts) before leaving it.
+	if (history_pos>=0 && history_pos<history.size() && history[history_pos].control==tab_container->get_current_tab_control()) {
+
+		Node *n = tab_container->get_current_tab_control();
+
+		if (n->cast_to<ScriptTextEditor>()) {
+
+			history[history_pos].scroll_pos=n->cast_to<ScriptTextEditor>()->get_text_edit()->get_v_scroll();
+			history[history_pos].cursor_column=n->cast_to<ScriptTextEditor>()->get_text_edit()->cursor_get_column();
+			history[history_pos].cursor_row=n->cast_to<ScriptTextEditor>()->get_text_edit()->cursor_get_line();
+		}
+		if (n->cast_to<EditorHelp>()) {
+
+			history[history_pos].scroll_pos=n->cast_to<EditorHelp>()->get_scroll();
+		}
+	}
+	// Resize the history so it ends at history_pos, then append the target tab as the new last entry.
+	history.resize(history_pos+1);
+	ScriptHistory sh;
+	sh.control=c;
+	sh.scroll_pos=0;
+
+	history.push_back(sh);
+	history_pos++;
+
+
+	tab_container->set_current_tab(p_idx);
+	// Re-read the control the tab container reports as current after the switch.
+	c = tab_container->get_current_tab_control();
+
+	if (c->cast_to<ScriptTextEditor>()) {
+
+		script_name_label->set_text(c->cast_to<ScriptTextEditor>()->get_name());
+		script_icon->set_texture(c->cast_to<ScriptTextEditor>()->get_icon());
+		if (is_visible())
+			c->cast_to<ScriptTextEditor>()->get_text_edit()->grab_focus();
+	}
+	if (c->cast_to<EditorHelp>()) {
+
+		script_name_label->set_text(c->cast_to<EditorHelp>()->get_class_name());
+		script_icon->set_texture(get_icon("Help","EditorIcons"));
+		if (is_visible())
+			c->cast_to<EditorHelp>()->set_focused();
+	}
+
+
+	// Stamp the control with the incremented edit_pass counter, then refresh the history arrows and script colors.
+	c->set_meta("__editor_pass",++edit_pass);
+	_update_history_arrows();
+	_update_script_colors();
+}
+
 void ScriptEditor::_close_current_tab() {
 
 	int selected = tab_container->get_current_tab();
 	if (selected<0 || selected>=tab_container->get_child_count())
 		return;
 	
+	Node *tselected = tab_container->get_child(selected);
 	ScriptTextEditor *current = tab_container->get_child(selected)->cast_to<ScriptTextEditor>();
-	if (!current)
-		return;
+	if (current) {
+		apply_scripts();
+	}
 
-	apply_scripts();
+	//remove from history
+	history.resize(history_pos+1);
+
+	for(int i=0;i<history.size();i++) {
+		if (history[i].control==tselected) {
+			history.remove(i);
+			i--;
+			history_pos--;
+		}
+	}
+
+	if (history_pos>=history.size()) {
+		history_pos=history.size()-1;
+	}
 
 	int idx = tab_container->get_current_tab();
-	memdelete(current);
+	memdelete(tselected);
 	if (idx>=tab_container->get_child_count())
 		idx=tab_container->get_child_count()-1;
 	if (idx>=0) {
+
+		if (history_pos>=0) {
+			idx = history[history_pos].control->get_index();
+		}
 		tab_container->set_current_tab(idx);
+
 		//script_list->select(idx);
 	}
 
 
+	_update_history_arrows();
+
+
 
 	_update_script_names();
 	EditorNode::get_singleton()->save_layout();
@@ -755,33 +846,15 @@ void ScriptEditor::swap_lines(TextEdit *tx, int line1, int line2)
 void ScriptEditor::_menu_option(int p_option) {
 
 
-	if (p_option==FILE_OPEN) {
-		editor->open_resource("Script");
-		return;
-	}
-	int selected = tab_container->get_current_tab();
-	if (selected<0 || selected>=tab_container->get_child_count())
-		return;
-
-	ScriptTextEditor *current = tab_container->get_child(selected)->cast_to<ScriptTextEditor>();
-	if (!current)
-		return;
-
 	switch(p_option) {
 		case FILE_NEW: {
 			script_create_dialog->config("Node", ".gd");
 			script_create_dialog->popup_centered(Size2(300, 300));
 		} break;
-		case FILE_SAVE: {
-			if (!_test_script_times_on_disk())
-				return;
-			editor->save_resource( current->get_edited_script() );
-
-		} break;
-		case FILE_SAVE_AS: {
-
-			editor->save_resource_as( current->get_edited_script() );
+		case FILE_OPEN: {
 
+			editor->open_resource("Script");
+			return;
 		} break;
 		case FILE_SAVE_ALL: {
 
@@ -806,387 +879,471 @@ void ScriptEditor::_menu_option(int p_option) {
 
 
 		} break;
-		case EDIT_UNDO: {
-			current->get_text_edit()->undo();
-			current->get_text_edit()->call_deferred("grab_focus");
-		} break;
-		case EDIT_REDO: {
-			current->get_text_edit()->redo();
-			current->get_text_edit()->call_deferred("grab_focus");
-		} break;
-		case EDIT_CUT: {
+		case SEARCH_HELP: {
 
-			current->get_text_edit()->cut();
-			current->get_text_edit()->call_deferred("grab_focus");
+			help_search_dialog->popup();
 		} break;
-		case EDIT_COPY: {
-			current->get_text_edit()->copy();
-			current->get_text_edit()->call_deferred("grab_focus");
+		case SEARCH_CLASSES: {
 
-		} break;
-		case EDIT_PASTE: {
-			current->get_text_edit()->paste();
-			current->get_text_edit()->call_deferred("grab_focus");
+			String current;
 
-		} break;
-		case EDIT_SELECT_ALL: {
+			if (tab_container->get_tab_count()>0) {
+				EditorHelp *eh = tab_container->get_child( tab_container->get_current_tab() )->cast_to<EditorHelp>();
+				if (eh) {
+					current=eh->get_class_name();
+				}
+			}
 
-			current->get_text_edit()->select_all();
-			current->get_text_edit()->call_deferred("grab_focus");
+			help_index->popup_centered_ratio(0.6);
 
+			if (current!="") {
+				help_index->call_deferred("select_class",current);
+			}
 		} break;
-        case EDIT_MOVE_LINE_UP: {
-
-            TextEdit *tx = current->get_text_edit();
-            Ref<Script> scr = current->get_edited_script();
-            if (scr.is_null())
-                return;
-
-            if (tx->is_selection_active())
-            {
-                int from_line = tx->get_selection_from_line();
-                int from_col  = tx->get_selection_from_column();
-                int to_line   = tx->get_selection_to_line();
-                int to_column = tx->get_selection_to_column();
-
-                for (int i = from_line; i <= to_line; i++)
-                {
-                    int line_id = i;
-                    int next_id = i - 1;
-
-                    if (line_id == 0 || next_id < 0)
-                        return;
-
-                    swap_lines(tx, line_id, next_id);
-                }
-                int from_line_up = from_line > 0 ? from_line-1 : from_line;
-                int to_line_up   = to_line   > 0 ? to_line-1   : to_line;
-                tx->select(from_line_up, from_col, to_line_up, to_column);
-            }
-            else
-            {
-                int line_id = tx->cursor_get_line();
-                int next_id = line_id - 1;
-
-                if (line_id == 0 || next_id < 0)
-                    return;
-
-                swap_lines(tx, line_id, next_id);
-            }
-            tx->update();
-
-        } break;
-        case EDIT_MOVE_LINE_DOWN: {
-
-            TextEdit *tx = current->get_text_edit();
-            Ref<Script> scr = current->get_edited_script();
-            if (scr.is_null())
-                return;
-
-            if (tx->is_selection_active())
-            {
-                int from_line = tx->get_selection_from_line();
-                int from_col  = tx->get_selection_from_column();
-                int to_line   = tx->get_selection_to_line();
-                int to_column = tx->get_selection_to_column();
-
-                for (int i = to_line; i >= from_line; i--)
-                {
-                    int line_id = i;
-                    int next_id = i + 1;
-
-                    if (line_id == tx->get_line_count()-1 || next_id > tx->get_line_count())
-                        return;
-
-                    swap_lines(tx, line_id, next_id);
-                }
-                int from_line_down = from_line < tx->get_line_count() ? from_line+1 : from_line;
-                int to_line_down   = to_line   < tx->get_line_count() ? to_line+1   : to_line;
-                tx->select(from_line_down, from_col, to_line_down, to_column);
-            }
-            else
-            {
-                int line_id = tx->cursor_get_line();
-                int next_id = line_id + 1;
-
-                if (line_id == tx->get_line_count()-1 || next_id > tx->get_line_count())
-                    return;
-
-                swap_lines(tx, line_id, next_id);
-            }
-            tx->update();
-
-        } break;
-        case EDIT_INDENT_LEFT: {
-
-            TextEdit *tx = current->get_text_edit();
-            Ref<Script> scr = current->get_edited_script();
-            if (scr.is_null())
-                return;
-
-
-            if (tx->is_selection_active())
-            {
-		int begin = tx->get_selection_from_line();
-		int end = tx->get_selection_to_line();
-                for (int i = begin; i <= end; i++)
-                {
-                    String line_text = tx->get_line(i);
-                    // begins with tab
-                    if (line_text.begins_with("\t"))
-                    {
-                        line_text = line_text.substr(1, line_text.length());
-                        tx->set_line(i, line_text);
-                    }
-                    // begins with 4 spaces
-                    else if (line_text.begins_with("    "))
-                    {
-                        line_text = line_text.substr(4, line_text.length());
-                        tx->set_line(i, line_text);
-                    }
-                }
-            }
-            else
-            {
-		int begin = tx->cursor_get_line();
-                String line_text = tx->get_line(begin);
-                // begins with tab
-                if (line_text.begins_with("\t"))
-                {
-                    line_text = line_text.substr(1, line_text.length());
-                    tx->set_line(begin, line_text);
-                }
-                // begins with 4 spaces
-                else if (line_text.begins_with("    "))
-                {
-                    line_text = line_text.substr(4, line_text.length());
-                    tx->set_line(begin, line_text);
-                }
-            }
-            tx->update();
-            //tx->deselect();
-
-        } break;
-        case EDIT_INDENT_RIGHT: {
-
-            TextEdit *tx = current->get_text_edit();
-            Ref<Script> scr = current->get_edited_script();
-            if (scr.is_null())
-                return;
-
-            if (tx->is_selection_active())
-            {
-		int begin = tx->get_selection_from_line();
-		int end = tx->get_selection_to_line();
-                for (int i = begin; i <= end; i++)
-                {
-                    String line_text = tx->get_line(i);
-                    line_text = '\t' + line_text;
-                    tx->set_line(i, line_text);
-                }
-            }
-            else
-            {
-		int begin = tx->cursor_get_line();
-                String line_text = tx->get_line(begin);
-                line_text = '\t' + line_text;
-                tx->set_line(begin, line_text);
-            }
-            tx->update();
-            //tx->deselect();
-
-        } break;
-        case EDIT_CLONE_DOWN: {
-
-            TextEdit *tx = current->get_text_edit();
-            Ref<Script> scr = current->get_edited_script();
-            if (scr.is_null())
-                return;
-            int line = tx->cursor_get_line();
-            int next_line = line + 1;
-
-            if (line == tx->get_line_count() || next_line > tx->get_line_count())
-                return;
-
-            String line_clone = tx->get_line(line);
-            tx->insert_at(line_clone, next_line);
-            tx->update();
-
-        } break;
-        case EDIT_TOGGLE_COMMENT: {
-
-            TextEdit *tx = current->get_text_edit();
-            Ref<Script> scr = current->get_edited_script();
-            if (scr.is_null())
-                return;
-
-
-
-            if (tx->is_selection_active())
-            {
-		int begin = tx->get_selection_from_line();
-		int end = tx->get_selection_to_line();
-                for (int i = begin; i <= end; i++)
-                {
-                    String line_text = tx->get_line(i);
-
-                    if (line_text.begins_with("#"))
-                        line_text = line_text.substr(1, line_text.length());
-                    else
-                        line_text = "#" + line_text;
-                    tx->set_line(i, line_text);
-                }
-            }
-            else
-            {
-		int begin = tx->cursor_get_line();
-                String line_text = tx->get_line(begin);
-
-                if (line_text.begins_with("#"))
-                    line_text = line_text.substr(1, line_text.length());
-                else
-                    line_text = "#" + line_text;
-                tx->set_line(begin, line_text);
-            }
-            tx->update();
-            //tx->deselect();
-
-        } break;
-		case EDIT_COMPLETE: {
-
-			current->get_text_edit()->query_code_comple();
+		case SEARCH_WEBSITE: {
 
+			OS::get_singleton()->shell_open("http://www.godotengine.org/projects/godot-engine/wiki/Documentation#Tutorials");
 		} break;
-		case EDIT_AUTO_INDENT: {
-
-			TextEdit *te = current->get_text_edit();
-			String text = te->get_text();
-			Ref<Script> scr = current->get_edited_script();
-			if (scr.is_null())
-				return;
-			int begin,end;
-			if (te->is_selection_active()) {
-				begin=te->get_selection_from_line();
-				end=te->get_selection_to_line();
-			} else {
-				begin=0;
-				end=te->get_line_count()-1;
-			}
-			scr->get_language()->auto_indent_code(text,begin,end);
-			te->set_text(text);
 
+		case WINDOW_NEXT: {
 
+			_history_forward();
 		} break;
-		case SEARCH_FIND: {
-
-			find_replace_dialog->set_text_edit(current->get_text_edit());
-			find_replace_dialog->popup_search();
+		case WINDOW_PREV: {
+			_history_back();
 		} break;
-		case SEARCH_FIND_NEXT: {
 
-			find_replace_dialog->set_text_edit(current->get_text_edit());
-			 find_replace_dialog->search_next();
-		} break;
-		case SEARCH_REPLACE: {
+	}
 
-			find_replace_dialog->set_text_edit(current->get_text_edit());
-			find_replace_dialog->popup_replace();
-		} break;
-		case SEARCH_LOCATE_FUNCTION: {
 
-			if (!current)
-				return;
-			quick_open->popup(current->get_functions());
-		} break;
-		case SEARCH_GOTO_LINE: {
+	int selected = tab_container->get_current_tab();
+	if (selected<0 || selected>=tab_container->get_child_count())
+		return;
 
-			goto_line_dialog->popup_find_line(current->get_text_edit());
-		} break;
-		case DEBUG_TOGGLE_BREAKPOINT: {
-			int line=current->get_text_edit()->cursor_get_line();
-			bool dobreak = !current->get_text_edit()->is_line_set_as_breakpoint(line);
-			current->get_text_edit()->set_line_as_breakpoint(line,dobreak);
-			get_debugger()->set_breakpoint(current->get_edited_script()->get_path(),line+1,dobreak);
-		} break;
-		case DEBUG_NEXT: {
+	ScriptTextEditor *current = tab_container->get_child(selected)->cast_to<ScriptTextEditor>();
+	if (current) {
+
+		switch(p_option) {
+			case FILE_NEW: {
+				script_create_dialog->config("Node", ".gd");
+				script_create_dialog->popup_centered(Size2(300, 300));
+			} break;
+			case FILE_SAVE: {
+				if (!_test_script_times_on_disk())
+					return;
+				editor->save_resource( current->get_edited_script() );
+
+			} break;
+			case FILE_SAVE_AS: {
+
+				editor->save_resource_as( current->get_edited_script() );
+
+			} break;
+			case EDIT_UNDO: {
+				current->get_text_edit()->undo();
+				current->get_text_edit()->call_deferred("grab_focus");
+			} break;
+			case EDIT_REDO: {
+				current->get_text_edit()->redo();
+				current->get_text_edit()->call_deferred("grab_focus");
+			} break;
+			case EDIT_CUT: {
+
+				current->get_text_edit()->cut();
+				current->get_text_edit()->call_deferred("grab_focus");
+			} break;
+			case EDIT_COPY: {
+				current->get_text_edit()->copy();
+				current->get_text_edit()->call_deferred("grab_focus");
+
+			} break;
+			case EDIT_PASTE: {
+				current->get_text_edit()->paste();
+				current->get_text_edit()->call_deferred("grab_focus");
+
+			} break;
+			case EDIT_SELECT_ALL: {
+
+				current->get_text_edit()->select_all();
+				current->get_text_edit()->call_deferred("grab_focus");
+
+			} break;
+			case EDIT_MOVE_LINE_UP: {
+
+				TextEdit *tx = current->get_text_edit();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+
+				if (tx->is_selection_active())
+				{
+					int from_line = tx->get_selection_from_line();
+					int from_col  = tx->get_selection_from_column();
+					int to_line   = tx->get_selection_to_line();
+					int to_column = tx->get_selection_to_column();
+
+					for (int i = from_line; i <= to_line; i++)
+					{
+						int line_id = i;
+						int next_id = i - 1;
+
+						if (line_id == 0 || next_id < 0)
+							return;
+
+						swap_lines(tx, line_id, next_id);
+					}
+					int from_line_up = from_line > 0 ? from_line-1 : from_line;
+					int to_line_up   = to_line   > 0 ? to_line-1   : to_line;
+					tx->select(from_line_up, from_col, to_line_up, to_column);
+				}
+				else
+				{
+					int line_id = tx->cursor_get_line();
+					int next_id = line_id - 1;
 
-			if (debugger)
-				debugger->debug_next();
-		} break;
-		case DEBUG_STEP: {
+					if (line_id == 0 || next_id < 0)
+						return;
+
+					swap_lines(tx, line_id, next_id);
+				}
+				tx->update();
+
+			} break;
+			case EDIT_MOVE_LINE_DOWN: {
+
+				TextEdit *tx = current->get_text_edit();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+
+				if (tx->is_selection_active())
+				{
+					int from_line = tx->get_selection_from_line();
+					int from_col  = tx->get_selection_from_column();
+					int to_line   = tx->get_selection_to_line();
+					int to_column = tx->get_selection_to_column();
+
+					for (int i = to_line; i >= from_line; i--)
+					{
+						int line_id = i;
+						int next_id = i + 1;
+
+						if (line_id == tx->get_line_count()-1 || next_id > tx->get_line_count())
+							return;
+
+						swap_lines(tx, line_id, next_id);
+					}
+					int from_line_down = from_line < tx->get_line_count() ? from_line+1 : from_line;
+					int to_line_down   = to_line   < tx->get_line_count() ? to_line+1   : to_line;
+					tx->select(from_line_down, from_col, to_line_down, to_column);
+				}
+				else
+				{
+					int line_id = tx->cursor_get_line();
+					int next_id = line_id + 1;
 
-			if (debugger)
-				debugger->debug_step();
+					if (line_id == tx->get_line_count()-1 || next_id > tx->get_line_count())
+						return;
 
-		} break;
-		case DEBUG_BREAK: {
+					swap_lines(tx, line_id, next_id);
+				}
+				tx->update();
+
+			} break;
+			case EDIT_INDENT_LEFT: {
+
+				TextEdit *tx = current->get_text_edit();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+
+
+				if (tx->is_selection_active())
+				{
+					int begin = tx->get_selection_from_line();
+					int end = tx->get_selection_to_line();
+					for (int i = begin; i <= end; i++)
+					{
+						String line_text = tx->get_line(i);
+						// begins with tab
+						if (line_text.begins_with("\t"))
+						{
+							line_text = line_text.substr(1, line_text.length());
+							tx->set_line(i, line_text);
+						}
+						// begins with 4 spaces
+						else if (line_text.begins_with("    "))
+						{
+							line_text = line_text.substr(4, line_text.length());
+							tx->set_line(i, line_text);
+						}
+					}
+				}
+				else
+				{
+					int begin = tx->cursor_get_line();
+					String line_text = tx->get_line(begin);
+					// begins with tab
+					if (line_text.begins_with("\t"))
+					{
+						line_text = line_text.substr(1, line_text.length());
+						tx->set_line(begin, line_text);
+					}
+					// begins with 4 spaces
+					else if (line_text.begins_with("    "))
+					{
+						line_text = line_text.substr(4, line_text.length());
+						tx->set_line(begin, line_text);
+					}
+				}
+				tx->update();
+				//tx->deselect();
+
+			} break;
+			case EDIT_INDENT_RIGHT: {
+
+				TextEdit *tx = current->get_text_edit();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+
+				if (tx->is_selection_active())
+				{
+					int begin = tx->get_selection_from_line();
+					int end = tx->get_selection_to_line();
+					for (int i = begin; i <= end; i++)
+					{
+						String line_text = tx->get_line(i);
+						line_text = '\t' + line_text;
+						tx->set_line(i, line_text);
+					}
+				}
+				else
+				{
+					int begin = tx->cursor_get_line();
+					String line_text = tx->get_line(begin);
+					line_text = '\t' + line_text;
+					tx->set_line(begin, line_text);
+				}
+				tx->update();
+				//tx->deselect();
+
+			} break;
+			case EDIT_CLONE_DOWN: {
+
+				TextEdit *tx = current->get_text_edit();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+				int line = tx->cursor_get_line();
+				int next_line = line + 1;
+				int column = tx->cursor_get_column();
+
+				if (line >= tx->get_line_count() - 1)
+					tx->set_line(line, tx->get_line(line) + "\n");
+
+				String line_clone = tx->get_line(line);
+				tx->insert_at(line_clone, next_line);
+				tx->cursor_set_column(column);
+				tx->update();
+
+			} break;
+			case EDIT_TOGGLE_COMMENT: {
+
+				TextEdit *tx = current->get_text_edit();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+
+
+
+				if (tx->is_selection_active())
+				{
+					int begin = tx->get_selection_from_line();
+					int end = tx->get_selection_to_line();
+					for (int i = begin; i <= end; i++)
+					{
+						String line_text = tx->get_line(i);
+
+						if (line_text.begins_with("#"))
+							line_text = line_text.substr(1, line_text.length());
+						else
+							line_text = "#" + line_text;
+						tx->set_line(i, line_text);
+					}
+				}
+				else
+				{
+					int begin = tx->cursor_get_line();
+					String line_text = tx->get_line(begin);
+
+					if (line_text.begins_with("#"))
+						line_text = line_text.substr(1, line_text.length());
+					else
+						line_text = "#" + line_text;
+					tx->set_line(begin, line_text);
+				}
+				tx->update();
+				//tx->deselect();
+
+			} break;
+			case EDIT_COMPLETE: {
+
+				current->get_text_edit()->query_code_comple();
+
+			} break;
+			case EDIT_AUTO_INDENT: {
+
+				TextEdit *te = current->get_text_edit();
+				String text = te->get_text();
+				Ref<Script> scr = current->get_edited_script();
+				if (scr.is_null())
+					return;
+				int begin,end;
+				if (te->is_selection_active()) {
+					begin=te->get_selection_from_line();
+					end=te->get_selection_to_line();
+				} else {
+					begin=0;
+					end=te->get_line_count()-1;
+				}
+				scr->get_language()->auto_indent_code(text,begin,end);
+				te->set_text(text);
+
+
+			} break;
+			case SEARCH_FIND: {
+
+				find_replace_dialog->set_text_edit(current->get_text_edit());
+				find_replace_dialog->popup_search();
+			} break;
+			case SEARCH_FIND_NEXT: {
+
+				find_replace_dialog->set_text_edit(current->get_text_edit());
+				find_replace_dialog->search_next();
+			} break;
+			case SEARCH_REPLACE: {
+
+				find_replace_dialog->set_text_edit(current->get_text_edit());
+				find_replace_dialog->popup_replace();
+			} break;
+			case SEARCH_LOCATE_FUNCTION: {
+
+				if (!current)
+					return;
+				quick_open->popup(current->get_functions());
+			} break;
+			case SEARCH_GOTO_LINE: {
+
+				goto_line_dialog->popup_find_line(current->get_text_edit());
+			} break;
+			case DEBUG_TOGGLE_BREAKPOINT: {
+				int line=current->get_text_edit()->cursor_get_line();
+				bool dobreak = !current->get_text_edit()->is_line_set_as_breakpoint(line);
+				current->get_text_edit()->set_line_as_breakpoint(line,dobreak);
+				get_debugger()->set_breakpoint(current->get_edited_script()->get_path(),line+1,dobreak);
+			} break;
+			case DEBUG_NEXT: {
+
+				if (debugger)
+					debugger->debug_next();
+			} break;
+			case DEBUG_STEP: {
+
+				if (debugger)
+					debugger->debug_step();
+
+			} break;
+			case DEBUG_BREAK: {
+
+				if (debugger)
+					debugger->debug_break();
+
+			} break;
+			case DEBUG_CONTINUE: {
+
+				if (debugger)
+					debugger->debug_continue();
+
+			} break;
+			case DEBUG_SHOW: {
+				if (debugger) {
+					bool visible = debug_menu->get_popup()->is_item_checked( debug_menu->get_popup()->get_item_index(DEBUG_SHOW) );
+					debug_menu->get_popup()->set_item_checked( debug_menu->get_popup()->get_item_index(DEBUG_SHOW), !visible);
+					if (visible)
+						debugger->hide();
+					else
+						debugger->show();
+				}
+			} break;
+			case HELP_CONTEXTUAL: {
+				String text = current->get_text_edit()->get_selection_text();
+				if (text == "")
+					text = current->get_text_edit()->get_word_under_cursor();
+				if (text != "")
+					help_search_dialog->popup(text);
+			} break;
+			case FILE_CLOSE: {
+				if (current->get_text_edit()->get_version()!=current->get_text_edit()->get_saved_version()) {
+					erase_tab_confirm->set_text("Close and save changes?\n\""+current->get_name()+"\"");
+					erase_tab_confirm->popup_centered_minsize();
+				} else {
+					_close_current_tab();
+				}
+			} break;
+			case WINDOW_MOVE_LEFT: {
+
+				if (tab_container->get_current_tab()>0) {
+					tab_container->call_deferred("set_current_tab",tab_container->get_current_tab()-1);
+					script_list->call_deferred("select",tab_container->get_current_tab()-1);
+					tab_container->move_child(current,tab_container->get_current_tab()-1);
+					_update_script_names();
+				}
+			} break;
+			case WINDOW_MOVE_RIGHT: {
+
+				if (tab_container->get_current_tab()<tab_container->get_child_count()-1) {
+					tab_container->call_deferred("set_current_tab",tab_container->get_current_tab()+1);
+					script_list->call_deferred("select",tab_container->get_current_tab()+1);
+					tab_container->move_child(current,tab_container->get_current_tab()+1);
+					_update_script_names();
+				}
 
-			if (debugger)
-				debugger->debug_break();
 
-		} break;
-		case DEBUG_CONTINUE: {
+			} break;
 
-			if (debugger)
-				debugger->debug_continue();
+			default: {
 
-		} break;
-		case DEBUG_SHOW: {
-			if (debugger) {
-				bool visible = debug_menu->get_popup()->is_item_checked( debug_menu->get_popup()->get_item_index(DEBUG_SHOW) );
-				debug_menu->get_popup()->set_item_checked( debug_menu->get_popup()->get_item_index(DEBUG_SHOW), !visible);
-				if (visible)
-					debugger->hide();
-				else
-					debugger->show();
-			}
-		} break;
-		case HELP_CONTEXTUAL: {
-			String text = current->get_text_edit()->get_selection_text();
-			if (text == "")
-				text = current->get_text_edit()->get_word_under_cursor();
-			if (text != "")
-				editor->emit_signal("request_help", text);
-		} break;
-		case FILE_CLOSE: {
-			if (current->get_text_edit()->get_version()!=current->get_text_edit()->get_saved_version()) {
-				erase_tab_confirm->set_text("Close and save changes?\n\""+current->get_name()+"\"");
-				erase_tab_confirm->popup_centered_minsize();
-			} else {
-				_close_current_tab();
-			}
-		} break;
-		case WINDOW_MOVE_LEFT: {
+				if (p_option>=WINDOW_SELECT_BASE) {
 
-			if (tab_container->get_current_tab()>0) {
-				tab_container->call_deferred("set_current_tab",tab_container->get_current_tab()-1);
-				script_list->call_deferred("select",tab_container->get_current_tab()-1);
-				tab_container->move_child(current,tab_container->get_current_tab()-1);
-				_update_script_names();
-			}
-		} break;
-		case WINDOW_MOVE_RIGHT: {
+					tab_container->set_current_tab(p_option-WINDOW_SELECT_BASE);
+					script_list->select(p_option-WINDOW_SELECT_BASE);
 
-			if (tab_container->get_current_tab()<tab_container->get_child_count()-1) {
-				tab_container->call_deferred("set_current_tab",tab_container->get_current_tab()+1);
-				script_list->call_deferred("select",tab_container->get_current_tab()+1);
-				tab_container->move_child(current,tab_container->get_current_tab()+1);
-				_update_script_names();
+				}
 			}
+		}
+	}
 
+	EditorHelp *help = tab_container->get_child(selected)->cast_to<EditorHelp>();
+	if (help) {
 
-		} break;
-		default: {
+		switch(p_option) {
 
-			if (p_option>=WINDOW_SELECT_BASE) {
+			case SEARCH_FIND: {
+				help->popup_search();
+			} break;
+			case SEARCH_FIND_NEXT: {
+				help->search_again();
+			} break;
+			case FILE_CLOSE: {
+				_close_current_tab();
+			} break;
 
-				tab_container->set_current_tab(p_option-WINDOW_SELECT_BASE);
-				script_list->select(p_option-WINDOW_SELECT_BASE);
 
-			}
 		}
 	}
 
+
 }
 
 void ScriptEditor::_tab_changed(int p_which) {
@@ -1217,6 +1374,14 @@ void ScriptEditor::_notification(int p_what) {
 		}
 
 		EditorSettings::get_singleton()->connect("settings_changed",this,"_editor_settings_changed");
+		help_search->set_icon(get_icon("Help","EditorIcons"));
+		site_search->set_icon(get_icon("Godot","EditorIcons"));
+		class_search->set_icon(get_icon("ClassList","EditorIcons"));
+
+		script_forward->set_icon(get_icon("Forward","EditorIcons"));
+		script_back->set_icon(get_icon("Back","EditorIcons"));
+
+
 
 
 	}
@@ -1224,6 +1389,7 @@ void ScriptEditor::_notification(int p_what) {
 	if (p_what==NOTIFICATION_READY) {
 
 		get_tree()->connect("tree_changed",this,"_tree_changed");
+		editor->connect("request_help",this,"_request_help");
 	}
 
 	if (p_what==NOTIFICATION_EXIT_TREE) {
@@ -1346,7 +1512,7 @@ void ScriptEditor::set_state(const Dictionary& p_state) {
 
 }
 void ScriptEditor::clear() {
-
+#if 0
 	List<ScriptTextEditor*> stes;
 	for(int i=0;i<tab_container->get_child_count();i++) {
 
@@ -1371,7 +1537,7 @@ void ScriptEditor::clear() {
 		script_list->select( script_list->find_metadata(idx) );
 	}
 
-
+#endif
 
 
 }
@@ -1402,31 +1568,6 @@ void ScriptEditor::get_breakpoints(List<String> *p_breakpoints) {
 
 
 
-void ScriptEditor::_bind_methods() {
-
-	ObjectTypeDB::bind_method("_tab_changed",&ScriptEditor::_tab_changed);
-	ObjectTypeDB::bind_method("_menu_option",&ScriptEditor::_menu_option);
-	ObjectTypeDB::bind_method("_close_current_tab",&ScriptEditor::_close_current_tab);
-	ObjectTypeDB::bind_method("_editor_play",&ScriptEditor::_editor_play);
-	ObjectTypeDB::bind_method("_editor_pause",&ScriptEditor::_editor_pause);
-	ObjectTypeDB::bind_method("_editor_stop",&ScriptEditor::_editor_stop);
-	ObjectTypeDB::bind_method("_add_callback",&ScriptEditor::_add_callback);
-	ObjectTypeDB::bind_method("_reload_scripts",&ScriptEditor::_reload_scripts);
-	ObjectTypeDB::bind_method("_resave_scripts",&ScriptEditor::_resave_scripts);
-	ObjectTypeDB::bind_method("_res_saved_callback",&ScriptEditor::_res_saved_callback);
-	ObjectTypeDB::bind_method("_goto_script_line",&ScriptEditor::_goto_script_line);
-	ObjectTypeDB::bind_method("_goto_script_line2",&ScriptEditor::_goto_script_line2);
-	ObjectTypeDB::bind_method("_breaked",&ScriptEditor::_breaked);
-	ObjectTypeDB::bind_method("_show_debugger",&ScriptEditor::_show_debugger);
-	ObjectTypeDB::bind_method("_get_debug_tooltip",&ScriptEditor::_get_debug_tooltip);
-	ObjectTypeDB::bind_method("_autosave_scripts",&ScriptEditor::_autosave_scripts);
-	ObjectTypeDB::bind_method("_editor_settings_changed",&ScriptEditor::_editor_settings_changed);
-	ObjectTypeDB::bind_method("_update_script_names",&ScriptEditor::_update_script_names);
-	ObjectTypeDB::bind_method("_tree_changed",&ScriptEditor::_tree_changed);
-	ObjectTypeDB::bind_method("_script_selected",&ScriptEditor::_script_selected);
-	ObjectTypeDB::bind_method("_script_created",&ScriptEditor::_script_created);
-	ObjectTypeDB::bind_method("_script_split_dragged",&ScriptEditor::_script_split_dragged);
-}
 
 
 void ScriptEditor::ensure_focus_current() {
@@ -1448,7 +1589,8 @@ void ScriptEditor::ensure_focus_current() {
 void ScriptEditor::_script_selected(int p_idx) {
 
 	grab_focus_block = !Input::get_singleton()->is_mouse_button_pressed(1); //amazing hack, simply amazing
-	tab_container->set_current_tab(script_list->get_item_metadata(p_idx));
+	// The list item's metadata is the tab index to show; grab_focus_block is cleared again right after the switch.
+	_go_to_tab(script_list->get_item_metadata(p_idx));
 	grab_focus_block=false;
 }
 
@@ -1457,16 +1599,38 @@ void ScriptEditor::ensure_select_current() {
 
 	if (tab_container->get_child_count() && tab_container->get_current_tab()>=0) {
 
-		ScriptTextEditor *ste = tab_container->get_child(tab_container->get_current_tab())->cast_to<ScriptTextEditor>();
-		if (!ste)
-			return;
-		Ref<Script> script = ste->get_edited_script();
+		Node *current = tab_container->get_child(tab_container->get_current_tab());
+
+
+		ScriptTextEditor *ste = current->cast_to<ScriptTextEditor>();
+		if (ste) {
+
+			Ref<Script> script = ste->get_edited_script();
+
+			if (!grab_focus_block && is_inside_tree())
+				ste->get_text_edit()->grab_focus();
+
+			edit_menu->show();
+			search_menu->show();
+			script_search_menu->hide();
+
+
+		}
+
+		EditorHelp *eh = current->cast_to<EditorHelp>();
 
-		if (!grab_focus_block && is_inside_tree())
-			ste->get_text_edit()->grab_focus();
+		if (eh) {
+			edit_menu->hide();
+			search_menu->hide();
+			script_search_menu->show();
+
+		}
 	}
 
 
+
+
+
 }
 
 void ScriptEditor::_find_scripts(Node* p_base, Node* p_current, Set<Ref<Script> > &used) {
@@ -1485,6 +1649,57 @@ void ScriptEditor::_find_scripts(Node* p_base, Node* p_current, Set<Ref<Script>
 
 }
 
+// One row of the script list, collected up front so the rows can be sorted before display.
+struct _ScriptEditorItemData {
+	String name; // label shown in the list
+	Ref<Texture> icon;
+	int index; // position of the tab inside tab_container
+	String tooltip;
+	bool used; // when set, the row gets a custom background color
+	int category; // primary sort key
+
+	// Orders by category first, then by case-insensitive name.
+	bool operator<(const _ScriptEditorItemData& p_other) const {
+		if (category!=p_other.category)
+			return category<p_other.category;
+		return name.nocasecmp_to(p_other.name)<0;
+	}
+};
+
+
+void ScriptEditor::_update_script_colors() {
+	// Tints each script-list row between the hot and cold colors by the age of its tab's "__editor_pass" stamp.
+	bool enabled = EditorSettings::get_singleton()->get("text_editor/script_temperature_enabled");
+	if (!enabled)
+		return;
+
+	int hist_size = EditorSettings::get_singleton()->get("text_editor/script_temperature_history_size");
+	Color hot_color=EditorSettings::get_singleton()->get("text_editor/script_temperature_hot_color");
+	Color cold_color=EditorSettings::get_singleton()->get("text_editor/script_temperature_cold_color");
+
+	for(int i=0;i<script_list->get_item_count();i++) {
+
+		int c = script_list->get_item_metadata(i); // item metadata is used as the tab index
+		Node *n = tab_container->get_child(c);
+		if (!n)
+			continue;
+
+		script_list->set_item_custom_bg_color(i,Color(0,0,0,0)); // reset to transparent before re-tinting
+		if (!n->has_meta("__editor_pass")) {
+			continue;
+		}
+
+		int pass=n->get_meta("__editor_pass");
+		int h = edit_pass - pass; // distance between the current edit_pass counter and the tab's stamp
+		if (hist_size<=0 || h>hist_size) {
+			continue; // outside the window; a zero-sized window would otherwise compute 0/0 (NaN) below
+		}
+		float v = Math::ease(h/float_t(hist_size),0.4);
+
+		// v==0 selects hot_color, v==1 selects cold_color
+		script_list->set_item_custom_bg_color(i,hot_color.linear_interpolate(cold_color,v));
+	}
+}
 
 void ScriptEditor::_update_script_names() {
 
@@ -1496,31 +1711,75 @@ void ScriptEditor::_update_script_names() {
 	}
 
 	script_list->clear();
+	bool split_script_help = EditorSettings::get_singleton()->get("text_editor/group_help_pages");
+
+	Vector<_ScriptEditorItemData> sedata;
+
 	for(int i=0;i<tab_container->get_child_count();i++) {
 
+
 		ScriptTextEditor *ste = tab_container->get_child(i)->cast_to<ScriptTextEditor>();
-		if (!ste)
-			continue;
+		if (ste) {
 
-		String script = ste->get_name();
-		Ref<Texture> icon = ste->get_icon();
-		String path = ste->get_edited_script()->get_path();
-		script_list->add_item(script,icon);
+			String name = ste->get_name();
+			Ref<Texture> icon = ste->get_icon();
+			String tooltip = ste->get_edited_script()->get_path();
 
-		int index = script_list->get_item_count()-1;
+			_ScriptEditorItemData sd;
+			sd.icon=icon;
+			sd.name=name;
+			sd.tooltip=tooltip;
+			sd.index=i;
+			sd.used=used.has(ste->get_edited_script());
+			sd.category=0;
+
+			sedata.push_back(sd);
+		}
 
-		script_list->set_item_tooltip(index,path);
-		script_list->set_item_metadata(index,i);
-		if (used.has(ste->get_edited_script())) {
+		EditorHelp *eh = tab_container->get_child(i)->cast_to<EditorHelp>();
+		if (eh) {
+
+			String name = eh->get_class_name();
+			Ref<Texture> icon = get_icon("Help","EditorIcons");
+			String tooltip = name+" Class Reference";
+
+			_ScriptEditorItemData sd;
+			sd.icon=icon;
+			sd.name=name;
+			sd.tooltip=tooltip;
+			sd.index=i;
+			sd.used=false;
+			sd.category=split_script_help?1:0;
+			sedata.push_back(sd);
+
+		}
+
+	}
+
+	sedata.sort();
+
+	for(int i=0;i<sedata.size();i++) {
+
+		script_list->add_item(sedata[i].name,sedata[i].icon);
+		int index = script_list->get_item_count()-1;
+		script_list->set_item_tooltip(index,sedata[i].tooltip);
+		script_list->set_item_metadata(index,sedata[i].index);
+		if (sedata[i].used) {
 
 			script_list->set_item_custom_bg_color(index,Color(88/255.0,88/255.0,60/255.0));
 		}
-		if (tab_container->get_current_tab()==index) {
+		if (tab_container->get_current_tab()==sedata[i].index) {
 			script_list->select(index);
+			script_name_label->set_text(sedata[i].name);
+			script_icon->set_texture(sedata[i].icon);
+
 		}
 	}
 
-	script_list->sort_items_by_text();
+	_update_script_colors();
+
+
+
 
 }
 
@@ -1531,6 +1790,8 @@ void ScriptEditor::edit(const Ref<Script>& p_script) {
 
 	// see if already has it
 
+	bool open_dominant = EditorSettings::get_singleton()->get("text_editor/open_dominant_script_on_scene_change");
+
 	if (p_script->get_path().is_resource_file() && bool(EditorSettings::get_singleton()->get("external_editor/use_external_editor"))) {
 
 		String path = EditorSettings::get_singleton()->get("external_editor/exec_path");
@@ -1559,12 +1820,13 @@ void ScriptEditor::edit(const Ref<Script>& p_script) {
 
 		if (ste->get_edited_script()==p_script) {
 
-			if (!EditorNode::get_singleton()->is_changing_scene()) {
+			if (open_dominant || !EditorNode::get_singleton()->is_changing_scene()) {
 				if (tab_container->get_current_tab()!=i) {
-					tab_container->set_current_tab(i);
+					_go_to_tab(i);
 					script_list->select( script_list->find_metadata(i) );
 				}
-				ste->get_text_edit()->grab_focus();
+				if (is_visible())
+					ste->get_text_edit()->grab_focus();
 			}
 			return;
 		}
@@ -1575,8 +1837,10 @@ void ScriptEditor::edit(const Ref<Script>& p_script) {
 	ScriptTextEditor *ste = memnew( ScriptTextEditor );
 	ste->set_edited_script(p_script);
 	ste->get_text_edit()->set_tooltip_request_func(this,"_get_debug_tooltip",ste);
+	ste->get_text_edit()->set_auto_brace_completion(EditorSettings::get_singleton()->get("text_editor/auto_brace_complete"));
 	tab_container->add_child(ste);
-	tab_container->set_current_tab(tab_container->get_tab_count()-1);
+	_go_to_tab(tab_container->get_tab_count()-1);
+
 
 
 
@@ -1675,7 +1939,7 @@ void ScriptEditor::_add_callback(Object *p_obj, const String& p_function, const
 			ste->get_text_edit()->insert_text_at_cursor("\n\n"+func);
 		}
 
-		tab_container->set_current_tab(i);
+		_go_to_tab(i);
 		ste->get_text_edit()->cursor_set_line(pos);
 		ste->get_text_edit()->cursor_set_column(1);
 
@@ -1726,10 +1990,13 @@ void ScriptEditor::set_window_layout(Ref<ConfigFile> p_layout) {
 		return;
 	}
 
-	if (!p_layout->has_section_key("ScriptEditor","open_scripts"))
+	if (!p_layout->has_section_key("ScriptEditor","open_scripts") && !p_layout->has_section_key("ScriptEditor","open_help"))
 		return;
 
 	Array scripts = p_layout->get_value("ScriptEditor","open_scripts");
+	Array helps;
+	if (p_layout->has_section_key("ScriptEditor","open_help"))
+		helps=p_layout->get_value("ScriptEditor","open_help");
 
 	restoring_layout=true;
 
@@ -1742,6 +2009,18 @@ void ScriptEditor::set_window_layout(Ref<ConfigFile> p_layout) {
 		}
 	}
 
+
+	for(int i=0;i<helps.size();i++) {
+
+		String path = helps[i];
+		_help_class_open(path);
+	}
+
+	for(int i=0;i<tab_container->get_child_count();i++) {
+		tab_container->get_child(i)->set_meta("__editor_pass",Variant());
+	}
+
+
 	if (p_layout->has_section_key("ScriptEditor","split_offset")) {
 		script_split->set_split_offset(p_layout->get_value("ScriptEditor","split_offset"));
 	}
@@ -1754,27 +2033,185 @@ void ScriptEditor::set_window_layout(Ref<ConfigFile> p_layout) {
 void ScriptEditor::get_window_layout(Ref<ConfigFile> p_layout) {
 
 	Array scripts;
+	Array helps; // class names of the open EditorHelp tabs
 
 	for(int i=0;i<tab_container->get_child_count();i++) {
 
 		ScriptTextEditor *ste = tab_container->get_child(i)->cast_to<ScriptTextEditor>();
-		if (!ste)
-			continue;
+		if (ste) {
 
-		String path = ste->get_edited_script()->get_path();
-		if (!path.is_resource_file())
-			continue;
+			String path = ste->get_edited_script()->get_path();
+			if (!path.is_resource_file())
+				continue;
+			// Reached only for scripts whose path is a resource file.
+			scripts.push_back(path);
+		}
+		// Help tabs are saved by class name under the "open_help" key.
+		EditorHelp *eh = tab_container->get_child(i)->cast_to<EditorHelp>();
+
+		if (eh) {
+
+			helps.push_back(eh->get_class_name());
+		}
 
-		scripts.push_back(path);
 
 	}
 
 	p_layout->set_value("ScriptEditor","open_scripts",scripts);
+	p_layout->set_value("ScriptEditor","open_help",helps);
 	p_layout->set_value("ScriptEditor","split_offset",script_split->get_split_offset());
 
 }
 
 
+void ScriptEditor::_help_class_open(const String& p_class) {
+	// Show the help page of class p_class in a tab: switch to the EditorHelp tab
+	// that already reports this class name, otherwise create a new tab for it.
+	for(int i=0;i<tab_container->get_child_count();i++) {
+
+		EditorHelp *eh = tab_container->get_child(i)->cast_to<EditorHelp>();
+
+		if (eh && eh->get_class_name()==p_class) {
+			// Already open: select the existing tab and refresh the names.
+			_go_to_tab(i);
+			_update_script_names();
+			return;
+		}
+	}
+	// Not open yet: add a new EditorHelp tab, select it and load the class.
+	EditorHelp * eh = memnew( EditorHelp );
+
+
+	eh->set_name(p_class);
+	tab_container->add_child(eh);
+	_go_to_tab(tab_container->get_tab_count()-1);
+	eh->go_to_class(p_class,0);
+	eh->connect("go_to_help",this,"_help_class_goto"); // the page's "go_to_help" signal is handled by _help_class_goto
+	_update_script_names();
+
+}
+
+void ScriptEditor::_help_class_goto(const String& p_desc) {
+	// Open the help location described by p_desc, reusing an existing tab when possible.
+	// The class name is taken from the second ':'-separated field of p_desc.
+	String cname=p_desc.get_slice(":",1);
+
+	for(int i=0;i<tab_container->get_child_count();i++) {
+
+		EditorHelp *eh = tab_container->get_child(i)->cast_to<EditorHelp>();
+
+		if (eh && eh->get_class_name()==cname) {
+			// A tab for this class exists: select it and navigate inside it.
+			_go_to_tab(i);
+			eh->go_to_help(p_desc);
+			_update_script_names();
+			return;
+		}
+	}
+	// No tab for this class yet: create one named after the class and navigate to p_desc.
+	EditorHelp * eh = memnew( EditorHelp );
+
+	eh->set_name(cname);
+	tab_container->add_child(eh);
+	_go_to_tab(tab_container->get_tab_count()-1);
+	eh->go_to_help(p_desc);
+	eh->connect("go_to_help",this,"_help_class_goto"); // further "go_to_help" signals from this page come back here
+	_update_script_names();
+
+}
+
+void ScriptEditor::_update_history_pos(int p_new_pos) {
+	// Jump to history entry p_new_pos. Callers (_history_back/_history_forward) bounds-check the index.
+	Node *n = tab_container->get_current_tab_control();
+	// 1) Save the view state of the tab being left into the current history entry.
+	if (n->cast_to<ScriptTextEditor>()) {
+
+		history[history_pos].scroll_pos=n->cast_to<ScriptTextEditor>()->get_text_edit()->get_v_scroll();
+		history[history_pos].cursor_column=n->cast_to<ScriptTextEditor>()->get_text_edit()->cursor_get_column();
+		history[history_pos].cursor_row=n->cast_to<ScriptTextEditor>()->get_text_edit()->cursor_get_line();
+	}
+	if (n->cast_to<EditorHelp>()) {
+		// Help pages only have a scroll position to remember.
+		history[history_pos].scroll_pos=n->cast_to<EditorHelp>()->get_scroll();
+	}
+	// 2) Move to the requested entry and show its tab.
+	history_pos=p_new_pos;
+	tab_container->set_current_tab(history[history_pos].control->get_index());
+
+	n = history[history_pos].control;
+	// 3) Restore the saved view state of the tab being entered.
+	if (n->cast_to<ScriptTextEditor>()) {
+		// Restore the row before the column so the column is applied against the destination line.
+		n->cast_to<ScriptTextEditor>()->get_text_edit()->set_v_scroll(history[history_pos].scroll_pos);
+		n->cast_to<ScriptTextEditor>()->get_text_edit()->cursor_set_line( history[history_pos].cursor_row );
+		n->cast_to<ScriptTextEditor>()->get_text_edit()->cursor_set_column( history[history_pos].cursor_column );
+		n->cast_to<ScriptTextEditor>()->get_text_edit()->grab_focus();
+	}
+
+	if (n->cast_to<EditorHelp>()) {
+
+		n->cast_to<EditorHelp>()->set_scroll(history[history_pos].scroll_pos);
+		n->cast_to<EditorHelp>()->set_focused();
+	}
+	// Stamp the tab with a fresh "__editor_pass" value, then refresh the names and history arrows.
+	n->set_meta("__editor_pass",++edit_pass);
+	_update_script_names();
+	_update_history_arrows();
+
+}
+
+void ScriptEditor::_history_forward() {
+	// Step one entry forward in the history; does nothing when already at the last entry.
+	if (history_pos<history.size()-1) {
+		_update_history_pos(history_pos+1);
+	}
+}
+
+void ScriptEditor::_history_back(){
+	// Step one entry back in the history; does nothing when already at the first entry.
+	if (history_pos>0) {
+		_update_history_pos(history_pos-1);
+	}
+
+}
+void ScriptEditor::set_scene_root_script( Ref<Script> p_script ) {
+	// Opens p_script via edit() when it is valid and the "open dominant script" editor setting is on.
+	bool open_dominant = EditorSettings::get_singleton()->get("text_editor/open_dominant_script_on_scene_change");
+	if (open_dominant && p_script.is_valid()) {
+		edit(p_script);
+	}
+}
+
+void ScriptEditor::_bind_methods() {
+	// Registers each handler under its string name; connect() calls in this file target these names (e.g. "_history_back", "_help_class_goto").
+	ObjectTypeDB::bind_method("_tab_changed",&ScriptEditor::_tab_changed);
+	ObjectTypeDB::bind_method("_menu_option",&ScriptEditor::_menu_option);
+	ObjectTypeDB::bind_method("_close_current_tab",&ScriptEditor::_close_current_tab);
+	ObjectTypeDB::bind_method("_editor_play",&ScriptEditor::_editor_play);
+	ObjectTypeDB::bind_method("_editor_pause",&ScriptEditor::_editor_pause);
+	ObjectTypeDB::bind_method("_editor_stop",&ScriptEditor::_editor_stop);
+	ObjectTypeDB::bind_method("_add_callback",&ScriptEditor::_add_callback);
+	ObjectTypeDB::bind_method("_reload_scripts",&ScriptEditor::_reload_scripts);
+	ObjectTypeDB::bind_method("_resave_scripts",&ScriptEditor::_resave_scripts);
+	ObjectTypeDB::bind_method("_res_saved_callback",&ScriptEditor::_res_saved_callback);
+	ObjectTypeDB::bind_method("_goto_script_line",&ScriptEditor::_goto_script_line);
+	ObjectTypeDB::bind_method("_goto_script_line2",&ScriptEditor::_goto_script_line2);
+	ObjectTypeDB::bind_method("_breaked",&ScriptEditor::_breaked);
+	ObjectTypeDB::bind_method("_show_debugger",&ScriptEditor::_show_debugger);
+	ObjectTypeDB::bind_method("_get_debug_tooltip",&ScriptEditor::_get_debug_tooltip);
+	ObjectTypeDB::bind_method("_autosave_scripts",&ScriptEditor::_autosave_scripts);
+	ObjectTypeDB::bind_method("_editor_settings_changed",&ScriptEditor::_editor_settings_changed);
+	ObjectTypeDB::bind_method("_update_script_names",&ScriptEditor::_update_script_names);
+	ObjectTypeDB::bind_method("_tree_changed",&ScriptEditor::_tree_changed);
+	ObjectTypeDB::bind_method("_script_selected",&ScriptEditor::_script_selected);
+	ObjectTypeDB::bind_method("_script_created",&ScriptEditor::_script_created);
+	ObjectTypeDB::bind_method("_script_split_dragged",&ScriptEditor::_script_split_dragged);
+	ObjectTypeDB::bind_method("_help_class_open",&ScriptEditor::_help_class_open);
+	ObjectTypeDB::bind_method("_help_class_goto",&ScriptEditor::_help_class_goto);
+	ObjectTypeDB::bind_method("_request_help",&ScriptEditor::_help_class_open); // alias: same handler as "_help_class_open"
+	ObjectTypeDB::bind_method("_history_forward",&ScriptEditor::_history_forward);
+	ObjectTypeDB::bind_method("_history_back",&ScriptEditor::_history_back);
+}
 
 ScriptEditor::ScriptEditor(EditorNode *p_editor) {
 
@@ -1816,6 +2253,9 @@ ScriptEditor::ScriptEditor(EditorNode *p_editor) {
 	file_menu->get_popup()->add_item("Save As..",FILE_SAVE_AS);
 	file_menu->get_popup()->add_item("Save All",FILE_SAVE_ALL,KEY_MASK_CMD|KEY_MASK_SHIFT|KEY_S);
 	file_menu->get_popup()->add_separator();
+	file_menu->get_popup()->add_item("History Prev",WINDOW_PREV,KEY_MASK_CTRL|KEY_MASK_ALT|KEY_LEFT);
+	file_menu->get_popup()->add_item("History Next",WINDOW_NEXT,KEY_MASK_CTRL|KEY_MASK_ALT|KEY_RIGHT);
+	file_menu->get_popup()->add_separator();
 	file_menu->get_popup()->add_item("Close",FILE_CLOSE,KEY_MASK_CMD|KEY_W);
 	file_menu->get_popup()->connect("item_pressed", this,"_menu_option");
 
@@ -1851,13 +2291,22 @@ ScriptEditor::ScriptEditor(EditorNode *p_editor) {
 	menu_hb->add_child(search_menu);
 	search_menu->set_text("Search");
 	search_menu->get_popup()->add_item("Find..",SEARCH_FIND,KEY_MASK_CMD|KEY_F);
-	search_menu->get_popup()->add_item("Find Next",SEARCH_FIND_NEXT,KEY_MASK_CMD|KEY_G);
+	search_menu->get_popup()->add_item("Find Next",SEARCH_FIND_NEXT,KEY_F3);
 	search_menu->get_popup()->add_item("Replace..",SEARCH_REPLACE,KEY_MASK_CMD|KEY_R);
 	search_menu->get_popup()->add_separator();
 	search_menu->get_popup()->add_item("Goto Function..",SEARCH_LOCATE_FUNCTION,KEY_MASK_SHIFT|KEY_MASK_CMD|KEY_F);
 	search_menu->get_popup()->add_item("Goto Line..",SEARCH_GOTO_LINE,KEY_MASK_CMD|KEY_L);
 	search_menu->get_popup()->connect("item_pressed", this,"_menu_option");
 
+	script_search_menu = memnew( MenuButton );
+	menu_hb->add_child(script_search_menu);
+	script_search_menu->set_text("Search");
+	script_search_menu->get_popup()->add_item("Find..",SEARCH_FIND,KEY_MASK_CMD|KEY_F);
+	script_search_menu->get_popup()->add_item("Find Next",SEARCH_FIND_NEXT,KEY_F3);
+	script_search_menu->get_popup()->connect("item_pressed", this,"_menu_option");
+	script_search_menu->hide();
+
+
 	debug_menu = memnew( MenuButton );
 	menu_hb->add_child(debug_menu);
 	debug_menu->set_text("Debug");
@@ -1897,6 +2346,53 @@ ScriptEditor::ScriptEditor(EditorNode *p_editor) {
 	help_menu->get_popup()->add_item("Contextual", HELP_CONTEXTUAL, KEY_MASK_SHIFT|KEY_F1);
 	help_menu->get_popup()->connect("item_pressed", this,"_menu_option");
 
+	menu_hb->add_spacer();
+
+
+	script_icon = memnew( TextureFrame );
+	menu_hb->add_child(script_icon);
+	script_name_label = memnew( Label );
+	menu_hb->add_child(script_name_label);
+
+	script_icon->hide();
+	script_name_label->hide();
+
+	menu_hb->add_spacer();
+
+	site_search = memnew( ToolButton );
+	site_search->set_text("Tutorials");
+	site_search->connect("pressed",this,"_menu_option",varray(SEARCH_WEBSITE));
+	menu_hb->add_child(site_search);
+	site_search->set_tooltip("Open http://www.godotengine.org at tutorials section.");
+
+	class_search = memnew( ToolButton );
+	class_search->set_text("Classes");
+	class_search->connect("pressed",this,"_menu_option",varray(SEARCH_CLASSES));
+	menu_hb->add_child(class_search);
+	class_search->set_tooltip("Search the class hierarchy.");
+
+	help_search = memnew( ToolButton );
+	help_search->set_text("Search Help");
+	help_search->connect("pressed",this,"_menu_option",varray(SEARCH_HELP));
+	menu_hb->add_child(help_search);
+	help_search->set_tooltip("Search the reference documentation.");
+
+	menu_hb->add_child( memnew( VSeparator) );
+
+	script_back = memnew( ToolButton ); // history "back" button, wired to _history_back; starts disabled
+	script_back->connect("pressed",this,"_history_back");
+	menu_hb->add_child(script_back);
+	script_back->set_disabled(true);
+	script_back->set_tooltip("Go to previous edited document.");
+	// History "forward" button, wired to _history_forward; also starts disabled.
+	script_forward = memnew( ToolButton );
+	script_forward->connect("pressed",this,"_history_forward");
+	menu_hb->add_child(script_forward);
+	script_forward->set_disabled(true);
+	script_forward->set_tooltip("Go to next edited document.");
+
+
+
 	tab_container->connect("tab_changed", this,"_tab_changed");
 
 	find_replace_dialog = memnew(FindReplaceDialog);
@@ -1959,8 +2455,20 @@ ScriptEditor::ScriptEditor(EditorNode *p_editor) {
 
 	grab_focus_block=false;
 
+	help_search_dialog = memnew( EditorHelpSearch );
+	add_child(help_search_dialog);
+	help_search_dialog->connect("go_to_help",this,"_help_class_goto");
+
+
+	help_index = memnew( EditorHelpIndex );
+	add_child(help_index);
+	help_index->connect("open_class",this,"_help_class_open");
+
+	history_pos=-1;
 //	debugger_gui->hide();
 
+	edit_pass=0;
+
 }
 
 
@@ -2062,8 +2570,14 @@ ScriptEditorPlugin::ScriptEditorPlugin(EditorNode *p_node) {
 	script_editor->hide();
 
 	EDITOR_DEF("text_editor/auto_reload_changed_scripts",false);
+	EDITOR_DEF("text_editor/open_dominant_script_on_scene_change",true);
 	EDITOR_DEF("external_editor/use_external_editor",false);
 	EDITOR_DEF("external_editor/exec_path","");
+	EDITOR_DEF("text_editor/script_temperature_enabled",true);
+	EDITOR_DEF("text_editor/script_temperature_history_size",15);
+	EDITOR_DEF("text_editor/script_temperature_hot_color",Color(1,0,0,0.3));
+	EDITOR_DEF("text_editor/script_temperature_cold_color",Color(0,0,1,0.3));
+	EDITOR_DEF("text_editor/group_help_pages",false);
 	EditorSettings::get_singleton()->add_property_hint(PropertyInfo(Variant::STRING,"external_editor/exec_path",PROPERTY_HINT_GLOBAL_FILE));
 	EDITOR_DEF("external_editor/exec_flags","");
 
diff --git a/tools/editor/plugins/script_editor_plugin.h b/tools/editor/plugins/script_editor_plugin.h
index e635a1974b..e755f570ef 100644
--- a/tools/editor/plugins/script_editor_plugin.h
+++ b/tools/editor/plugins/script_editor_plugin.h
@@ -41,6 +41,7 @@
 #include "tools/editor/code_editor.h"
 #include "scene/gui/split_container.h"
 #include "scene/gui/item_list.h"
+#include "tools/editor/editor_help.h"
 
 class ScriptEditorQuickOpen : public ConfirmationDialog {
 
@@ -141,6 +142,9 @@ class ScriptEditor : public VBoxContainer {
 		SEARCH_REPLACE,
 		SEARCH_LOCATE_FUNCTION,
 		SEARCH_GOTO_LINE,
+		SEARCH_HELP,
+		SEARCH_CLASSES,
+		SEARCH_WEBSITE,
 		DEBUG_TOGGLE_BREAKPOINT,
 		DEBUG_NEXT,
 		DEBUG_STEP,
@@ -150,6 +154,8 @@ class ScriptEditor : public VBoxContainer {
 		HELP_CONTEXTUAL,		
 		WINDOW_MOVE_LEFT,
 		WINDOW_MOVE_RIGHT,
+		WINDOW_NEXT,
+		WINDOW_PREV,
 		WINDOW_SELECT_BASE=100
 	};
 
@@ -157,11 +163,17 @@ class ScriptEditor : public VBoxContainer {
 	MenuButton *file_menu;
 	MenuButton *edit_menu;
 	MenuButton *search_menu;
+	MenuButton *script_search_menu;
 	MenuButton *debug_menu;
 	MenuButton *help_menu;
 	Timer *autosave_timer;
 	uint64_t idle;
 
+	Button *help_search;
+	Button *site_search;
+	Button *class_search;
+	EditorHelpSearch *help_search_dialog;
+
 	ItemList *script_list;
 	HSplitContainer *script_split;
 	TabContainer *tab_container;
@@ -172,6 +184,27 @@ class ScriptEditor : public VBoxContainer {
 	ScriptEditorDebugger* debugger;
 	ToolButton *scripts_visible;
 
+	TextureFrame *script_icon;
+	Label *script_name_label;
+
+	ToolButton *script_back;
+	ToolButton *script_forward;
+
+
+	struct ScriptHistory {
+		// One entry of the back/forward navigation history (see _update_history_pos).
+		Control *control; // tab this entry refers to; its get_index() is used to select the tab
+		int scroll_pos; // saved scroll position (script text editor or help page)
+		int cursor_column; // saved cursor column; only written for ScriptTextEditor tabs
+		int cursor_row; // saved cursor line; only written for ScriptTextEditor tabs
+	};
+
+	Vector<ScriptHistory> history;
+	int history_pos;
+
+
+	EditorHelpIndex *help_index;
+
 	void _tab_changed(int p_which);
 	void _menu_option(int p_optin);
 
@@ -201,6 +234,8 @@ class ScriptEditor : public VBoxContainer {
 	void _editor_pause();
 	void _editor_stop();
 
+	int edit_pass;
+
 	void _add_callback(Object *p_obj, const String& p_function, const StringArray& p_args);
 	void _res_saved_callback(const Ref<Resource>& p_res);
 
@@ -224,8 +259,20 @@ class ScriptEditor : public VBoxContainer {
 
 	void _script_split_dragged(float);
 
+
+	void _history_forward();
+	void _history_back();
+
 	bool waiting_update_names;
 
+	void _help_class_open(const String& p_class);
+	void _help_class_goto(const String& p_desc);
+	void _update_history_arrows();
+	void _go_to_tab(int p_idx);
+	void _update_history_pos(int p_new_pos);
+	void _update_script_colors();
+
+
 	static ScriptEditor *script_editor;
 protected:
 	void _notification(int p_what);
@@ -253,6 +300,8 @@ public:
 	void set_window_layout(Ref<ConfigFile> p_layout);
 	void get_window_layout(Ref<ConfigFile> p_layout);
 
+	void set_scene_root_script( Ref<Script> p_script );
+
 	ScriptEditorDebugger *get_debugger() { return debugger; }
 
 	ScriptEditor(EditorNode *p_editor);
diff --git a/tools/editor/plugins/shader_graph_editor_plugin.cpp b/tools/editor/plugins/shader_graph_editor_plugin.cpp
index 684e7e32ef..3a7dc26466 100644
--- a/tools/editor/plugins/shader_graph_editor_plugin.cpp
+++ b/tools/editor/plugins/shader_graph_editor_plugin.cpp
@@ -2542,7 +2542,7 @@ void ShaderGraphView::_notification(int p_what) {
 
 void ShaderGraphView::add_node(int p_type, const Vector2 &location) {
 
-	if ((p_type==ShaderGraph::NODE_INPUT||p_type==ShaderGraph::NODE_INPUT) && graph->node_count(type, p_type)>0)
+	if (p_type==ShaderGraph::NODE_INPUT && graph->node_count(type, p_type)>0)
 		return;
 
 	List<int> existing;
diff --git a/tools/editor/plugins/spatial_editor_plugin.cpp b/tools/editor/plugins/spatial_editor_plugin.cpp
index 3ab9339265..7816efe89f 100644
--- a/tools/editor/plugins/spatial_editor_plugin.cpp
+++ b/tools/editor/plugins/spatial_editor_plugin.cpp
@@ -232,15 +232,6 @@ void SpatialEditorViewport::_select(Spatial *p_node, bool p_append,bool p_single
 
 }
 
-
-struct _RayResult {
-
-	Spatial* item;
-	float depth;
-	int handle;
-	_FORCE_INLINE_ bool operator<(const _RayResult& p_rr) const { return depth<p_rr.depth; }
-};
-
 ObjectID SpatialEditorViewport::_select_ray(const Point2& p_pos, bool p_append,bool &r_includes_current,int *r_gizmo_handle,bool p_alt_select) {
 
 	if (r_gizmo_handle)
@@ -379,6 +370,70 @@ ObjectID SpatialEditorViewport::_select_ray(const Point2& p_pos, bool p_append,b
 
 }
 
+void SpatialEditorViewport::_find_items_at_pos(const Point2& p_pos,bool &r_includes_current,Vector<_RayResult> &results,bool p_alt_select) {
+	// Appends to 'results' every Spatial whose gizmo is hit by the pick ray at p_pos, then sorts 'results' by depth.
+	Vector3 ray=_get_ray(p_pos);
+	Vector3 pos=_get_ray_pos(p_pos);
+	// Candidate instances come from a ray cull against the root viewport's scenario.
+	Vector<RID> instances=VisualServer::get_singleton()->instances_cull_ray(pos,ray,get_tree()->get_root()->get_world()->get_scenario() );
+	Set<Ref<SpatialEditorGizmo> > found_gizmos;
+
+	r_includes_current=false;
+
+	for (int i=0;i<instances.size();i++) {
+
+		uint32_t id=VisualServer::get_singleton()->instance_get_object_instance_ID(instances[i]);
+		Object *obj=ObjectDB::get_instance(id);
+		if (!obj)
+			continue;
+
+		Spatial *spat=obj->cast_to<Spatial>();
+
+		if (!spat)
+			continue;
+
+		Ref<SpatialEditorGizmo> seg = spat->get_gizmo();
+
+		if (!seg.is_valid())
+			continue;
+		// Each gizmo is tested at most once, even if several culled instances resolve to it.
+		if (found_gizmos.has(seg))
+			continue;
+
+		found_gizmos.insert(seg);
+		Vector3 point;
+		Vector3 normal;
+		// NOTE(review): 'handle' is never written after this initialization (NULL is passed below), so res.handle is always -1.
+		int handle=-1;
+		bool inters = seg->intersect_ray(camera,p_pos,point,normal,NULL,p_alt_select);
+
+		if (!inters)
+			continue;
+
+		float dist = pos.distance_to(point);
+
+		if (dist<0)
+			continue;
+		// NOTE(review): the dist<0 check above presumably never triggers for a distance - confirm.
+		// r_includes_current reports whether any hit is already part of the editor selection.
+
+		if (editor_selection->is_selected(spat))
+			r_includes_current=true;
+
+		_RayResult res;
+		res.item=spat;
+		res.depth=dist;
+		res.handle=handle;
+		results.push_back(res);
+	}
+
+	// Vector::sort() orders the hits using _RayResult::operator<, i.e. by depth.
+	if (results.empty())
+		return;
+
+	results.sort();
+}
+
 
 Vector3 SpatialEditorViewport::_get_screen_to_space(const Vector3& p_pos) {
 
@@ -724,6 +779,7 @@ void SpatialEditorViewport::_sinput(const InputEvent &p_event) {
 				} break;
 				case BUTTON_RIGHT: {
 
+					NavigationScheme nav_scheme = _get_navigation_schema("3d_editor/navigation_scheme");
 
 					if (b.pressed && _edit.gizmo.is_valid()) {
 						//restore
@@ -806,6 +862,57 @@ void SpatialEditorViewport::_sinput(const InputEvent &p_event) {
 								//VisualServer::get_singleton()->instance_set_transform(cursor_instance,Transform(Matrix3(),cursor.cursor_pos));
 							}
 						}
+
+						if (b.mod.alt) {
+
+							if (nav_scheme == NAVIGATION_MAYA)
+								break;
+
+							_find_items_at_pos(Vector2( b.x, b.y ),clicked_includes_current,selection_results,b.mod.shift);
+
+							clicked_wants_append=b.mod.shift;
+
+							if (selection_results.size() == 1) {
+
+								clicked=selection_results[0].item->get_instance_ID();
+								selection_results.clear();
+
+								if (clicked) {
+									_select_clicked(clicked_wants_append,true);
+									clicked=0;
+								}
+
+							} else if (!selection_results.empty()) {
+
+								NodePath root_path = get_tree()->get_edited_scene_root()->get_path();
+								StringName root_name = root_path.get_name(root_path.get_name_count()-1);
+
+								for (int i = 0; i < selection_results.size(); i++) {
+
+									Spatial *spat=selection_results[i].item;
+
+									Ref<Texture> icon;
+									if (spat->has_meta("_editor_icon"))
+										icon=spat->get_meta("_editor_icon");
+									else
+										icon=get_icon( has_icon(spat->get_type(),"EditorIcons")?spat->get_type():String("Object"),"EditorIcons");
+
+									String node_path="/"+root_name+"/"+root_path.rel_path_to(spat->get_path());
+
+									selection_menu->add_item(spat->get_name());
+									selection_menu->set_item_icon(i, icon );
+									selection_menu->set_item_metadata(i, node_path);
+									selection_menu->set_item_tooltip(i,String(spat->get_name())+
+											"\nType: "+spat->get_type()+"\nPath: "+node_path);
+								}
+
+								selection_menu->set_global_pos(Vector2( b.global_x, b.global_y ));
+								selection_menu->popup();
+								selection_menu->call_deferred("grab_click_focus");
+
+								break;
+							}
+						}
 					}
 
 					if (_edit.mode!=TRANSFORM_NONE && b.pressed) {
@@ -1957,11 +2064,11 @@ void SpatialEditorViewport::_menu_option(int p_option) {
 				if (!se)
 					continue;
 
-				Vector3 original_scale = sp->get_scale();
-				sp->set_global_transform(camera_transform);
-				sp->set_scale(original_scale);
-				undo_redo->add_do_method(sp,"set_global_transform",sp->get_global_transform());
-				undo_redo->add_undo_method(sp,"set_global_transform",se->original);
+				Transform xform = camera_transform;
+				xform.scale_basis(sp->get_scale());
+
+				undo_redo->add_do_method(sp,"set_global_transform",xform);
+				undo_redo->add_undo_method(sp,"set_global_transform",sp->get_global_transform());
 			}
 			undo_redo->commit_action();
 		} break;
@@ -2096,6 +2203,26 @@ void SpatialEditorViewport::_toggle_camera_preview(bool p_activate) {
 	}
 }
 
+void SpatialEditorViewport::_selection_result_pressed(int p_result) {
+	// Handler for selection_menu "item_pressed": selects the hit stored at index p_result; out-of-range indices are ignored.
+	if (p_result<0 || selection_results.size() <= p_result)
+		return;
+
+	clicked=selection_results[p_result].item->get_instance_ID();
+	// Reuse the normal click-selection path, honoring the append flag captured when the menu was opened.
+	if (clicked) {
+		_select_clicked(clicked_wants_append,true);
+		clicked=0;
+	}
+}
+
+void SpatialEditorViewport::_selection_menu_hide() {
+	// Handler for selection_menu "popup_hide": drops the cached hits and empties/resets the popup.
+	selection_results.clear();
+	selection_menu->clear();
+	selection_menu->set_size(Vector2(0, 0));
+}
+
 void SpatialEditorViewport::set_can_preview(Camera* p_preview) {
 
 	preview=p_preview;
@@ -2210,6 +2337,8 @@ void SpatialEditorViewport::_bind_methods(){
 	ObjectTypeDB::bind_method(_MD("_toggle_camera_preview"),&SpatialEditorViewport::_toggle_camera_preview);
 	ObjectTypeDB::bind_method(_MD("_preview_exited_scene"),&SpatialEditorViewport::_preview_exited_scene);
 	ObjectTypeDB::bind_method(_MD("update_transform_gizmo_view"),&SpatialEditorViewport::update_transform_gizmo_view);
+	ObjectTypeDB::bind_method(_MD("_selection_result_pressed"),&SpatialEditorViewport::_selection_result_pressed);
+	ObjectTypeDB::bind_method(_MD("_selection_menu_hide"),&SpatialEditorViewport::_selection_menu_hide);
 
 	ADD_SIGNAL( MethodInfo("toggle_maximize_view", PropertyInfo(Variant::OBJECT, "viewport")) );
 }
@@ -2307,6 +2436,12 @@ SpatialEditorViewport::SpatialEditorViewport(SpatialEditor *p_spatial_editor, Ed
 	preview=NULL;
 	gizmo_scale=1.0;
 
+	selection_menu = memnew( PopupMenu );
+	add_child(selection_menu);
+	selection_menu->set_custom_minimum_size(Vector2(100, 0));
+	selection_menu->connect("item_pressed", this, "_selection_result_pressed");
+	selection_menu->connect("popup_hide", this, "_selection_menu_hide");
+
 	if (p_index==0) {
 		view_menu->get_popup()->set_item_checked(view_menu->get_popup()->get_item_index(VIEW_AUDIO_LISTENER),true);
 		viewport->set_as_audio_listener(true);
@@ -2725,7 +2860,7 @@ void SpatialEditor::_menu_item_pressed(int p_option) {
 		} break;
 		case MENU_TRANSFORM_CONFIGURE_SNAP: {
 
-			snap_dialog->popup_centered(Size2(200,160));
+			snap_dialog->popup_centered(Size2(200,180));
 		} break;
 		case MENU_TRANSFORM_LOCAL_COORDS: {
 
@@ -3793,46 +3928,24 @@ SpatialEditor::SpatialEditor(EditorNode *p_editor) {
 	snap_dialog = memnew( ConfirmationDialog );
 	snap_dialog->set_title("Snap Settings");
 	add_child(snap_dialog);
-	Label *l = memnew(Label);
-	l->set_text("Translate Snap:");
-	l->set_pos(Point2(5,5));
-	snap_dialog->add_child(l);
+
+	VBoxContainer *snap_dialog_vbc = memnew( VBoxContainer );
+	snap_dialog->add_child(snap_dialog_vbc);
+	snap_dialog->set_child_rect(snap_dialog_vbc);
 
 	snap_translate = memnew( LineEdit );
-	snap_translate->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	snap_translate->set_begin( Point2(15,22) );
-	snap_translate->set_end( Point2(15,35) );
 	snap_translate->set_text("1");
-	snap_dialog->add_child(snap_translate);
-
-	l = memnew(Label);
-	l->set_text("Rotate Snap (deg.):");
-	l->set_pos(Point2(5,45));
-	snap_dialog->add_child(l);
+	snap_dialog_vbc->add_margin_child("Translate Snap:",snap_translate);
 
 	snap_rotate = memnew( LineEdit );
-	snap_rotate->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	snap_rotate->set_begin( Point2(15,62) );
-	snap_rotate->set_end( Point2(15,75) );
 	snap_rotate->set_text("5");
-	snap_dialog->add_child(snap_rotate);
-
-
-	l = memnew(Label);
-	l->set_text("Scale Snap (%):");
-	l->set_pos(Point2(5,85));
-	snap_dialog->add_child(l);
+	snap_dialog_vbc->add_margin_child("Rotate Snap (deg.):",snap_rotate);
 
 	snap_scale = memnew( LineEdit );
-	snap_scale->set_anchor( MARGIN_RIGHT, ANCHOR_END );
-	snap_scale->set_begin( Point2(15,102) );
-	snap_scale->set_end( Point2(15,115) );
 	snap_scale->set_text("5");
-	snap_dialog->add_child(snap_scale);
+	snap_dialog_vbc->add_margin_child("Scale Snap (%):",snap_scale);
 
-	//snap_dialog->get_cancel()->hide();
-
-	/* SNAP DIALOG */
+	/* SETTINGS DIALOG */
 
 	settings_dialog = memnew( ConfirmationDialog );
 	settings_dialog->set_title("Viewport Settings");
@@ -3906,7 +4019,7 @@ SpatialEditor::SpatialEditor(EditorNode *p_editor) {
 	xform_dialog = memnew( ConfirmationDialog );
 	xform_dialog->set_title("Transform Change");
 	add_child(xform_dialog);
-	l = memnew(Label);
+	Label *l = memnew(Label);
 	l->set_text("Translate:");
 	l->set_pos(Point2(5,5));
 	xform_dialog->add_child(l);
diff --git a/tools/editor/plugins/spatial_editor_plugin.h b/tools/editor/plugins/spatial_editor_plugin.h
index ff8912fca8..ebd3f77fe7 100644
--- a/tools/editor/plugins/spatial_editor_plugin.h
+++ b/tools/editor/plugins/spatial_editor_plugin.h
@@ -111,12 +111,21 @@ private:
 	bool orthogonal;
 	float gizmo_scale;
 
+	struct _RayResult {
+		// One hit of a viewport pick ray; operator< orders hits by ascending depth.
+		Spatial* item; // node that was hit
+		float depth; // value used for ordering; _find_items_at_pos stores the distance from the ray position to the hit point
+		int handle; // gizmo handle index; _find_items_at_pos always stores -1
+		_FORCE_INLINE_ bool operator<(const _RayResult& p_rr) const { return depth<p_rr.depth; }
+	};
+
 	void _update_name();
 	void _compute_edit(const Point2& p_point);
 	void _clear_selected();
 	void _select_clicked(bool p_append,bool p_single);
 	void _select(Spatial *p_node, bool p_append,bool p_single);
 	ObjectID _select_ray(const Point2& p_pos, bool p_append,bool &r_includes_current,int *r_gizmo_handle=NULL,bool p_alt_select=false);
+	void _find_items_at_pos(const Point2& p_pos,bool &r_includes_current,Vector<_RayResult> &results,bool p_alt_select=false);
 	Vector3 _get_ray_pos(const Vector2& p_pos) const;
 	Vector3 _get_ray(const Vector2& p_pos);
 	Point2 _point_to_screen(const Vector3& p_point);
@@ -136,9 +145,12 @@ private:
 	float get_fov() const;
 
 	ObjectID clicked;
+	Vector<_RayResult> selection_results;
 	bool clicked_includes_current;
 	bool clicked_wants_append;
 
+	PopupMenu *selection_menu;
+
 	enum NavigationScheme {
 		NAVIGATION_GODOT,
 		NAVIGATION_MAYA,
@@ -225,6 +237,8 @@ private:
 	void _toggle_camera_preview(bool);
 	void _init_gizmo_instance(int p_idx);
 	void _finish_gizmo_instances();
+	void _selection_result_pressed(int);
+	void _selection_menu_hide();
 
 
 protected:
diff --git a/tools/editor/plugins/theme_editor_plugin.cpp b/tools/editor/plugins/theme_editor_plugin.cpp
index 55e8f164d6..63ba57bfc0 100644
--- a/tools/editor/plugins/theme_editor_plugin.cpp
+++ b/tools/editor/plugins/theme_editor_plugin.cpp
@@ -568,26 +568,24 @@ ThemeEditor::ThemeEditor() {
 	CheckButton *cb = memnew( CheckButton );
 	cb->set_text("CheckButton");
 	first_vb->add_child(cb );
-    CheckBox *cbx = memnew( CheckBox );
-    cbx->set_text("CheckBox");
-    first_vb->add_child(cbx );
-
-    /* TODO: This is not working properly, controls are overlapping*/
-    /*
-    ButtonGroup *bg = memnew( ButtonGroup );
-    bg->set_v_size_flags(SIZE_EXPAND_FILL);
-    VBoxContainer *gbvb = memnew( VBoxContainer );
-    gbvb->set_v_size_flags(SIZE_EXPAND_FILL);
-    CheckBox *rbx1 = memnew( CheckBox );
-    rbx1->set_text("CheckBox Radio1");
-    rbx1->set_pressed(true);
-    gbvb->add_child(rbx1);
-    CheckBox *rbx2 = memnew( CheckBox );
-    rbx2->set_text("CheckBox Radio2");
-    gbvb->add_child(rbx2);
-    bg->add_child(gbvb);
-    first_vb->add_child(bg);
-    */
+	CheckBox *cbx = memnew( CheckBox );
+	cbx->set_text("CheckBox");
+	first_vb->add_child(cbx );
+
+
+	ButtonGroup *bg = memnew( ButtonGroup );
+	bg->set_v_size_flags(SIZE_EXPAND_FILL);
+	VBoxContainer *gbvb = memnew( VBoxContainer );
+	gbvb->set_v_size_flags(SIZE_EXPAND_FILL);
+	CheckBox *rbx1 = memnew( CheckBox );
+	rbx1->set_text("CheckBox Radio1");
+	rbx1->set_pressed(true);
+	gbvb->add_child(rbx1);
+	CheckBox *rbx2 = memnew( CheckBox );
+	rbx2->set_text("CheckBox Radio2");
+	gbvb->add_child(rbx2);
+	bg->add_child(gbvb);
+	first_vb->add_child(bg);
 
 	MenuButton* test_menu_button = memnew( MenuButton );
 	test_menu_button->set_text("MenuButton");
diff --git a/tools/editor/plugins/tile_set_editor_plugin.cpp b/tools/editor/plugins/tile_set_editor_plugin.cpp
index 39b0ef3c75..09115472a8 100644
--- a/tools/editor/plugins/tile_set_editor_plugin.cpp
+++ b/tools/editor/plugins/tile_set_editor_plugin.cpp
@@ -110,11 +110,15 @@ void TileSetEditor::_import_scene(Node *scene, Ref<TileSet> p_library, bool p_me
 			if (!child2->cast_to<StaticBody2D>())
 				continue;
 			StaticBody2D *sb = child2->cast_to<StaticBody2D>();
-			if (sb->get_shape_count()==0)
+			int shape_count = sb->get_shape_count();
+			if (shape_count==0)
 				continue;
-			Ref<Shape2D> collision=sb->get_shape(0);
-			if (collision.is_valid()) {
-				collisions.push_back(collision);
+			for (int shape_index=0; shape_index<shape_count; ++shape_index)
+			{
+				Ref<Shape2D> collision=sb->get_shape(shape_index);
+				if (collision.is_valid()) {
+					collisions.push_back(collision);
+				}
 			}
 		}
 
diff --git a/tools/editor/project_export.cpp b/tools/editor/project_export.cpp
index 6003b976aa..b288439b74 100644
--- a/tools/editor/project_export.cpp
+++ b/tools/editor/project_export.cpp
@@ -193,6 +193,8 @@ void ProjectExportDialog::_prop_edited(String what) {
 
 	_save_export_cfg();
 
+	_validate_platform();
+
 }
 
 void ProjectExportDialog::_filters_edited(String what) {
@@ -252,6 +254,13 @@ void ProjectExportDialog::_script_edited(Variant v) {
 
 }
 
+void ProjectExportDialog::_sample_convert_edited(int what) {
+	EditorImportExport *import_export = EditorImportExport::get_singleton(); // 'what' is ignored: each setting is re-read from its own widget
+	import_export->sample_set_action( EditorImportExport::SampleAction(sample_mode->get_selected()));
+	import_export->sample_set_max_hz(  sample_max_hz->get_val() );
+	import_export->sample_set_trim(  sample_trim->is_pressed() );
+}
+
 void ProjectExportDialog::_notification(int p_what) {
 
 	switch(p_what) {
@@ -288,6 +297,7 @@ void ProjectExportDialog::_notification(int p_what) {
 //			_rescan();
 			_update_platform();
 			export_mode->select( EditorImportExport::get_singleton()->get_export_filter() );
+			convert_text_scenes->set_pressed( EditorImportExport::get_singleton()->get_convert_text_scenes() );
 			filters->set_text( EditorImportExport::get_singleton()->get_export_custom_filter() );
 			if (EditorImportExport::get_singleton()->get_export_filter()!=EditorImportExport::EXPORT_SELECTED)
 				tree_vb->hide();
@@ -319,6 +329,15 @@ void ProjectExportDialog::_notification(int p_what) {
 			_update_group();
 			_update_group_tree();
 
+			sample_mode->select( EditorImportExport::get_singleton()->sample_get_action() );
+			sample_max_hz->set_val( EditorImportExport::get_singleton()->sample_get_max_hz() );
+			sample_trim->set_pressed( EditorImportExport::get_singleton()->sample_get_trim() );
+
+			sample_mode->connect("item_selected",this,"_sample_convert_edited");
+			sample_max_hz->connect("value_changed",this,"_sample_convert_edited");
+			sample_trim->connect("toggled",this,"_sample_convert_edited");
+
+
 		} break;
 		case NOTIFICATION_EXIT_TREE: {
 
@@ -402,6 +421,8 @@ void ProjectExportDialog::_export_mode_changed(int p_idx) {
 	else
 		tree_vb->show();
 
+	EditorImportExport::get_singleton()->set_convert_text_scenes( convert_text_scenes->is_pressed() );
+
 	_save_export_cfg();
 
 }
@@ -473,6 +494,18 @@ Error ProjectExportDialog::export_platform(const String& p_platform, const Strin
 	Ref<EditorExportPlatform> exporter = EditorImportExport::get_singleton()->get_export_platform(p_platform);
 	if (exporter.is_null()) {
 		ERR_PRINT("Invalid platform for export");
+
+		List<StringName> platforms;
+		EditorImportExport::get_singleton()->get_export_platforms(&platforms);
+		print_line("Valid export plaftorms are:");
+		for (List<StringName>::Element *E=platforms.front();E;E=E->next())
+			print_line("    \""+E->get()+"\"");
+
+		if (p_quit_after) {
+			OS::get_singleton()->set_exit_code(255);
+			get_tree()->quit();
+		}
+
 		return ERR_INVALID_PARAMETER;
 	}
 	Error err = exporter->export_project(p_path,p_debug);
@@ -1045,6 +1078,7 @@ void ProjectExportDialog::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("_group_select_none"),&ProjectExportDialog::_group_select_none);
 	ObjectTypeDB::bind_method(_MD("_script_edited"),&ProjectExportDialog::_script_edited);
 	ObjectTypeDB::bind_method(_MD("_update_script"),&ProjectExportDialog::_update_script);
+	ObjectTypeDB::bind_method(_MD("_sample_convert_edited"),&ProjectExportDialog::_sample_convert_edited);
 
 
 	ObjectTypeDB::bind_method(_MD("export_platform"),&ProjectExportDialog::export_platform);
@@ -1106,6 +1140,7 @@ ProjectExportDialog::ProjectExportDialog(EditorNode *p_editor) {
 	vb = memnew( VBoxContainer );
 	vb->set_name("Resources");
 	sections->add_child(vb);
+
 	export_mode = memnew( OptionButton );
 	export_mode->add_item("Export selected resources (including dependencies).");
 	export_mode->add_item("Export all resources in the project.");
@@ -1114,6 +1149,8 @@ ProjectExportDialog::ProjectExportDialog(EditorNode *p_editor) {
 
 	vb->add_margin_child("Export Mode:",export_mode);
 
+
+
 	tree_vb = memnew( VBoxContainer );
 	vb->add_child(tree_vb);
 	tree_vb->set_v_size_flags(SIZE_EXPAND_FILL);
@@ -1134,6 +1171,10 @@ ProjectExportDialog::ProjectExportDialog(EditorNode *p_editor) {
 	vb->add_margin_child("Filters to export non-resource files (Comma Separated, ie: *.json, *.txt):",filters);
 	filters->connect("text_changed",this,"_filters_edited");
 
+	convert_text_scenes = memnew( CheckButton );
+	convert_text_scenes->set_text("Convert text scenes to binary on export");
+	vb->add_child(convert_text_scenes);
+	convert_text_scenes->connect("toggled",this,"_export_mode_changed");
 
 	image_vb = memnew( VBoxContainer );
 	image_vb->set_name("Images");
@@ -1316,6 +1357,22 @@ ProjectExportDialog::ProjectExportDialog(EditorNode *p_editor) {
 	hbc->add_child(button_reload);
 */
 
+
+	sample_vbox = memnew( VBoxContainer );
+	sample_vbox->set_name("Samples");
+	sections->add_child(sample_vbox);
+	sample_mode = memnew( OptionButton );
+	sample_vbox->add_margin_child("Sample Conversion Mode: (.wav files):",sample_mode);
+	sample_mode->add_item("Keep");
+	sample_mode->add_item("Compress (RAM - IMA-ADPCM)");
+	sample_max_hz = memnew( SpinBox );
+	sample_max_hz->set_max(192000);
+	sample_max_hz->set_min(8000);
+	sample_vbox->add_margin_child("Sampling Rate Limit: (hz)",sample_max_hz);
+	sample_trim = memnew( CheckButton );
+	sample_trim->set_text("Trim");
+	sample_vbox->add_margin_child("Trailing Silence:",sample_trim);
+
 	script_vbox = memnew( VBoxContainer );
 	script_vbox->set_name("Script");
 	sections->add_child(script_vbox);
diff --git a/tools/editor/project_export.h b/tools/editor/project_export.h
index d85e688e58..09c8f10206 100644
--- a/tools/editor/project_export.h
+++ b/tools/editor/project_export.h
@@ -108,6 +108,7 @@ private:
 	PropertyEditor *platform_options;
 
 	OptionButton *export_mode;
+	CheckButton *convert_text_scenes;
 	VBoxContainer *tree_vb;
 
 	VBoxContainer *image_vb;
@@ -139,6 +140,10 @@ private:
 	OptionButton *script_mode;
 	LineEdit *script_key;
 
+	VBoxContainer *sample_vbox;
+	OptionButton *sample_mode;
+	SpinBox *sample_max_hz;
+	CheckButton *sample_trim;
 
 
 	void _export_mode_changed(int p_idx);
@@ -162,6 +167,8 @@ private:
 	void _image_export_edited(int what);
 	void _shrink_edited(float what);
 
+	void _sample_convert_edited(int what);
+
 	void _update_group_list();
 	void _select_group(const String& p_by_name);
 
diff --git a/tools/editor/project_manager.cpp b/tools/editor/project_manager.cpp
index 9f47291433..04705017d2 100644
--- a/tools/editor/project_manager.cpp
+++ b/tools/editor/project_manager.cpp
@@ -144,7 +144,7 @@ class NewProjectDialog : public ConfirmationDialog {
 
 			fdialog->set_mode(FileDialog::MODE_OPEN_FILE);
 			fdialog->clear_filters();
-			fdialog->add_filter("engine.cfg ; "_MKSTR(VERSION_NAME)" Project");
+			fdialog->add_filter("engine.cfg ; " _MKSTR(VERSION_NAME) " Project");
 		} else {
 			fdialog->set_mode(FileDialog::MODE_OPEN_DIR);
 		}
@@ -193,7 +193,7 @@ class NewProjectDialog : public ConfirmationDialog {
 				f->store_line("\n");
 				f->store_line("[application]");
 				f->store_line("name=\""+project_name->get_text()+"\"");
-				f->store_line("icon=\"icon.png\"");
+				f->store_line("icon=\"res://icon.png\"");
 
 				memdelete(f);
 
@@ -480,20 +480,25 @@ void ProjectManager::_load_recent_projects() {
 		bool favorite = (_name.begins_with("favorite_projects/"))?true:false;
 
 		uint64_t last_modified = 0;
-		if (FileAccess::exists(conf))
+		if (FileAccess::exists(conf)) {
 			last_modified = FileAccess::get_modified_time(conf);
-		String fscache = path.plus_file(".fscache");
-		if (FileAccess::exists(fscache)) {
-			uint64_t cache_modified = FileAccess::get_modified_time(fscache);
-			if ( cache_modified > last_modified )
-				last_modified = cache_modified;
-		}
 
-		ProjectItem item(project, path, conf, last_modified, favorite);
-		if (favorite)
-			favorite_projects.push_back(item);
-		else
-			projects.push_back(item);
+			String fscache = path.plus_file(".fscache");
+			if (FileAccess::exists(fscache)) {
+				uint64_t cache_modified = FileAccess::get_modified_time(fscache);
+				if ( cache_modified > last_modified )
+					last_modified = cache_modified;
+			}
+
+			ProjectItem item(project, path, conf, last_modified, favorite);
+			if (favorite)
+				favorite_projects.push_back(item);
+			else
+				projects.push_back(item);
+		} else {
+			//project doesn't exist on disk but it's in the XML settings file
+			EditorSettings::get_singleton()->erase(_name); //remove it
+		}
 	}
 
 	projects.sort();
@@ -601,6 +606,8 @@ void ProjectManager::_load_recent_projects() {
 	erase_btn->set_disabled(selected_list.size()<1);
 	open_btn->set_disabled(selected_list.size()<1);
 	run_btn->set_disabled(selected_list.size()<1 || (selected_list.size()==1 && single_selected_main==""));
+
+	EditorSettings::get_singleton()->save();
 }
 
 void ProjectManager::_open_project_confirm() {
@@ -617,11 +624,6 @@ void ProjectManager::_open_project_confirm() {
 
 		args.push_back("-editor");
 
-		const String &selected_main = E->get();
-		if (selected_main!="") {
-			args.push_back(selected_main);
-		}
-
 		String exec = OS::get_singleton()->get_executable_path();
 
 		OS::ProcessID pid=0;
@@ -839,7 +841,7 @@ ProjectManager::ProjectManager() {
 	l->set_align(Label::ALIGN_CENTER);
 	vb->add_child(l);
 	l = memnew( Label );
-	l->set_text("v"VERSION_MKSTRING);
+	l->set_text("v" VERSION_MKSTRING);
 	//l->add_font_override("font",get_font("bold","Fonts"));
 	l->set_align(Label::ALIGN_CENTER);
 	vb->add_child(l);
diff --git a/tools/editor/project_settings.cpp b/tools/editor/project_settings.cpp
index 25a2750166..2fd8b37753 100644
--- a/tools/editor/project_settings.cpp
+++ b/tools/editor/project_settings.cpp
@@ -59,6 +59,9 @@ void ProjectSettings::_notification(int p_what) {
 
 	if (p_what==NOTIFICATION_ENTER_TREE) {
 
+		search_button->set_icon(get_icon("Zoom","EditorIcons"));
+		clear_button->set_icon(get_icon("Close","EditorIcons"));
+
 		translation_list->connect("button_pressed",this,"_translation_delete");
 		_update_actions();
 		popup_add->add_icon_item(get_icon("Keyboard","EditorIcons"),"Key",InputEvent::KEY);
@@ -1171,6 +1174,31 @@ void ProjectSettings::_update_autoload() {
 
 }
 
+// Swaps the add-property bar for the search bar (and back) according to p_pressed.
+void ProjectSettings::_toggle_search_bar(bool p_pressed) {
+	globals_editor->set_use_filter(p_pressed); // filtering follows the toggle state
+
+	if (!p_pressed) {
+		// Search off: bring the add-property controls back.
+		search_bar->hide();
+		add_prop_bar->show();
+		return;
+	}
+
+	search_bar->show();
+	add_prop_bar->hide();
+	search_box->grab_focus();
+	search_box->select_all();
+}
+
+// Empties the search field and rebuilds the property tree; no-op when the field is already empty.
+void ProjectSettings::_clear_search_box() {
+
+	if (search_box->get_text()!="") {
+		search_box->clear();
+		globals_editor->update_tree();
+	}
+}
 
 void ProjectSettings::_bind_methods() {
 
@@ -1212,6 +1240,9 @@ void ProjectSettings::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("_update_autoload"),&ProjectSettings::_update_autoload);
 	ObjectTypeDB::bind_method(_MD("_autoload_delete"),&ProjectSettings::_autoload_delete);
 
+	ObjectTypeDB::bind_method(_MD("_clear_search_box"),&ProjectSettings::_clear_search_box);
+	ObjectTypeDB::bind_method(_MD("_toggle_search_bar"),&ProjectSettings::_toggle_search_bar);
+
 }
 
 ProjectSettings::ProjectSettings(EditorData *p_data) {
@@ -1232,87 +1263,93 @@ ProjectSettings::ProjectSettings(EditorData *p_data) {
 	//tab_container->set_anchor_and_margin(MARGIN_TOP,ANCHOR_BEGIN, 15 );
 	//tab_container->set_anchor_and_margin(MARGIN_BOTTOM,ANCHOR_END, 35 );
 
-	Control *props_base = memnew( Control );
+	VBoxContainer *props_base = memnew( VBoxContainer );
+	props_base->set_alignment(BoxContainer::ALIGN_BEGIN);
+	props_base->set_v_size_flags(Control::SIZE_EXPAND_FILL);
 	tab_container->add_child(props_base);
 	props_base->set_name("General");
-	globals_editor = memnew( PropertyEditor );
-	props_base->add_child(globals_editor);
-	globals_editor->set_area_as_parent_rect();
-	globals_editor->hide_top_label();
-	globals_editor->set_anchor_and_margin(MARGIN_TOP,ANCHOR_BEGIN, 55 );
-	globals_editor->set_anchor_and_margin(MARGIN_BOTTOM,ANCHOR_END, 35 );
-	globals_editor->set_anchor_and_margin(MARGIN_LEFT,ANCHOR_BEGIN, 5 );
-	globals_editor->set_anchor_and_margin(MARGIN_RIGHT,ANCHOR_END, 5 );
-	globals_editor->set_capitalize_paths(false);
-	globals_editor->get_scene_tree()->connect("cell_selected",this,"_item_selected");
-	globals_editor->connect("property_toggled",this,"_item_checked");
-	globals_editor->connect("property_edited",this,"_settings_prop_edited");
 
+	HBoxContainer *hbc = memnew( HBoxContainer );
+	hbc->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	props_base->add_child(hbc);
 
-	Label *l = memnew( Label );
-	props_base->add_child(l);
-	l->set_pos(Point2(6,5));
-	l->set_text("Category:");
+	search_button = memnew( ToolButton );
+	search_button->set_toggle_mode(true);
+	search_button->set_pressed(false);
+	search_button->set_text("Search");
+	hbc->add_child(search_button);
+	search_button->connect("toggled",this,"_toggle_search_bar");
 
+	hbc->add_child( memnew( VSeparator ) );
 
-	l = memnew( Label );
-	l->set_anchor(MARGIN_LEFT,ANCHOR_RATIO);
-	props_base->add_child(l);
-	l->set_begin(Point2(0.21,5));
-	l->set_text("Property:");
+	add_prop_bar = memnew( HBoxContainer );
+	add_prop_bar->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	hbc->add_child(add_prop_bar);
 
-	l = memnew( Label );
-	l->set_anchor(MARGIN_LEFT,ANCHOR_RATIO);
-	props_base->add_child(l);
-	l->set_begin(Point2(0.51,5));
-	l->set_text("Type:");
+	Label *l = memnew( Label );
+	add_prop_bar->add_child(l);
+	l->set_text("Category:");
 
 	category = memnew( LineEdit );
-	props_base->add_child(category);
-	category->set_anchor(MARGIN_RIGHT,ANCHOR_RATIO);
-	category->set_begin( Point2(5,25) );
-	category->set_end( Point2(0.20,26) );
+	category->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	add_prop_bar->add_child(category);
 	category->connect("text_entered",this,"_item_adds");
 
+	l = memnew( Label );
+	add_prop_bar->add_child(l);
+	l->set_text("Property:");
+
 	property = memnew( LineEdit );
-	props_base->add_child(property);
-	property->set_anchor(MARGIN_LEFT,ANCHOR_RATIO);
-	property->set_anchor(MARGIN_RIGHT,ANCHOR_RATIO);
-	property->set_begin( Point2(0.21,25) );
-	property->set_end( Point2(0.50,26) );
+	property->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	add_prop_bar->add_child(property);
 	property->connect("text_entered",this,"_item_adds");
 
+	l = memnew( Label );
+	add_prop_bar->add_child(l);
+	l->set_text("Type:");
 
 	type = memnew( OptionButton );
-	props_base->add_child(type);
-	type->set_anchor(MARGIN_LEFT,ANCHOR_RATIO);
-	type->set_anchor(MARGIN_RIGHT,ANCHOR_RATIO);
-	type->set_begin( Point2(0.51,25) );
-	type->set_end( Point2(0.70,26) );
+	type->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	add_prop_bar->add_child(type);
 	type->add_item("bool");
 	type->add_item("int");
 	type->add_item("float");
 	type->add_item("string");
 
 	Button *add = memnew( Button );
-	props_base->add_child(add);
-	add->set_anchor(MARGIN_LEFT,ANCHOR_RATIO);
-	add->set_anchor(MARGIN_RIGHT,ANCHOR_RATIO);
-	add->set_begin( Point2(0.71,25) );
-	add->set_end( Point2(0.85,26) );
+	add_prop_bar->add_child(add);
 	add->set_text("Add");
 	add->connect("pressed",this,"_item_add");
 
 	Button *del = memnew( Button );
-	props_base->add_child(del);
-	del->set_anchor(MARGIN_LEFT,ANCHOR_RATIO);
-	del->set_anchor(MARGIN_RIGHT,ANCHOR_END);
-	del->set_begin( Point2(0.86,25) );
-	del->set_end( Point2(5,26) );
+	add_prop_bar->add_child(del);
 	del->set_text("Del");
 	del->connect("pressed",this,"_item_del");
 
-	/*
+	search_bar = memnew( HBoxContainer );
+	search_bar->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	hbc->add_child(search_bar);
+	search_bar->hide();
+
+	search_box = memnew( LineEdit );
+	search_box->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	search_bar->add_child(search_box);
+
+	clear_button = memnew( ToolButton );
+	search_bar->add_child(clear_button);
+	clear_button->connect("pressed",this,"_clear_search_box");
+
+	globals_editor = memnew( PropertyEditor );
+	props_base->add_child(globals_editor);
+	globals_editor->hide_top_label();
+	globals_editor->set_v_size_flags(Control::SIZE_EXPAND_FILL);
+	globals_editor->register_text_enter(search_box);
+	globals_editor->set_capitalize_paths(false);
+	globals_editor->get_scene_tree()->connect("cell_selected",this,"_item_selected");
+	globals_editor->connect("property_toggled",this,"_item_checked");
+	globals_editor->connect("property_edited",this,"_settings_prop_edited");
+
+/*
 	Button *save = memnew( Button );
 	props_base->add_child(save);
 
@@ -1325,17 +1362,16 @@ ProjectSettings::ProjectSettings(EditorData *p_data) {
 	save->set_text("Save");
 	save->connect("pressed",this,"_save");
 */
+
+	hbc = memnew( HBoxContainer );
+	props_base->add_child(hbc);
+
 	popup_platform = memnew( MenuButton );
 	popup_platform->set_text("Copy To Platform..");
 	popup_platform->set_disabled(true);
-	props_base->add_child(popup_platform);
-
-	popup_platform->set_anchor(MARGIN_LEFT,ANCHOR_BEGIN);
-	popup_platform->set_anchor(MARGIN_RIGHT,ANCHOR_BEGIN);
-	popup_platform->set_anchor(MARGIN_TOP,ANCHOR_END);
-	popup_platform->set_anchor(MARGIN_BOTTOM,ANCHOR_END);
-	popup_platform->set_begin( Point2(10,28) );
-	popup_platform->set_end( Point2(150,20) );
+	hbc->add_child(popup_platform);
+
+	hbc->add_spacer();
 
 	List<StringName> ep;
 	EditorImportExport::get_singleton()->get_export_platforms(&ep);
diff --git a/tools/editor/project_settings.h b/tools/editor/project_settings.h
index 7c91254764..b122609e52 100644
--- a/tools/editor/project_settings.h
+++ b/tools/editor/project_settings.h
@@ -47,6 +47,12 @@ class ProjectSettings : public AcceptDialog {
 	UndoRedo *undo_redo;
 	PropertyEditor *globals_editor;
 
+	HBoxContainer *search_bar;
+	ToolButton *search_button;
+	LineEdit *search_box;
+	ToolButton *clear_button;
+
+	HBoxContainer *add_prop_bar;
 	ConfirmationDialog *message;
 	LineEdit *category;
 	LineEdit *property;
@@ -130,6 +136,9 @@ class ProjectSettings : public AcceptDialog {
 	void _translation_res_option_changed();
 	void _translation_res_option_delete(Object *p_item,int p_column, int p_button);
 
+	void _toggle_search_bar(bool p_pressed);
+	void _clear_search_box();
+
 	ProjectSettings();
 
 
diff --git a/tools/editor/property_editor.cpp b/tools/editor/property_editor.cpp
index 549a3f7ffb..7ab09f0487 100644
--- a/tools/editor/property_editor.cpp
+++ b/tools/editor/property_editor.cpp
@@ -1361,7 +1361,7 @@ void CustomPropertyEditor::_modified(String p_string) {
 		} break;
 		case Variant::MATRIX32: {
 
-			Matrix3 m;
+			Matrix32 m;
 			for(int i=0;i<6;i++) {
 
 				m.elements[i/2][i%2]=value_editor[i]->get_text().to_double();
@@ -2207,9 +2207,9 @@ TreeItem *PropertyEditor::get_parent_node(String p_path,HashMap<String,TreeItem*
 		}
 
 		item->set_editable(0,false);
-		item->set_selectable(0,false);
+		item->set_selectable(0,subsection_selectable);
 		item->set_editable(1,false);
-		item->set_selectable(1,false);
+		item->set_selectable(1,subsection_selectable);
 
 		if (item->get_parent()==root) {
 
@@ -2363,6 +2363,8 @@ void PropertyEditor::update_tree() {
 
 	TreeItem * current_category=NULL;
 
+	String filter = search_box ? search_box->get_text() : "";
+
 	for (List<PropertyInfo>::Element *I=plist.front() ; I ; I=I->next()) {
 
 		PropertyInfo& p = I->get();
@@ -2426,7 +2428,24 @@ void PropertyEditor::update_tree() {
 		} else  if ( ! (p.usage&PROPERTY_USAGE_EDITOR ) )
 			continue;
 
+		String name = (p.name.find("/")!=-1)?p.name.right( p.name.find_last("/")+1 ):p.name;
+
+		if (capitalize_paths)
+			name = name.camelcase_to_underscore().capitalize();
+
 		String path=p.name.left( p.name.find_last("/") ) ;
+
+		if (use_filter && filter!="") {
+
+			String cat = path;
+
+			if (capitalize_paths)
+				cat = cat.capitalize();
+
+			if (cat.findn(filter)==-1 && name.findn(filter)==-1)
+				continue;
+		}
+
 		//printf("property %s\n",p.name.ascii().get_data());
 		TreeItem * parent = get_parent_node(path,item_path,current_category?current_category:root );
 		//if (parent->get_parent()==root)
@@ -2448,8 +2467,6 @@ void PropertyEditor::update_tree() {
 
 		TreeItem * item = tree->create_item( parent );
 
-		String name = (p.name.find("/")!=-1)?p.name.right( p.name.find_last("/")+1 ):p.name;
-
 		if (level>0) {
 			item->set_custom_bg_color(0,col);
 			//item->set_custom_bg_color(1,col);
@@ -2465,11 +2482,7 @@ void PropertyEditor::update_tree() {
 			item->set_checked(0,p.usage&PROPERTY_USAGE_CHECKED);
 		}
 
-		if (capitalize_paths)
-			item->set_text( 0, name.camelcase_to_underscore().capitalize() );
-		else
-			item->set_text( 0, name );
-
+		item->set_text(0, name);
 		item->set_tooltip(0, p.name);
 
 		if (use_doc_hints) {
@@ -2833,6 +2846,13 @@ void PropertyEditor::update_tree() {
 				item->set_icon( 0,get_icon("Vector","EditorIcons") );
 
 			} break;
+			case Variant::MATRIX32:
+			case Variant::MATRIX3: {
+
+				item->set_cell_mode( 1, TreeItem::CELL_MODE_CUSTOM );
+				item->set_editable( 1, true );
+				item->set_text(1, obj->get(p.name));
+			} break;
 			case Variant::TRANSFORM: {
 
 				item->set_cell_mode( 1, TreeItem::CELL_MODE_CUSTOM );
@@ -3403,6 +3423,11 @@ void PropertyEditor::_draw_flags(Object *t,const Rect2& p_rect) {
 
 }
 
+void PropertyEditor::_filter_changed(const String& p_text) {
+	// Slot for the registered search box's "text_changed" signal; p_text is unused because update_tree() reads the box text itself.
+	update_tree();
+}
+
 void PropertyEditor::_bind_methods() {
 
 	ObjectTypeDB::bind_method( "_item_edited",&PropertyEditor::_item_edited);
@@ -3415,6 +3440,7 @@ void PropertyEditor::_bind_methods() {
 	ObjectTypeDB::bind_method( "_changed_callback",&PropertyEditor::_changed_callbacks);
 	ObjectTypeDB::bind_method( "_draw_flags",&PropertyEditor::_draw_flags);
 	ObjectTypeDB::bind_method( "_set_range_def",&PropertyEditor::_set_range_def);
+	ObjectTypeDB::bind_method( "_filter_changed",&PropertyEditor::_filter_changed);
 
 	ADD_SIGNAL( MethodInfo("property_toggled",PropertyInfo( Variant::STRING, "property"),PropertyInfo( Variant::BOOL, "value")));
 	ADD_SIGNAL( MethodInfo("resource_selected", PropertyInfo( Variant::OBJECT, "res"),PropertyInfo( Variant::STRING, "prop") ) );
@@ -3469,12 +3495,40 @@ void PropertyEditor::set_show_categories(bool p_show) {
 	update_tree();
 }
 
+// Enables or disables search filtering; the tree is rebuilt only when the flag actually changes.
+void PropertyEditor::set_use_filter(bool p_use) {
+
+	if (p_use!=use_filter) {
+		use_filter=p_use;
+		update_tree();
+	}
+}
+
+// Stores p_line_edit (if it is a LineEdit) as the search box and listens for its text changes.
+void PropertyEditor::register_text_enter(Node* p_line_edit) {
+	ERR_FAIL_NULL(p_line_edit);
+	search_box=p_line_edit->cast_to<LineEdit>();
+	if (!search_box)
+		return; // cast failed: nothing to connect to
+	search_box->connect("text_changed",this,"_filter_changed");
+}
+
+// Sets whether subsection rows may be selected; the tree is rebuilt only when the flag actually changes.
+void PropertyEditor::set_subsection_selectable(bool p_selectable) {
+
+	if (p_selectable!=subsection_selectable) {
+		subsection_selectable=p_selectable;
+		update_tree();
+	}
+}
+
 PropertyEditor::PropertyEditor() {
 
 	_prop_edited="property_edited";
 	_prop_edited_name.push_back(String());
 	undo_redo=NULL;
 	obj=NULL;
+	search_box=NULL;
 	changing=false;
 	update_tree_pending=false;
 
@@ -3514,6 +3568,7 @@ PropertyEditor::PropertyEditor() {
 	tree->connect("button_pressed", this,"_edit_button");
 	custom_editor->connect("variant_changed", this,"_custom_editor_edited");
 	custom_editor->connect("resource_edit_request", this,"_resource_edit_request",make_binds(),CONNECT_DEFERRED);
+	tree->set_hide_folding(true);
 
 	capitalize_paths=true;
 	autoclear=false;
@@ -3526,7 +3581,9 @@ PropertyEditor::PropertyEditor() {
 	show_categories=false;
 	refresh_countdown=0;
 	use_doc_hints=false;
-	
+	use_filter=false;
+	subsection_selectable=false;
+
 }
 
 
diff --git a/tools/editor/property_editor.h b/tools/editor/property_editor.h
index dcb7b66abd..f004616c08 100644
--- a/tools/editor/property_editor.h
+++ b/tools/editor/property_editor.h
@@ -147,6 +147,7 @@ class PropertyEditor : public Control {
 	Tree *tree;
 	Label *top_label;
 	//Object *object;
+	LineEdit *search_box;
 
 	Object* obj;
 
@@ -162,6 +163,8 @@ class PropertyEditor : public Control {
 	bool show_categories;
 	float refresh_countdown;
 	bool use_doc_hints;
+	bool use_filter;
+	bool subsection_selectable;
 
 	HashMap<String,String> pending;
 	String selected_property;
@@ -201,6 +204,8 @@ class PropertyEditor : public Control {
 	void _refresh_item(TreeItem *p_item);
 	void _set_range_def(Object *p_item, String prop, float p_frame);
 
+	void _filter_changed(const String& p_text);
+
 	UndoRedo *undo_redo;
 protected:
 
@@ -230,7 +235,12 @@ public:
 
 	void set_show_categories(bool p_show);
 	void set_use_doc_hints(bool p_enable) { use_doc_hints=p_enable; }
-	
+
+	void set_use_filter(bool p_use);
+	void register_text_enter(Node *p_line_edit);
+
+	void set_subsection_selectable(bool p_selectable);
+
 	PropertyEditor();	
 	~PropertyEditor();
 
diff --git a/tools/editor/scene_tree_dock.cpp b/tools/editor/scene_tree_dock.cpp
index 08aa68d792..8b5bf8c1e1 100644
--- a/tools/editor/scene_tree_dock.cpp
+++ b/tools/editor/scene_tree_dock.cpp
@@ -1223,24 +1223,21 @@ void SceneTreeDock::_create() {
 
 		}
 
-
 		String newname=n->get_name();
 		n->replace_by(newnode,true);
 
-
 		if (n==edited_scene) {
 			edited_scene=newnode;
 			editor->set_edited_scene(newnode);
 		}
 
-
-
-
 		editor_data->get_undo_redo().clear_history();
-		memdelete(n);
 		newnode->set_name(newname);
+
 		editor->push_item(newnode);
 
+		memdelete(n);
+		
 		_update_tool_buttons();
 
 	}
@@ -1267,7 +1264,10 @@ void SceneTreeDock::import_subscene() {
 void SceneTreeDock::_import_subscene() {
 
 	Node* parent = scene_tree->get_selected();
-	ERR_FAIL_COND(!parent);
+	if (!parent) {
+		parent = editor_data->get_edited_scene_root();
+		ERR_FAIL_COND(!parent);
+	}
 
 	import_subscene_dialog->move(parent,edited_scene);
 	editor_data->get_undo_redo().clear_history(); //no undo for now..
diff --git a/tools/editor/scene_tree_editor.cpp b/tools/editor/scene_tree_editor.cpp
index 60395d5ff5..6575603073 100644
--- a/tools/editor/scene_tree_editor.cpp
+++ b/tools/editor/scene_tree_editor.cpp
@@ -565,7 +565,6 @@ void SceneTreeEditor::_notification(int p_what) {
 		get_tree()->disconnect("node_removed",this,"_node_removed");
 		tree->disconnect("item_collapsed",this,"_cell_collapsed");
 		clear_inherit_confirm->disconnect("confirmed",this,"_subscene_option");
-		_update_tree();
 	}
 
 }
@@ -649,20 +648,28 @@ void SceneTreeEditor::_rename_node(ObjectID p_node,const String& p_name) {
 void SceneTreeEditor::_renamed() {
 
 	TreeItem *which=tree->get_edited();
-	
+
 	ERR_FAIL_COND(!which);
 	NodePath np = which->get_metadata(0);
 	Node *n=get_node(np);
 	ERR_FAIL_COND(!n);
 
+	String new_name=which->get_text(0);
+	if (new_name.find(".") != -1 || new_name.find("/") != -1) {
+
+		error->set_text("Invalid node name, the following characters are not allowed:\n  \".\", \"/\"");
+		error->popup_centered_minsize();
+		new_name=n->get_name();
+	}
+
 	if (!undo_redo) {
-		n->set_name( which->get_text(0) );
+		n->set_name( new_name );
 		which->set_metadata(0,n->get_path());
 		emit_signal("node_renamed");
 	} else {
 		undo_redo->create_action("Rename Node");
-		emit_signal("node_prerename",n,which->get_text(0));
-		undo_redo->add_do_method(this,"_rename_node",n->get_instance_ID(),which->get_text(0));
+		emit_signal("node_prerename",n,new_name);
+		undo_redo->add_do_method(this,"_rename_node",n->get_instance_ID(),new_name);
 		undo_redo->add_undo_method(this,"_rename_node",n->get_instance_ID(),n->get_name());
 		undo_redo->commit_action();
 	}
diff --git a/tools/editor/scenes_dock.cpp b/tools/editor/scenes_dock.cpp
index 7d9c5b24b2..c9b376ebec 100644
--- a/tools/editor/scenes_dock.cpp
+++ b/tools/editor/scenes_dock.cpp
@@ -155,7 +155,7 @@ void ScenesDock::_notification(int p_what) {
 
 			if (initialized)
 				return;
-			initialized=false;
+			initialized=true;
 
 			EditorFileSystem::get_singleton()->connect("filesystem_changed",this,"_fs_changed");
 
@@ -352,10 +352,30 @@ String ScenesDock::get_selected_path() const {
 
 void ScenesDock::_instance_pressed() {
 
-	TreeItem *sel = tree->get_selected();
-	if (!sel)
-		return;
-	String path = sel->get_metadata(0);
+	// Emits "instance" with the path selected in the active view (tree or file list).
+	// Declared at function scope so the value set in either branch is the one emitted.
+	String path;
+	if (tree_mode) {
+		TreeItem *sel = tree->get_selected();
+		if (!sel)
+			return; // nothing selected in the tree
+		path = sel->get_metadata(0);
+	} else {
+		int idx = -1;
+		for (int i = 0; i<files->get_item_count(); i++) {
+			if (files->is_selected(i))
+			{
+				idx = i;
+				break;
+			}
+		}
+
+		if (idx<0)
+			return;
+
+		path = files->get_item_metadata(idx);
+	}
+
 	emit_signal("instance",path);
 }
 
diff --git a/tools/editor/script_editor_debugger.cpp b/tools/editor/script_editor_debugger.cpp
index 8e0e7ddb49..60f2afa2c2 100644
--- a/tools/editor/script_editor_debugger.cpp
+++ b/tools/editor/script_editor_debugger.cpp
@@ -1376,7 +1376,6 @@ ScriptEditorDebugger::ScriptEditorDebugger(EditorNode *p_editor){
 	vmem_refresh->connect("pressed",this,"_video_mem_request");
 
 	MarginContainer *vmmc = memnew( MarginContainer );
-	vmmc = memnew( MarginContainer );
 	vmem_tree = memnew( Tree );
 	vmem_tree->set_v_size_flags(SIZE_EXPAND_FILL);
 	vmem_tree->set_h_size_flags(SIZE_EXPAND_FILL);
diff --git a/tools/editor/settings_config_dialog.cpp b/tools/editor/settings_config_dialog.cpp
index f73de26eec..6d8f849427 100644
--- a/tools/editor/settings_config_dialog.cpp
+++ b/tools/editor/settings_config_dialog.cpp
@@ -72,6 +72,10 @@ void EditorSettingsDialog::popup_edit_settings() {
 
 	property_editor->edit(EditorSettings::get_singleton());
 	property_editor->update_tree();
+
+	search_box->select_all();
+	search_box->grab_focus();
+
 	popup_centered_ratio(0.7);
 }
 
@@ -244,11 +248,21 @@ void EditorSettingsDialog::_update_plugins() {
 
 }
 
+// Empties the search field and rebuilds the settings tree; no-op when the field is already empty.
+void EditorSettingsDialog::_clear_search_box() {
+
+	if (search_box->get_text()!="") {
+		search_box->clear();
+		property_editor->update_tree();
+	}
+}
+
 void EditorSettingsDialog::_notification(int p_what) {
 
 	if (p_what==NOTIFICATION_ENTER_TREE) {
 
 		rescan_plugins->set_icon(get_icon("Reload","EditorIcons"));
+		clear_button->set_icon(get_icon("Close","EditorIcons"));
 		_update_plugins();
 	}
 }
@@ -261,6 +275,7 @@ void EditorSettingsDialog::_bind_methods() {
 	ObjectTypeDB::bind_method(_MD("_plugin_settings"),&EditorSettingsDialog::_plugin_settings);
 	ObjectTypeDB::bind_method(_MD("_plugin_edited"),&EditorSettingsDialog::_plugin_edited);
 	ObjectTypeDB::bind_method(_MD("_plugin_install"),&EditorSettingsDialog::_plugin_install);
+	ObjectTypeDB::bind_method(_MD("_clear_search_box"),&EditorSettingsDialog::_clear_search_box);
 }
 
 EditorSettingsDialog::EditorSettingsDialog() {
@@ -271,16 +286,38 @@ EditorSettingsDialog::EditorSettingsDialog() {
 	add_child(tabs);
 	set_child_rect(tabs);
 
+	VBoxContainer *vbc = memnew( VBoxContainer );
+	tabs->add_child(vbc);
+	vbc->set_name("General");
+
+	HBoxContainer *hbc = memnew( HBoxContainer );
+	hbc->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	vbc->add_child(hbc);
+
+	Label *l = memnew( Label );
+	l->set_text("Search: ");
+	hbc->add_child(l);
+
+	search_box = memnew( LineEdit );
+	search_box->set_h_size_flags(Control::SIZE_EXPAND_FILL);
+	hbc->add_child(search_box);
+
+	clear_button = memnew( ToolButton );
+	hbc->add_child(clear_button);
+	clear_button->connect("pressed",this,"_clear_search_box");
+
 	property_editor = memnew( PropertyEditor );
 	property_editor->hide_top_label();
-	tabs->add_child(property_editor);
-	property_editor->set_name("General");
+	property_editor->set_use_filter(true);
+	property_editor->register_text_enter(search_box);
+	property_editor->set_v_size_flags(Control::SIZE_EXPAND_FILL);
+	vbc->add_child(property_editor);
 
-	VBoxContainer *vbc = memnew( VBoxContainer );
+	vbc = memnew( VBoxContainer );
 	tabs->add_child(vbc);
 	vbc->set_name("Plugins");
 
-	HBoxContainer *hbc = memnew( HBoxContainer );
+	hbc = memnew( HBoxContainer );
 	vbc->add_child(hbc);
 	hbc->add_child( memnew( Label("Plugin List: ")));
 	hbc->add_spacer();
diff --git a/tools/editor/settings_config_dialog.h b/tools/editor/settings_config_dialog.h
index cca1ef33d5..50159cf488 100644
--- a/tools/editor/settings_config_dialog.h
+++ b/tools/editor/settings_config_dialog.h
@@ -51,6 +51,8 @@ class EditorSettingsDialog : public AcceptDialog {
 
 	Button *rescan_plugins;
 	Tree *plugins;
+	LineEdit *search_box;
+	ToolButton *clear_button;
 	PropertyEditor *property_editor;
 
 	Timer *timer;
@@ -71,6 +73,8 @@ class EditorSettingsDialog : public AcceptDialog {
 	void _rescan_plugins();
 	void _update_plugins();
 
+	void _clear_search_box();
+
 protected:
 
 	static void _bind_methods();
diff --git a/tools/editor/spatial_editor_gizmos.cpp b/tools/editor/spatial_editor_gizmos.cpp
index 4dc9c4f43e..5efca44c7d 100644
--- a/tools/editor/spatial_editor_gizmos.cpp
+++ b/tools/editor/spatial_editor_gizmos.cpp
@@ -745,7 +745,7 @@ static float _find_closest_angle_to_half_pi_arc(const Vector3& p_from, const Vec
 	}
 
 	//min_p = p_arc_xform.affine_inverse().xform(min_p);
-	float a = Vector2(min_p.x,-min_p.z).atan2();
+	float a = Vector2(min_p.x,-min_p.z).angle();
 	return a*180.0/Math_PI;
 }
 
diff --git a/tools/export/blender25/godot_export_manager.py b/tools/export/blender25/godot_export_manager.py
index e390ae6ce3..582d76f94f 100644
--- a/tools/export/blender25/godot_export_manager.py
+++ b/tools/export/blender25/godot_export_manager.py
@@ -107,7 +107,6 @@ class godot_export_manager(bpy.types.Panel):
             col.prop(group,"use_triangles")
             col.prop(group,"use_copy_images")
             col.prop(group,"use_active_layers")
-            col.prop(group,"use_exclude_ctrl_bones")
             col.prop(group,"use_anim")
             col.prop(group,"use_anim_action_all")
             col.prop(group,"use_anim_skip_noexp")
@@ -351,7 +350,7 @@ class export_group(bpy.types.Operator):
                     bpy.data.objects[object.name].select = True
                     
             bpy.ops.object.transform_apply(location=group[self.idx].apply_loc, rotation=group[self.idx].apply_rot, scale=group[self.idx].apply_scale)
-            bpy.ops.export_scene.dae(check_existing=True, filepath=path, filter_glob="*.dae", object_types=group[self.idx].object_types, use_export_selected=group[self.idx].use_export_selected, use_mesh_modifiers=group[self.idx].use_mesh_modifiers, use_tangent_arrays=group[self.idx].use_tangent_arrays, use_triangles=group[self.idx].use_triangles, use_copy_images=group[self.idx].use_copy_images, use_active_layers=group[self.idx].use_active_layers, use_exclude_ctrl_bones=group[self.idx].use_exclude_ctrl_bones, use_anim=group[self.idx].use_anim, use_anim_action_all=group[self.idx].use_anim_action_all, use_anim_skip_noexp=group[self.idx].use_anim_skip_noexp, use_anim_optimize=group[self.idx].use_anim_optimize, anim_optimize_precision=group[self.idx].anim_optimize_precision, use_metadata=group[self.idx].use_metadata)    
+            bpy.ops.export_scene.dae(check_existing=True, filepath=path, filter_glob="*.dae", object_types=group[self.idx].object_types, use_export_selected=group[self.idx].use_export_selected, use_mesh_modifiers=group[self.idx].use_mesh_modifiers, use_tangent_arrays=group[self.idx].use_tangent_arrays, use_triangles=group[self.idx].use_triangles, use_copy_images=group[self.idx].use_copy_images, use_active_layers=group[self.idx].use_active_layers, use_anim=group[self.idx].use_anim, use_anim_action_all=group[self.idx].use_anim_action_all, use_anim_skip_noexp=group[self.idx].use_anim_skip_noexp, use_anim_optimize=group[self.idx].use_anim_optimize, anim_optimize_precision=group[self.idx].anim_optimize_precision, use_metadata=group[self.idx].use_metadata)    
           
             self.report({'INFO'}, '"'+group[self.idx].name+'"' + " Group exported." )  
             msg = "Export Group "+group[self.idx].name
@@ -422,7 +421,6 @@ class godot_export_groups(bpy.types.PropertyGroup):
 
     use_copy_images = BoolProperty(name="Copy Images",description="Copy Images (create images/ subfolder)",default=False)
     use_active_layers = BoolProperty(name="Active Layers",description="Export only objects on the active layers.",default=True)
-    use_exclude_ctrl_bones = BoolProperty(name="Exclude Control Bones",description="Exclude skeleton bones with names that begin with 'ctrl'.",default=True)
     use_anim = BoolProperty(name="Export Animation",description="Export keyframe animation",default=False)
     use_anim_action_all = BoolProperty(name="All Actions",description=("Export all actions for the first armature found in separate DAE files"),default=False)
     use_anim_skip_noexp = BoolProperty(name="Skip (-noexp) Actions",description="Skip exporting of actions whose name end in (-noexp). Useful to skip control animations.",default=True)
diff --git a/tools/freetype/SCsub b/tools/freetype/SCsub
index a31b8c4602..65b4827f9c 100644
--- a/tools/freetype/SCsub
+++ b/tools/freetype/SCsub
@@ -65,7 +65,5 @@ if (env["freetype"]=="builtin"):
 
 #   lib = env.Library("freetype_builtin",ft_sources)
 #   env.Prepend(LIBS=[lib])
- 
-Export('env')
-
 
+Export('env')
diff --git a/tools/ios_xcode_template/godot_ios.xcodeproj/project.pbxproj b/tools/ios_xcode_template/godot_ios.xcodeproj/project.pbxproj
index 7cd4da0f4a..4ae1ec8a53 100644
--- a/tools/ios_xcode_template/godot_ios.xcodeproj/project.pbxproj
+++ b/tools/ios_xcode_template/godot_ios.xcodeproj/project.pbxproj
@@ -192,7 +192,7 @@
 			isa = PBXProject;
 			attributes = {
 				LastUpgradeCheck = 0500;
-				ORGANIZATIONNAME = Okam;
+				ORGANIZATIONNAME = GodotEngine;
 				TargetAttributes = {
 					D0BCFE5F18AEBDA3004A7AAE = {
 						TestTargetID = D0BCFE3318AEBDA2004A7AAE;
diff --git a/tools/ios_xcode_template/godot_ios/godot_ios-Info.plist b/tools/ios_xcode_template/godot_ios/godot_ios-Info.plist
index f34ebb97f0..357970920a 100644
--- a/tools/ios_xcode_template/godot_ios/godot_ios-Info.plist
+++ b/tools/ios_xcode_template/godot_ios/godot_ios-Info.plist
@@ -9,7 +9,7 @@
 	<key>CFBundleExecutable</key>
 	<string>godot_opt.iphone</string>
 	<key>CFBundleIdentifier</key>
-	<string>com.okamstudios.${PRODUCT_NAME:rfc1034identifier}</string>
+	<string>org.godotengine.${PRODUCT_NAME:rfc1034identifier}</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundleName</key>
diff --git a/tools/ios_xcode_template/godot_ios/main.m b/tools/ios_xcode_template/godot_ios/main.m
index dca55f40ce..6bb6bc2188 100644
--- a/tools/ios_xcode_template/godot_ios/main.m
+++ b/tools/ios_xcode_template/godot_ios/main.m
@@ -1,10 +1,31 @@
-//
-//  main.m
-//  godot_ios
-//
-//  Created by Ariel m on 2/14/14.
-//  Copyright (c) 2014 Okam. All rights reserved.
-//
+/*************************************************************************/
+/*  main.m                                                               */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
 
 #import <UIKit/UIKit.h>
 
diff --git a/tools/ios_xcode_template/godot_iosTests/godot_iosTests-Info.plist b/tools/ios_xcode_template/godot_iosTests/godot_iosTests-Info.plist
index 3b3eec9a51..0f69aa80eb 100644
--- a/tools/ios_xcode_template/godot_iosTests/godot_iosTests-Info.plist
+++ b/tools/ios_xcode_template/godot_iosTests/godot_iosTests-Info.plist
@@ -7,7 +7,7 @@
 	<key>CFBundleExecutable</key>
 	<string>${EXECUTABLE_NAME}</string>
 	<key>CFBundleIdentifier</key>
-	<string>com.okamstudios.${PRODUCT_NAME:rfc1034identifier}</string>
+	<string>org.godotengine.${PRODUCT_NAME:rfc1034identifier}</string>
 	<key>CFBundleInfoDictionaryVersion</key>
 	<string>6.0</string>
 	<key>CFBundlePackageType</key>
diff --git a/tools/ios_xcode_template/godot_iosTests/godot_iosTests.m b/tools/ios_xcode_template/godot_iosTests/godot_iosTests.m
index fce6288670..75e44659e0 100644
--- a/tools/ios_xcode_template/godot_iosTests/godot_iosTests.m
+++ b/tools/ios_xcode_template/godot_iosTests/godot_iosTests.m
@@ -1,10 +1,31 @@
-//
-//  godot_iosTests.m
-//  godot_iosTests
-//
-//  Created by Ariel m on 2/14/14.
-//  Copyright (c) 2014 Okam. All rights reserved.
-//
+/*************************************************************************/
+/*  godot_iosTests.m                                                     */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                    http://www.godotengine.org                         */
+/*************************************************************************/
+/* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur.                 */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
 
 #import <XCTest/XCTest.h>
 
diff --git a/tools/osx_template.app/Contents/Resources/icon.icns b/tools/osx_template.app/Contents/Resources/icon.icns
index 18bc68d6ea..4a3dc0415a 100644
--- a/tools/osx_template.app/Contents/Resources/icon.icns
+++ b/tools/osx_template.app/Contents/Resources/icon.icns
diff --git a/tools/pck/SCsub b/tools/pck/SCsub
index b1fed9a472..cf98ae145d 100644
--- a/tools/pck/SCsub
+++ b/tools/pck/SCsub
@@ -2,4 +2,3 @@ Import('env')
 
 if env["tools"] == "yes":
 	env.add_source_files(env.tool_sources, "*.cpp")
-
diff --git a/tools/pe_bliss/README b/tools/pe_bliss/README
new file mode 100644
index 0000000000..d5d1355444
--- /dev/null
+++ b/tools/pe_bliss/README
@@ -0,0 +1,84 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+
+Открытая бесплатная библиотека для работы с PE-файлами PE Bliss.
+Бесплатна к использованию, модификации и распространению.
+Автор: DX
+(c) DX 2011-2012, kaimi.ru
+
+Совместимость: Windows, Linux
+
+Возможности:
+[+] Создание PE или PE+ файла с нуля
+[+] Чтение 32-разрядных и 64-разрядных PE-файлов (PE, PE+) и единообразная работа с ними
+[+] Пересборка 32-разрядных и 64-разрядных PE-файлов
+[+] Работа с директориями и заголовками
+[+] Конвертирование адресов
+[+] Чтение и редактирование секций PE-файла
+[+] Чтение и редактирование таблицы импортов
+[+] Чтение и редактирование таблицы экспортов
+[+] Чтение и редактирование таблиц релокаций
+[+] Чтение и редактирование ресурсов
+[+] Чтение и редактирование TLS
+[+] Чтение и редактирование конфигурации образа (image config)
+[+] Чтение базовой информации .NET
+[+] Чтение и редактирование информации о привязанном импорте
+[+] Чтение директории исключений (только PE+)
+[+] Чтение отладочной директории с расширенной информацией
+[+] Вычисление энтропии
+[+] Изменение файлового выравнивания
+[+] Изменение базового адреса загрузки
+[+] Работа с DOS Stub'ом и Rich overlay
+[+] Высокоуровневое чтение ресурсов: картинки, иконки, курсоры, информация о версии, строковые таблицы, таблицы сообщений
+[+] Высокоуровневое редактирование ресурсов: картинки, иконки, курсоры, информация о версии
+
+[English]
+PE Bliss is a free, open-source library for working with PE files.
+Free to use, modify, and distribute.
+Author: DX
+(c) DX 2011-2012, kaimi.ru
+Compatibility: Windows, Linux
+
+### Capabilities:
+[+] Creation of PE or PE+ files from scratch
+[+] Reading 32-bit and 64-bit PE files (PE, PE+) and working with them uniformly
+[+] Rebuilding 32-bit and 64-bit PE files
+[+] Working with directories and headers
+[+] Converting addresses
+[+] Reading and editing sections of a PE file
+[+] Reading and editing the import table
+[+] Reading and editing the export table
+[+] Reading and editing relocation tables
+[+] Reading and editing resources
+[+] Reading and editing TLS
+[+] Reading and editing the image configuration (image config)
+[+] Reading basic .NET information
+[+] Reading and editing bound import information
+[+] Reading the exception directory (PE+ only)
+[+] Reading the debug directory with extended information
+[+] Entropy calculation
+[+] Changing file alignment
+[+] Changing the base load address
+[+] Working with the DOS stub and Rich overlay
+[+] High-level reading of resources: images, icons, cursors, version information, string tables, message tables
+[+] High-level editing of resources: images, icons, cursors, version information
+\ No newline at end of file
diff --git a/tools/pe_bliss/SCsub b/tools/pe_bliss/SCsub
new file mode 100644
index 0000000000..34524f10ef
--- /dev/null
+++ b/tools/pe_bliss/SCsub
@@ -0,0 +1,5 @@
+Import('env')
+
+env.add_source_files(env.tool_sources,"*.cpp")
+
+Export('env')
diff --git a/tools/pe_bliss/entropy.cpp b/tools/pe_bliss/entropy.cpp
new file mode 100644
index 0000000000..acefa63e83
--- /dev/null
+++ b/tools/pe_bliss/entropy.cpp
@@ -0,0 +1,111 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <cmath>
+#include "entropy.h"
+#include "utils.h"
+
+namespace pe_bliss
+{
+//Calculates entropy for PE image section
+double entropy_calculator::calculate_entropy(const section& s)
+{
+	if(s.get_raw_data().empty()) //Don't count entropy for empty sections
+		throw pe_exception("Section is empty", pe_exception::section_is_empty);
+
+	return calculate_entropy(s.get_raw_data().data(), s.get_raw_data().length());
+}
+
+//Calculates entropy for istream (from current position of stream); seeks back to the starting position when done. Throws pe_exception if the stream is bad or the computed length is zero
+double entropy_calculator::calculate_entropy(std::istream& file)
+{
+	uint32_t byte_count[256] = {0}; //Occurrence count for each of the 256 possible byte values
+
+	if(file.bad())
+		throw pe_exception("Stream is bad", pe_exception::stream_is_bad);
+
+	std::streamoff pos = file.tellg(); //Remember the starting position so it can be restored below
+
+	std::streamoff length = pe_utils::get_file_size(file);
+	length -= file.tellg(); //Subtract the current offset from the size reported by get_file_size
+
+	if(!length) //Don't calculate entropy for empty buffers
+		throw pe_exception("Data length is zero", pe_exception::data_is_empty);
+
+	//Count bytes
+	for(std::streamoff i = 0; i != length; ++i)
+		++byte_count[static_cast<unsigned char>(file.get())];
+
+	file.seekg(pos);
+
+	return calculate_entropy(byte_count, length);
+}
+
+//Calculates entropy for a data block of "length" bytes starting at "data". Throws pe_exception (data_is_empty) if length is zero
+double entropy_calculator::calculate_entropy(const char* data, size_t length)
+{
+	uint32_t byte_count[256] = {0}; //Occurrence count for each of the 256 possible byte values
+
+	if(!length) //Don't calculate entropy for empty buffers
+		throw pe_exception("Data length is zero", pe_exception::data_is_empty);
+
+	//Count bytes (cast to unsigned char so the value is a valid 0..255 index)
+	for(size_t i = 0; i != length; ++i)
+		++byte_count[static_cast<unsigned char>(data[i])];
+
+	return calculate_entropy(byte_count, length);
+}
+
+//Calculates entropy for this PE file (only the raw data of its sections, counted together as one data set)
+double entropy_calculator::calculate_entropy(const pe_base& pe)
+{
+	uint32_t byte_count[256] = {0}; //Occurrence count for each of the 256 possible byte values
+
+	size_t total_data_length = 0; //Combined raw data length of all sections
+
+	//Count bytes for each section
+	for(section_list::const_iterator it = pe.get_image_sections().begin(); it != pe.get_image_sections().end(); ++it)
+	{
+		const std::string& data = (*it).get_raw_data();
+		size_t length = data.length();
+		total_data_length += length;
+		for(size_t i = 0; i != length; ++i)
+			++byte_count[static_cast<unsigned char>(data[i])];
+	}
+
+	return calculate_entropy(byte_count, total_data_length);
+}
+
+//Calculates entropy from per-byte-value occurrence counts and the total number of bytes counted
+double entropy_calculator::calculate_entropy(const uint32_t byte_count[256], std::streamoff total_length)
+{
+	double entropy = 0.; //Entropy result value
+	//Accumulate |p * log(p) * log_2| over all byte values (log_2 is defined in pe_utils; presumably 1/ln(2), giving a result in bits - confirm)
+	for(uint32_t i = 0; i < 256; ++i)
+	{
+		double temp = static_cast<double>(byte_count[i]) / total_length; //Relative frequency of byte value i
+		if(temp > 0.) //Skip byte values that never occur
+			entropy += std::abs(temp * (std::log(temp) * pe_utils::log_2));
+	}
+
+	return entropy;
+}
+}
diff --git a/tools/pe_bliss/entropy.h b/tools/pe_bliss/entropy.h
new file mode 100644
index 0000000000..7d225a3e32
--- /dev/null
+++ b/tools/pe_bliss/entropy.h
@@ -0,0 +1,51 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <istream>
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+class entropy_calculator
+{
+public:
+	//Calculates entropy for PE image section
+	static double calculate_entropy(const section& s);
+
+	//Calculates entropy for istream (from current position of stream)
+	static double calculate_entropy(std::istream& file);
+
+	//Calculates entropy for data block
+	static double calculate_entropy(const char* data, size_t length);
+
+	//Calculates entropy for this PE file (only section data)
+	static double calculate_entropy(const pe_base& pe);
+
+private:
+	entropy_calculator();
+	entropy_calculator(const entropy_calculator&);
+	entropy_calculator& operator=(const entropy_calculator&);
+
+	//Calculates entropy from bytes count
+	static double calculate_entropy(const uint32_t byte_count[256], std::streamoff total_length);
+};
+}
diff --git a/tools/pe_bliss/file_version_info.cpp b/tools/pe_bliss/file_version_info.cpp
new file mode 100644
index 0000000000..3f2ba454b4
--- /dev/null
+++ b/tools/pe_bliss/file_version_info.cpp
@@ -0,0 +1,440 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "file_version_info.h"
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Default constructor
+file_version_info::file_version_info()
+	:file_version_ms_(0), file_version_ls_(0),
+	product_version_ms_(0), product_version_ls_(0),
+	file_flags_(0),
+	file_os_(0),
+	file_type_(0), file_subtype_(0),
+	file_date_ms_(0), file_date_ls_(0)
+{}
+
+//Constructor from Windows fixed version info structure
+file_version_info::file_version_info(const vs_fixedfileinfo& info)
+	:file_version_ms_(info.dwFileVersionMS), file_version_ls_(info.dwFileVersionLS),
+	product_version_ms_(info.dwProductVersionMS), product_version_ls_(info.dwProductVersionLS),
+	file_flags_(info.dwFileFlags),
+	file_os_(info.dwFileOS),
+	file_type_(info.dwFileType), file_subtype_(info.dwFileSubtype),
+	file_date_ms_(info.dwFileDateMS), file_date_ls_(info.dwFileDateLS)
+{}
+
+//Returns true if file is debug-built
+bool file_version_info::is_debug() const
+{
+	return file_flags_ & vs_ff_debug ? true : false;
+}
+
+//Returns true if file is a prerelease build
+bool file_version_info::is_prerelease() const
+{
+	return file_flags_ & vs_ff_prerelease ? true : false;
+}
+
+//Returns true if file is patched
+bool file_version_info::is_patched() const
+{
+	return file_flags_ & vs_ff_patched ? true : false;
+}
+
+//Returns true if private build
+bool file_version_info::is_private_build() const
+{
+	return file_flags_ & vs_ff_privatebuild ? true : false;
+}
+
+//Returns true if special build
+bool file_version_info::is_special_build() const
+{
+	return file_flags_ & vs_ff_specialbuild ? true : false;
+}
+
+//Returns true if info inferred
+bool file_version_info::is_info_inferred() const
+{
+	return file_flags_ & vs_ff_infoinferred ? true : false;
+}
+
+//Returns file flags (raw DWORD)
+uint32_t file_version_info::get_file_flags() const
+{
+	return file_flags_;
+}
+
+//Returns file version most significant DWORD
+uint32_t file_version_info::get_file_version_ms() const
+{
+	return file_version_ms_;
+}
+
+//Returns file version least significant DWORD
+uint32_t file_version_info::get_file_version_ls() const
+{
+	return file_version_ls_;
+}
+
+//Returns product version most significant DWORD
+uint32_t file_version_info::get_product_version_ms() const
+{
+	return product_version_ms_;
+}
+
+//Returns product version least significant DWORD
+uint32_t file_version_info::get_product_version_ls() const
+{
+	return product_version_ls_;
+}
+
+//Returns file OS type (raw DWORD)
+uint32_t file_version_info::get_file_os_raw() const
+{
+	return file_os_;
+}
+
+//Returns file OS type
+file_version_info::file_os_type file_version_info::get_file_os() const
+{
+	//Determine file operating system type
+	switch(file_os_)
+	{
+	case vos_dos:
+		return file_os_dos;
+
+	case vos_os216:
+		return file_os_os216;
+
+	case vos_os232:
+		return file_os_os232;
+
+	case vos_nt:
+		return file_os_nt;
+
+	case vos_wince:
+		return file_os_wince;
+
+	case vos__windows16:
+		return file_os_win16;
+
+	case vos__pm16:
+		return file_os_pm16;
+
+	case vos__pm32:
+		return file_os_pm32;
+
+	case vos__windows32:
+		return file_os_win32;
+
+	case vos_dos_windows16:
+		return file_os_dos_win16;
+
+	case vos_dos_windows32:
+		return file_os_dos_win32;
+
+	case vos_os216_pm16:
+		return file_os_os216_pm16;
+
+	case vos_os232_pm32:
+		return file_os_os232_pm32;
+
+	case vos_nt_windows32:
+		return file_os_nt_win32;
+	}
+
+	return file_os_unknown;
+}
+
+//Returns file type (raw DWORD)
+uint32_t file_version_info::get_file_type_raw() const
+{
+	return file_type_;
+}
+
+//Returns file type
+file_version_info::file_type file_version_info::get_file_type() const
+{
+	//Determine file type
+	switch(file_type_)
+	{
+	case vft_app:
+		return file_type_application;
+
+	case vft_dll:
+		return file_type_dll;
+
+	case vft_drv:
+		return file_type_driver;
+
+	case vft_font:
+		return file_type_font;
+
+	case vft_vxd:
+		return file_type_vxd;
+
+	case vft_static_lib:
+		return file_type_static_lib;
+	}
+
+	return file_type_unknown;
+}
+
+//Returns file subtype (usually non-zero for drivers and fonts)
+uint32_t file_version_info::get_file_subtype() const
+{
+	return file_subtype_;
+}
+
+//Returns file date most significant DWORD
+uint32_t file_version_info::get_file_date_ms() const
+{
+	return file_date_ms_;
+}
+
+//Returns file date least significant DWORD
+uint32_t file_version_info::get_file_date_ls() const
+{
+	return file_date_ls_;
+}
+
+//Helper to set file flag
+void file_version_info::set_file_flag(uint32_t flag)
+{
+	file_flags_ |= flag;
+}
+
+//Helper to clear file flag
+void file_version_info::clear_file_flag(uint32_t flag)
+{
+	file_flags_ &= ~flag;
+}
+
+//Helper to set or clear file flag
+void file_version_info::set_file_flag(uint32_t flag, bool set_flag)
+{
+	set_flag ? set_file_flag(flag) : clear_file_flag(flag);
+}
+
+//Sets if file is debug-built
+void file_version_info::set_debug(bool debug)
+{
+	set_file_flag(vs_ff_debug, debug);
+}
+
+//Sets if file is prerelease
+void file_version_info::set_prerelease(bool prerelease)
+{
+	set_file_flag(vs_ff_prerelease, prerelease);
+}
+
+//Sets if file is patched
+void file_version_info::set_patched(bool patched)
+{
+	set_file_flag(vs_ff_patched, patched);
+}
+
+//Sets if private build
+void file_version_info::set_private_build(bool private_build)
+{
+	set_file_flag(vs_ff_privatebuild, private_build);
+}
+
+//Sets if special build
+void file_version_info::set_special_build(bool special_build)
+{
+	set_file_flag(vs_ff_specialbuild, special_build);
+}
+
+//Sets if info inferred
+void file_version_info::set_info_inferred(bool info_inferred)
+{
+	set_file_flag(vs_ff_infoinferred, info_inferred);
+}
+
+//Sets flags (raw DWORD)
+void file_version_info::set_file_flags(uint32_t file_flags)
+{
+	file_flags_ = file_flags;
+}
+
+//Sets file version most significant DWORD
+void file_version_info::set_file_version_ms(uint32_t file_version_ms)
+{
+	file_version_ms_ = file_version_ms;
+}
+
+//Sets file version least significant DWORD
+void file_version_info::set_file_version_ls(uint32_t file_version_ls)
+{
+	file_version_ls_ = file_version_ls;
+}
+
+//Sets product version most significant DWORD
+void file_version_info::set_product_version_ms(uint32_t product_version_ms)
+{
+	product_version_ms_ = product_version_ms;
+}
+
+//Sets product version least significant DWORD
+void file_version_info::set_product_version_ls(uint32_t product_version_ls)
+{
+	product_version_ls_ = product_version_ls;
+}
+
+//Sets file OS type (raw DWORD)
+void file_version_info::set_file_os_raw(uint32_t file_os)
+{
+	file_os_ = file_os;
+}
+
+//Sets file OS type
+void file_version_info::set_file_os(file_os_type file_os)
+{
+	//Determine file operating system type
+	switch(file_os)
+	{
+	case file_os_dos:
+		file_os_ = vos_dos;
+		return;
+
+	case file_os_os216:
+		file_os_ = vos_os216;
+		return;
+
+	case file_os_os232:
+		file_os_ = vos_os232;
+		return;
+
+	case file_os_nt:
+		file_os_ = vos_nt;
+		return;
+
+	case file_os_wince:
+		file_os_ = vos_wince;
+		return;
+
+	case file_os_win16:
+		file_os_ = vos__windows16;
+		return;
+		
+	case file_os_pm16:
+		file_os_ = vos__pm16;
+		return;
+
+	case file_os_pm32:
+		file_os_ = vos__pm32;
+		return;
+
+	case file_os_win32:
+		file_os_ = vos__windows32;
+		return;
+
+	case file_os_dos_win16:
+		file_os_ = vos_dos_windows16;
+		return;
+
+	case file_os_dos_win32:
+		file_os_ = vos_dos_windows32;
+		return;
+
+	case file_os_os216_pm16:
+		file_os_ = vos_os216_pm16;
+		return;
+
+	case file_os_os232_pm32:
+		file_os_ = vos_os232_pm32;
+		return;
+
+	case file_os_nt_win32:
+		file_os_ = vos_nt_windows32;
+		return;
+
+	default:
+		return;
+	}
+}
+
+//Sets file type (raw DWORD)
+void file_version_info::set_file_type_raw(uint32_t file_type)
+{
+	file_type_ = file_type;
+}
+
+//Sets file type
+void file_version_info::set_file_type(file_type file_type)
+{
+	//Determine file type
+	switch(file_type)
+	{
+	case file_type_application:
+		file_type_ = vft_app;
+		return;
+		
+	case file_type_dll:
+		file_type_ = vft_dll;
+		return;
+
+	case file_type_driver:
+		file_type_ = vft_drv;
+		return;
+
+	case file_type_font:
+		file_type_ = vft_font;
+		return;
+
+	case file_type_vxd:
+		file_type_ = vft_vxd;
+		return;
+
+	case file_type_static_lib:
+		file_type_ = vft_static_lib;
+		return;
+
+	default:
+		return;
+	}
+}
+
+//Sets file subtype (usually non-zero for drivers and fonts)
+void file_version_info::set_file_subtype(uint32_t file_subtype)
+{
+	file_subtype_ = file_subtype;
+}
+
+//Sets file date most significant DWORD
+void file_version_info::set_file_date_ms(uint32_t file_date_ms)
+{
+	file_date_ms_ = file_date_ms;
+}
+
+//Sets file date least significant DWORD
+void file_version_info::set_file_date_ls(uint32_t file_date_ls)
+{
+	file_date_ls_ = file_date_ls;
+}
+}
diff --git a/tools/pe_bliss/file_version_info.h b/tools/pe_bliss/file_version_info.h
new file mode 100644
index 0000000000..d898351ba1
--- /dev/null
+++ b/tools/pe_bliss/file_version_info.h
@@ -0,0 +1,199 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <map>
+#include "stdint_defs.h"
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+//Structure representing fixed file version info
+class file_version_info
+{
+public:
+	//Enumeration of file operating system types
+	enum file_os_type
+	{
+		file_os_unknown,
+		file_os_dos,
+		file_os_os216,
+		file_os_os232,
+		file_os_nt,
+		file_os_wince,
+		file_os_win16,
+		file_os_pm16,
+		file_os_pm32,
+		file_os_win32,
+		file_os_dos_win16,
+		file_os_dos_win32,
+		file_os_os216_pm16,
+		file_os_os232_pm32,
+		file_os_nt_win32
+	};
+
+	//Enumeration of file types
+	enum file_type
+	{
+		file_type_unknown,
+		file_type_application,
+		file_type_dll,
+		file_type_driver,
+		file_type_font,
+		file_type_vxd,
+		file_type_static_lib
+	};
+
+public:
+	//Default constructor
+	file_version_info();
+	//Constructor from Windows fixed version info structure
+	explicit file_version_info(const pe_win::vs_fixedfileinfo& info);
+
+public: //Getters
+	//Returns true if file is debug-built
+	bool is_debug() const;
+	//Returns true if file is prerelease
+	bool is_prerelease() const;
+	//Returns true if file is patched
+	bool is_patched() const;
+	//Returns true if private build
+	bool is_private_build() const;
+	//Returns true if special build
+	bool is_special_build() const;
+	//Returns true if info inferred
+	bool is_info_inferred() const;
+	//Returns file flags (raw DWORD)
+	uint32_t get_file_flags() const;
+
+	//Returns file version most significant DWORD
+	uint32_t get_file_version_ms() const;
+	//Returns file version least significant DWORD
+	uint32_t get_file_version_ls() const;
+	//Returns product version most significant DWORD
+	uint32_t get_product_version_ms() const;
+	//Returns product version least significant DWORD
+	uint32_t get_product_version_ls() const;
+
+	//Returns file OS type (raw DWORD)
+	uint32_t get_file_os_raw() const;
+	//Returns file OS type
+	file_os_type get_file_os() const;
+
+	//Returns file type (raw DWORD)
+	uint32_t get_file_type_raw() const;
+	//Returns file type
+	file_type get_file_type() const;
+
+	//Returns file subtype (usually non-zero for drivers and fonts)
+	uint32_t get_file_subtype() const;
+
+	//Returns file date most significant DWORD
+	uint32_t get_file_date_ms() const;
+	//Returns file date least significant DWORD
+	uint32_t get_file_date_ls() const;
+
+	//Returns file version string
+	template<typename T>
+	const std::basic_string<T> get_file_version_string() const
+	{
+		return get_version_string<T>(file_version_ms_, file_version_ls_);
+	}
+
+	//Returns product version string
+	template<typename T>
+	const std::basic_string<T> get_product_version_string() const
+	{
+		return get_version_string<T>(product_version_ms_, product_version_ls_);
+	}
+		
+public: //Setters
+	//Sets if file is debug-built
+	void set_debug(bool debug);
+	//Sets if file is prerelease
+	void set_prerelease(bool prerelease);
+	//Sets if file is patched
+	void set_patched(bool patched);
+	//Sets if private build
+	void set_private_build(bool private_build);
+	//Sets if special build
+	void set_special_build(bool special_build);
+	//Sets if info inferred
+	void set_info_inferred(bool info_inferred);
+	//Sets flags (raw DWORD)
+	void set_file_flags(uint32_t file_flags);
+
+	//Sets file version most significant DWORD
+	void set_file_version_ms(uint32_t file_version_ms);
+	//Sets file version least significant DWORD
+	void set_file_version_ls(uint32_t file_version_ls);
+	//Sets product version most significant DWORD
+	void set_product_version_ms(uint32_t product_version_ms);
+	//Sets product version least significant DWORD
+	void set_product_version_ls(uint32_t product_version_ls);
+
+	//Sets file OS type (raw DWORD)
+	void set_file_os_raw(uint32_t file_os);
+	//Sets file OS type
+	void set_file_os(file_os_type file_os);
+
+	//Sets file type (raw DWORD)
+	void set_file_type_raw(uint32_t file_type);
+	//Sets file type
+	void set_file_type(file_type file_type);
+
+	//Sets file subtype (usually non-zero for drivers and fonts)
+	void set_file_subtype(uint32_t file_subtype);
+
+	//Sets file date most significant DWORD
+	void set_file_date_ms(uint32_t file_date_ms);
+	//Sets file date least significant DWORD
+	void set_file_date_ls(uint32_t file_date_ls);
+
+private:
+	//Helper to convert version DWORDs to string
+	template<typename T>
+	static const std::basic_string<T> get_version_string(uint32_t ms, uint32_t ls)
+	{
+		std::basic_stringstream<T> ss;
+		ss << (ms >> 16) << static_cast<T>(L'.')
+			<< (ms & 0xFFFF) << static_cast<T>(L'.')
+			<< (ls >> 16) << static_cast<T>(L'.')
+			<< (ls & 0xFFFF);
+		return ss.str();
+	}
+
+	//Helper to set file flag
+	void set_file_flag(uint32_t flag);
+	//Helper to clear file flag
+	void clear_file_flag(uint32_t flag);
+	//Helper to set or clear file flag
+	void set_file_flag(uint32_t flag, bool set_flag);
+
+	uint32_t file_version_ms_, file_version_ls_,
+		product_version_ms_, product_version_ls_;
+	uint32_t file_flags_;
+	uint32_t file_os_;
+	uint32_t file_type_, file_subtype_;
+	uint32_t file_date_ms_, file_date_ls_;
+};
+}
diff --git a/tools/pe_bliss/message_table.cpp b/tools/pe_bliss/message_table.cpp
new file mode 100644
index 0000000000..909be5d494
--- /dev/null
+++ b/tools/pe_bliss/message_table.cpp
@@ -0,0 +1,81 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "message_table.h"
+#include "utils.h"
+
+namespace pe_bliss
+{
+//Default constructor
+message_table_item::message_table_item()
+	:unicode_(false)
+{}
+
+//Constructor from ANSI string
+message_table_item::message_table_item(const std::string& str)
+	:unicode_(false), ansi_str_(str)
+{
+	pe_utils::strip_nullbytes(ansi_str_);
+}
+
+//Constructor from UNICODE string
+message_table_item::message_table_item(const std::wstring& str)
+	:unicode_(true), unicode_str_(str)
+{
+	pe_utils::strip_nullbytes(unicode_str_);
+}
+
+//Returns true if contained string is unicode
+bool message_table_item::is_unicode() const
+{
+	return unicode_;
+}
+
+//Returns ANSI string
+const std::string& message_table_item::get_ansi_string() const
+{
+	return ansi_str_;
+}
+
+//Returns UNICODE string
+const std::wstring& message_table_item::get_unicode_string() const
+{
+	return unicode_str_;
+}
+
+//Sets ANSI string (clears UNICODE one)
+void message_table_item::set_string(const std::string& str)
+{
+	ansi_str_ = str;
+	pe_utils::strip_nullbytes(ansi_str_);
+	unicode_str_.clear();
+	unicode_ = false;
+}
+
+//Sets UNICODE string (clears ANSI one)
+void message_table_item::set_string(const std::wstring& str)
+{
+	unicode_str_ = str;
+	pe_utils::strip_nullbytes(unicode_str_);
+	ansi_str_.clear();
+	unicode_ = true;
+}
+}
diff --git a/tools/pe_bliss/message_table.h b/tools/pe_bliss/message_table.h
new file mode 100644
index 0000000000..5a3feb32c1
--- /dev/null
+++ b/tools/pe_bliss/message_table.h
@@ -0,0 +1,56 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <map>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+//Structure representing message table string
+class message_table_item
+{
+public:
+	//Default constructor
+	message_table_item();
+	//Constructors from ANSI and UNICODE strings
+	explicit message_table_item(const std::string& str);
+	explicit message_table_item(const std::wstring& str);
+
+	//Returns true if string is UNICODE
+	bool is_unicode() const;
+	//Returns ANSI string
+	const std::string& get_ansi_string() const;
+	//Returns UNICODE string
+	const std::wstring& get_unicode_string() const;
+
+public:
+	//Sets ANSI or UNICODE string
+	void set_string(const std::string& str);
+	void set_string(const std::wstring& str);
+
+private:
+	bool unicode_;
+	std::string ansi_str_;
+	std::wstring unicode_str_;
+};
+}
diff --git a/tools/pe_bliss/pe_base.cpp b/tools/pe_bliss/pe_base.cpp
new file mode 100644
index 0000000000..97baa17cb3
--- /dev/null
+++ b/tools/pe_bliss/pe_base.cpp
@@ -0,0 +1,1680 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string>
+#include <vector>
+#include <istream>
+#include <ostream>
+#include <algorithm>
+#include <cmath>
+#include <set>
+#include <string.h>
+#include "pe_exception.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Constructor
+pe_base::pe_base(std::istream& file, const pe_properties& props, bool read_debug_raw_data)
+{
+	props_ = props.duplicate().release();
+
+	//Save istream state
+	std::ios_base::iostate state = file.exceptions();
+	std::streamoff old_offset = file.tellg();
+
+	try
+	{
+		file.exceptions(std::ios::goodbit);
+		//Read DOS header, PE headers and section data
+		read_dos_header(file);
+		read_pe(file, read_debug_raw_data);
+	}
+	catch(const std::exception&)
+	{
+		delete props_; //Destructor is not run for a throwing constructor
+		//Restore istream state: clear error bits first, or seekg() is a no-op
+		file.clear();
+		file.seekg(old_offset);
+		file.exceptions(state);
+		throw;
+	}
+
+	//Restore istream state: clear error bits first, or seekg() is a no-op
+	file.clear();
+	file.seekg(old_offset);
+	file.exceptions(state);
+}
+
+pe_base::pe_base(const pe_properties& props, uint32_t section_alignment, bool dll, uint16_t subsystem)
+{
+	props_ = props.duplicate().release();
+	props_->create_pe(section_alignment, subsystem);
+
+	has_overlay_ = false;
+	memset(&dos_header_, 0, sizeof(dos_header_));
+
+	dos_header_.e_magic = 0x5A4D; //"MZ"
+	//Magic numbers from MSVC++ build
+	dos_header_.e_maxalloc = 0xFFFF;
+	dos_header_.e_cblp = 0x90;
+	dos_header_.e_cp = 3;
+	dos_header_.e_cparhdr = 4;
+	dos_header_.e_sp = 0xB8;
+	dos_header_.e_lfarlc = 64;
+
+	set_characteristics(image_file_executable_image | image_file_relocs_stripped);
+
+	if(get_pe_type() == pe_type_32)
+		set_characteristics_flags(image_file_32bit_machine);
+
+	if(dll)
+		set_characteristics_flags(image_file_dll);
+
+	set_subsystem_version(5, 1); //WinXP
+	set_os_version(5, 1); //WinXP
+}
+
+pe_base::pe_base(const pe_base& pe)
+	:dos_header_(pe.dos_header_),
+	rich_overlay_(pe.rich_overlay_),
+	sections_(pe.sections_),
+	has_overlay_(pe.has_overlay_),
+	full_headers_data_(pe.full_headers_data_),
+	debug_data_(pe.debug_data_),
+	props_(0)
+{
+	props_ = pe.props_->duplicate().release();
+}
+
+pe_base& pe_base::operator=(const pe_base& pe)
+{
+	//Duplicate first: safe for self-assignment and if duplicate() throws
+	pe_properties* new_props = pe.props_->duplicate().release();
+	dos_header_ = pe.dos_header_;
+	rich_overlay_ = pe.rich_overlay_;
+	sections_ = pe.sections_;
+	has_overlay_ = pe.has_overlay_;
+	full_headers_data_ = pe.full_headers_data_;
+	debug_data_ = pe.debug_data_;
+	delete props_;
+	props_ = new_props;
+	return *this;
+}
+
+pe_base::~pe_base()
+{
+	delete props_;
+}
+
+//Returns dos header
+const image_dos_header& pe_base::get_dos_header() const
+{
+	return dos_header_;
+}
+
+//Returns dos header
+image_dos_header& pe_base::get_dos_header()
+{
+	return dos_header_;
+}
+
+//Returns PE headers start position (e_lfanew)
+int32_t pe_base::get_pe_header_start() const
+{
+	return dos_header_.e_lfanew;
+}
+
+//Strips MSVC stub overlay
+void pe_base::strip_stub_overlay()
+{
+	rich_overlay_.clear();
+}
+
+//Fills MSVC stub overlay with character c
+void pe_base::fill_stub_overlay(char c)
+{
+	if(rich_overlay_.length())
+		rich_overlay_.assign(rich_overlay_.length(), c);
+}
+
+//Sets MSVC stub overlay
+void pe_base::set_stub_overlay(const std::string& data)
+{
+	rich_overlay_ = data;
+}
+
+//Returns stub overlay
+const std::string& pe_base::get_stub_overlay() const
+{
+	return rich_overlay_;
+}
+
+//Realigns all sections
+void pe_base::realign_all_sections()
+{
+	for(unsigned int i = 0; i < sections_.size(); i++)
+		realign_section(i);
+}
+
+//Returns number of sections from PE header
+uint16_t pe_base::get_number_of_sections() const
+{
+	return props_->get_number_of_sections();
+}
+
+//Updates number of sections in PE header
+uint16_t pe_base::update_number_of_sections()
+{
+	uint16_t new_number = static_cast<uint16_t>(sections_.size());
+	props_->set_number_of_sections(new_number);
+	return new_number;
+}
+
+//Returns section alignment
+uint32_t pe_base::get_section_alignment() const
+{
+	return props_->get_section_alignment();
+}
+
+//Returns image sections list
+section_list& pe_base::get_image_sections()
+{
+	return sections_;
+}
+
+//Returns image sections list
+const section_list& pe_base::get_image_sections() const
+{
+	return sections_;
+}
+
+//Realigns section by index
+void pe_base::realign_section(uint32_t index)
+{
+	//Check index
+	if(sections_.size() <= index)
+		throw pe_exception("Section not found", pe_exception::section_not_found);
+
+	//Get section iterator
+	section_list::iterator it = sections_.begin() + index;
+	section& s = *it;
+
+	//Calculate, how many null bytes we have in the end of raw section data
+	std::size_t strip = 0;
+	for(std::size_t i = (*it).get_raw_data().length(); i >= 1; --i)
+	{
+		if(s.get_raw_data()[i - 1] == 0)
+			strip++;
+		else
+			break;
+	}
+
+	if(it == sections_.end() - 1) //If we're realigning the last section
+	{
+		//We can strip ending null bytes
+		s.set_size_of_raw_data(static_cast<uint32_t>(s.get_raw_data().length() - strip));
+		s.get_raw_data().resize(s.get_raw_data().length() - strip, 0);
+	}
+	else
+	{
+		//Else just set size of raw data
+		uint32_t raw_size_aligned = s.get_aligned_raw_size(get_file_alignment());
+		s.set_size_of_raw_data(raw_size_aligned);
+		s.get_raw_data().resize(raw_size_aligned, 0);
+	}
+}
+
+//Returns file alignment
+uint32_t pe_base::get_file_alignment() const
+{
+	return props_->get_file_alignment();
+}
+
+//Sets file alignment
+void pe_base::set_file_alignment(uint32_t alignment)
+{
+	//Check alignment
+	if(alignment < minimum_file_alignment)
+		throw pe_exception("File alignment can't be less than 512", pe_exception::incorrect_file_alignment);
+
+	if(!pe_utils::is_power_of_2(alignment))
+		throw pe_exception("File alignment must be a power of 2", pe_exception::incorrect_file_alignment);
+
+	if(alignment > get_section_alignment())
+		throw pe_exception("File alignment must be <= section alignment", pe_exception::incorrect_file_alignment);
+
+	//Set file alignment without any additional checks
+	set_file_alignment_unchecked(alignment);
+}
+
+//Returns size of image
+uint32_t pe_base::get_size_of_image() const
+{
+	return props_->get_size_of_image();
+}
+
+//Returns image entry point
+uint32_t pe_base::get_ep() const
+{
+	return props_->get_ep();
+}
+
+//Sets image entry point (just a value of PE header)
+void pe_base::set_ep(uint32_t new_ep)
+{
+	props_->set_ep(new_ep);
+}
+
+//Returns number of RVA and sizes (number of DATA_DIRECTORY entries)
+uint32_t pe_base::get_number_of_rvas_and_sizes() const
+{
+	return props_->get_number_of_rvas_and_sizes();
+}
+
+//Sets number of RVA and sizes (number of DATA_DIRECTORY entries)
+void pe_base::set_number_of_rvas_and_sizes(uint32_t number)
+{
+	props_->set_number_of_rvas_and_sizes(number);
+}
+
+//Returns PE characteristics
+uint16_t pe_base::get_characteristics() const
+{
+	return props_->get_characteristics();
+}
+
+//Sets PE characteristics (a value inside header)
+void pe_base::set_characteristics(uint16_t ch)
+{
+	props_->set_characteristics(ch);
+}
+
+//Returns section from RVA
+section& pe_base::section_from_rva(uint32_t rva)
+{
+	//Search for section
+	for(section_list::iterator i = sections_.begin(); i != sections_.end(); ++i)
+	{
+		section& s = *i;
+		//Return section if found
+		if(rva >= s.get_virtual_address() && rva < s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()))
+			return s;
+	}
+
+	throw pe_exception("No section found by presented address", pe_exception::no_section_found);
+}
+
+//Returns section from RVA
+const section& pe_base::section_from_rva(uint32_t rva) const
+{
+	//Search for section
+	for(section_list::const_iterator i = sections_.begin(); i != sections_.end(); ++i)
+	{
+		const section& s = *i;
+		//Return section if found
+		if(rva >= s.get_virtual_address() && rva < s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()))
+			return s;
+	}
+
+	throw pe_exception("No section found by presented address", pe_exception::no_section_found);
+}
+
+//Returns section from directory ID
+section& pe_base::section_from_directory(uint32_t directory_id)
+{
+	return section_from_rva(get_directory_rva(directory_id));		
+}
+
+//Returns section from directory ID
+const section& pe_base::section_from_directory(uint32_t directory_id) const
+{
+	return section_from_rva(get_directory_rva(directory_id));	
+}
+
+//Sets section virtual size (actual for the last one of this PE or for unbound section)
+void pe_base::set_section_virtual_size(section& s, uint32_t vsize)
+{
+	//Check if we're changing virtual size of the last section
+	//Of course, we can change virtual size of section that's not bound to this PE file
+	if(sections_.empty() || std::find_if(sections_.begin(), sections_.end() - 1, section_ptr_finder(s)) != sections_.end() - 1)
+		throw pe_exception("Can't change virtual size of any section, except last one", pe_exception::error_changing_section_virtual_size);
+
+	//If we're setting virtual size to zero
+	if(vsize == 0)
+	{
+		//Check if section is empty
+		if(s.empty())
+			throw pe_exception("Cannot set virtual size of empty section to zero", pe_exception::error_changing_section_virtual_size);
+
+		//Set virtual size equal to aligned size of raw data
+		s.set_virtual_size(s.get_size_of_raw_data());
+	}
+	else
+	{
+		s.set_virtual_size(vsize);
+	}
+
+	//Update image size only if "s" is the last section of this PE (an unbound section doesn't affect the image)
+	if(!sections_.empty() && &s == &(*(sections_.end() - 1)))
+		update_image_size();
+}
+
+//Expands section raw or virtual size to hold data from specified RVA with specified size
+//Section must be free (not bound to any image)
+//or the last section of this image
+bool pe_base::expand_section(section& s, uint32_t needed_rva, uint32_t needed_size, section_expand_type expand)
+{
+	//Check if we're changing the last section
+	//Of course, we can change the section that's not bound to this PE file
+	if(sections_.empty() || std::find_if(sections_.begin(), sections_.end() - 1, section_ptr_finder(s)) != sections_.end() - 1)
+		throw pe_exception("Can't expand any section, except last one", pe_exception::error_expanding_section);
+
+	//Check if we should expand our section
+	if(expand == expand_section_raw && section_data_length_from_rva(s, needed_rva, section_data_raw) < needed_size)
+	{
+		//Expand section raw data
+		s.get_raw_data().resize(needed_rva - s.get_virtual_address() + needed_size);
+		recalculate_section_sizes(s, false);
+		return true;
+	}
+	else if(expand == expand_section_virtual && section_data_length_from_rva(s, needed_rva, section_data_virtual) < needed_size)
+	{
+		//Expand section virtual data
+		set_section_virtual_size(s, needed_rva - s.get_virtual_address() + needed_size);
+		return true;
+	}
+	
+	return false;
+}
+
+//Updates image virtual size
+void pe_base::update_image_size()
+{
+	//Write virtual size of image to headers
+	if(!sections_.empty())
+		set_size_of_image(sections_.back().get_virtual_address() + sections_.back().get_aligned_virtual_size(get_section_alignment()));
+	else
+		set_size_of_image(get_size_of_headers());
+}
+
+//Returns checksum of PE file from header
+uint32_t pe_base::get_checksum() const
+{
+	return props_->get_checksum();
+}
+
+//Sets checksum of PE file
+void pe_base::set_checksum(uint32_t checksum)
+{
+	props_->set_checksum(checksum);
+}
+
+//Returns timestamp of PE file from header
+uint32_t pe_base::get_time_date_stamp() const
+{
+	return props_->get_time_date_stamp();
+}
+
+//Sets timestamp of PE file
+void pe_base::set_time_date_stamp(uint32_t timestamp)
+{
+	props_->set_time_date_stamp(timestamp);
+}
+
+//Returns Machine field value of PE file from header
+uint16_t pe_base::get_machine() const
+{
+	return props_->get_machine();
+}
+
+//Sets Machine field value of PE file
+void pe_base::set_machine(uint16_t machine)
+{
+	props_->set_machine(machine);
+}
+
+//Prepares section before attaching it
+void pe_base::prepare_section(section& s)
+{
+	//Calculate its size of raw data
+	s.set_size_of_raw_data(static_cast<uint32_t>(pe_utils::align_up(s.get_raw_data().length(), get_file_alignment())));
+
+	//Check section virtual and raw size
+	if(!s.get_size_of_raw_data() && !s.get_virtual_size())
+		throw pe_exception("Virtual and Physical sizes of section can't be 0 at the same time", pe_exception::zero_section_sizes);
+
+	//If section virtual size is zero
+	if(!s.get_virtual_size())
+	{
+		s.set_virtual_size(s.get_size_of_raw_data());
+	}
+	else
+	{
+		//Else calculate its virtual size
+		s.set_virtual_size(
+			std::max<uint32_t>(pe_utils::align_up(s.get_size_of_raw_data(), get_file_alignment()),
+			pe_utils::align_up(s.get_virtual_size(), get_section_alignment())));
+	}
+}
+
+//Adds section to image
+section& pe_base::add_section(section s)
+{
+	if(sections_.size() >= maximum_number_of_sections)
+		throw pe_exception("Maximum number of sections has been reached", pe_exception::no_more_sections_can_be_added);
+
+	//Prepare section before adding it
+	prepare_section(s);
+
+	//Calculate section virtual address
+	if(!sections_.empty())
+	{
+		s.set_virtual_address(pe_utils::align_up(sections_.back().get_virtual_address() + sections_.back().get_aligned_virtual_size(get_section_alignment()), get_section_alignment()));
+
+		//We should align last section raw size, if it wasn't aligned
+		section& last = sections_.back();
+		last.set_size_of_raw_data(static_cast<uint32_t>(pe_utils::align_up(last.get_raw_data().length(), get_file_alignment())));
+	}
+	else
+	{
+		s.set_virtual_address(
+			s.get_virtual_address() == 0
+			? pe_utils::align_up(get_size_of_headers(), get_section_alignment())
+			: pe_utils::align_up(s.get_virtual_address(), get_section_alignment()));
+	}
+
+	//Add section to the end of section list
+	sections_.push_back(s);
+	//Set number of sections in PE header
+	set_number_of_sections(static_cast<uint16_t>(sections_.size()));
+	//Recalculate virtual size of image
+	set_size_of_image(get_size_of_image() + s.get_aligned_virtual_size(get_section_alignment()));
+	//Return last section
+	return sections_.back();
+}
+
+//Returns true if section "s" is already attached to this PE file
+bool pe_base::section_attached(const section& s) const
+{
+	return sections_.end() != std::find_if(sections_.begin(), sections_.end(), section_ptr_finder(s));
+}
+
+//Returns true if directory exists
+bool pe_base::directory_exists(uint32_t id) const
+{
+	return props_->directory_exists(id);
+}
+
+//Removes directory
+void pe_base::remove_directory(uint32_t id)
+{
+	props_->remove_directory(id);
+}
+
+//Returns directory RVA
+uint32_t pe_base::get_directory_rva(uint32_t id) const
+{
+	return props_->get_directory_rva(id);
+}
+
+//Returns directory size
+uint32_t pe_base::get_directory_size(uint32_t id) const
+{
+	return props_->get_directory_size(id);
+}
+
+//Sets directory RVA (just a value of PE header, no moving occurs)
+void pe_base::set_directory_rva(uint32_t id, uint32_t rva)
+{
+	return props_->set_directory_rva(id, rva);
+}
+
+//Sets directory size (just a value of PE header, no moving occurs)
+void pe_base::set_directory_size(uint32_t id, uint32_t size)
+{
+	return props_->set_directory_size(id, size);
+}
+
+//Strips only zero DATA_DIRECTORY entries to count = min_count
+//Returns resulting number of data directories
+//strip_iat_directory - if true, even not empty IAT directory will be stripped
+uint32_t pe_base::strip_data_directories(uint32_t min_count, bool strip_iat_directory)
+{
+	return props_->strip_data_directories(min_count, strip_iat_directory);
+}
+
+//Returns true if image has import directory
+bool pe_base::has_imports() const
+{
+	return directory_exists(image_directory_entry_import);
+}
+
+//Returns true if image has export directory
+bool pe_base::has_exports() const
+{
+	return directory_exists(image_directory_entry_export);
+}
+
+//Returns true if image has resource directory
+bool pe_base::has_resources() const
+{
+	return directory_exists(image_directory_entry_resource);
+}
+
+//Returns true if image has security directory
+bool pe_base::has_security() const
+{
+	return directory_exists(image_directory_entry_security);
+}
+
+//Returns true if image has relocations
+bool pe_base::has_reloc() const
+{
+	return directory_exists(image_directory_entry_basereloc) && !(get_characteristics() & image_file_relocs_stripped);
+}
+
+//Returns true if image has TLS directory
+bool pe_base::has_tls() const
+{
+	return directory_exists(image_directory_entry_tls);
+}
+
+//Returns true if image has config directory
+bool pe_base::has_config() const
+{
+	return directory_exists(image_directory_entry_load_config);
+}
+
+//Returns true if image has bound import directory
+bool pe_base::has_bound_import() const
+{
+	return directory_exists(image_directory_entry_bound_import);
+}
+
+//Returns true if image has delay import directory
+bool pe_base::has_delay_import() const
+{
+	return directory_exists(image_directory_entry_delay_import);
+}
+
+//Returns true if image has COM directory
+bool pe_base::is_dotnet() const
+{
+	return directory_exists(image_directory_entry_com_descriptor);
+}
+
+//Returns true if image has exception directory
+bool pe_base::has_exception_directory() const
+{
+	return directory_exists(image_directory_entry_exception);
+}
+
+//Returns true if image has debug directory
+bool pe_base::has_debug() const
+{
+	return directory_exists(image_directory_entry_debug);
+}
+
+//Returns corresponding section data pointer from RVA inside section "s" (checks bounds)
+char* pe_base::section_data_from_rva(section& s, uint32_t rva)
+{
+	//Check if RVA is inside section "s"
+	if(rva >= s.get_virtual_address() && rva < s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()))
+	{
+		if(s.get_raw_data().empty())
+			throw pe_exception("Section raw data is empty and cannot be changed", pe_exception::section_is_empty);
+
+		return &s.get_raw_data()[rva - s.get_virtual_address()];
+	}
+
+	throw pe_exception("RVA not found inside section", pe_exception::rva_not_exists);
+}
+
+//Returns corresponding section data pointer from RVA inside section "s" (checks bounds)
+const char* pe_base::section_data_from_rva(const section& s, uint32_t rva, section_data_type datatype) const
+{
+	//Check if RVA is inside section "s"
+	if(rva >= s.get_virtual_address() && rva < s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()))
+		return (datatype == section_data_raw ? s.get_raw_data().data() : s.get_virtual_data(get_section_alignment()).c_str()) + rva - s.get_virtual_address();
+
+	throw pe_exception("RVA not found inside section", pe_exception::rva_not_exists);
+}
+
+//Returns section TOTAL RAW/VIRTUAL data length from RVA inside section
+uint32_t pe_base::section_data_length_from_rva(uint32_t rva, section_data_type datatype, bool include_headers) const
+{
+	//if RVA is inside of headers and we're searching them too...
+	if(include_headers && rva < full_headers_data_.length())
+		return static_cast<unsigned long>(full_headers_data_.length());
+
+	const section& s = section_from_rva(rva);
+	return static_cast<unsigned long>(datatype == section_data_raw ? s.get_raw_data().length() /* instead of SizeOfRawData */ : s.get_aligned_virtual_size(get_section_alignment()));
+}
+
+//Returns section TOTAL RAW/VIRTUAL data length from VA inside section for PE32
+uint32_t pe_base::section_data_length_from_va(uint32_t va, section_data_type datatype, bool include_headers) const
+{
+	return section_data_length_from_rva(va_to_rva(va), datatype, include_headers);
+}
+
+//Returns section TOTAL RAW/VIRTUAL data length from VA inside section for PE32/PE64
+uint32_t pe_base::section_data_length_from_va(uint64_t va, section_data_type datatype, bool include_headers) const
+{
+	return section_data_length_from_rva(va_to_rva(va), datatype, include_headers);
+}
+
+//Returns section remaining RAW/VIRTUAL data length from RVA "rva_inside" to the end of section containing RVA "rva"
+uint32_t pe_base::section_data_length_from_rva(uint32_t rva, uint32_t rva_inside, section_data_type datatype, bool include_headers) const
+{
+	//if both RVAs are inside of headers and we're searching them too, return the remaining headers length
+	if(include_headers && rva < full_headers_data_.length() && rva_inside < full_headers_data_.length())
+		return static_cast<unsigned long>(full_headers_data_.length() - rva_inside);
+
+	const section& s = section_from_rva(rva);
+	if(rva_inside < s.get_virtual_address())
+		throw pe_exception("RVA not found inside section", pe_exception::rva_not_exists);
+
+	//Calculate remaining length of section data from "rva_inside" address (signed, so that an overshoot can be detected below)
+	long length = static_cast<long>(datatype == section_data_raw ? s.get_raw_data().length() /* instead of SizeOfRawData */ : s.get_aligned_virtual_size(get_section_alignment()))
+		+ s.get_virtual_address() - rva_inside;
+
+	if(length < 0) //"rva_inside" lies past the end of the selected (raw or virtual) data
+		return 0;
+
+	return static_cast<unsigned long>(length);
+}
+
+//Returns section remaining RAW/VIRTUAL data length from VA "va_inside" to the end of section containing VA "va" for PE32
+uint32_t pe_base::section_data_length_from_va(uint32_t va, uint32_t va_inside, section_data_type datatype, bool include_headers) const
+{
+	return section_data_length_from_rva(va_to_rva(va), va_to_rva(va_inside), datatype, include_headers);
+}
+
+//Returns section remaining RAW/VIRTUAL data length from VA "va_inside" to the end of section containing VA "va" for PE32/PE64
+uint32_t pe_base::section_data_length_from_va(uint64_t va, uint64_t va_inside, section_data_type datatype, bool include_headers) const
+{
+	return section_data_length_from_rva(va_to_rva(va), va_to_rva(va_inside), datatype, include_headers);
+}
+
+//Returns section remaining RAW/VIRTUAL data length from RVA to the end of section "s" (checks bounds, throws pe_exception::rva_not_exists if RVA is outside "s")
+uint32_t pe_base::section_data_length_from_rva(const section& s, uint32_t rva_inside, section_data_type datatype) const
+{
+	//Check rva_inside: it must belong to the aligned virtual range of section "s"
+	if(rva_inside >= s.get_virtual_address() && rva_inside < s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()))
+	{
+		//Calculate remaining length of section data from "rva_inside" address (signed, so that an overshoot can be detected below)
+		int32_t length = static_cast<int32_t>(datatype == section_data_raw ? s.get_raw_data().length() /* instead of SizeOfRawData */ : s.get_aligned_virtual_size(get_section_alignment()))
+			+ s.get_virtual_address() - rva_inside;
+
+		if(length < 0) //"rva_inside" lies past the end of the selected (raw or virtual) data
+			return 0;
+
+		return static_cast<uint32_t>(length);
+	}
+
+	throw pe_exception("RVA not found inside section", pe_exception::rva_not_exists);
+}
+
+//Returns section remaining RAW/VIRTUAL data length from VA to the end of section "s" for PE32 (checks bounds)
+uint32_t pe_base::section_data_length_from_va(const section& s, uint32_t va_inside, section_data_type datatype) const
+{
+	return section_data_length_from_rva(s, va_to_rva(va_inside), datatype);
+}
+
+//Returns section remaining RAW/VIRTUAL data length from VA to the end of section "s" for PE32/PE64 (checks bounds)
+uint32_t pe_base::section_data_length_from_va(const section& s, uint64_t va_inside, section_data_type datatype) const
+{
+	return section_data_length_from_rva(s, va_to_rva(va_inside), datatype);
+}
+
+//Returns corresponding section data pointer from RVA inside section
+char* pe_base::section_data_from_rva(uint32_t rva, bool include_headers)
+{
+	//if RVA is inside of headers and we're searching them too...
+	if(include_headers && rva < full_headers_data_.length())
+		return &full_headers_data_[rva];
+
+	section& s = section_from_rva(rva);
+
+	if(s.get_raw_data().empty())
+		throw pe_exception("Section raw data is empty and cannot be changed", pe_exception::section_is_empty);
+
+	return &s.get_raw_data()[rva - s.get_virtual_address()];
+}
+
+//Returns corresponding section data pointer from RVA inside section
+const char* pe_base::section_data_from_rva(uint32_t rva, section_data_type datatype, bool include_headers) const
+{
+	//if RVA is inside of headers and we're searching them too...
+	if(include_headers && rva < full_headers_data_.length())
+		return &full_headers_data_[rva];
+
+	const section& s = section_from_rva(rva);
+	return (datatype == section_data_raw ? s.get_raw_data().data() : s.get_virtual_data(get_section_alignment()).c_str()) + rva - s.get_virtual_address();
+}
+
+//Reads DOS header from istream into "header"; throws pe_exception (bad_pe_file / bad_dos_header) on stream errors or wrong "MZ" signature
+void pe_base::read_dos_header(std::istream& file, image_dos_header& header)
+{
+	//Check istream flags (fail() covers both failbit and badbit, e.g. a stream that was never opened)
+	if(file.fail() || file.eof())
+		throw pe_exception("PE file stream is bad or closed.", pe_exception::bad_pe_file);
+
+	//Read DOS header and check istream (a failed or short read sets failbit, so "header" is never inspected unfilled)
+	file.read(reinterpret_cast<char*>(&header), sizeof(image_dos_header));
+	if(file.fail() || file.eof())
+		throw pe_exception("Unable to read IMAGE_DOS_HEADER", pe_exception::bad_dos_header);
+
+	//Check DOS header magic
+	if(header.e_magic != 0x5a4d) //"MZ"
+		throw pe_exception("IMAGE_DOS_HEADER signature is incorrect", pe_exception::bad_dos_header);
+}
+
+//Reads DOS headers from istream
+void pe_base::read_dos_header(std::istream& file)
+{
+	read_dos_header(file, dos_header_);
+}
+
+//Reads PE image from istream: NT headers, DOS stub / rich overlay, all sections, headers data and (if read_debug_raw_data is set) raw debug data; relies on dos_header_ being filled already (see read_dos_header)
+void pe_base::read_pe(std::istream& file, bool read_debug_raw_data)
+{
+	//Get istream size
+	std::streamoff filesize = pe_utils::get_file_size(file);
+
+	//Check if PE header is DWORD-aligned
+	if((dos_header_.e_lfanew % sizeof(uint32_t)) != 0)
+		throw pe_exception("PE header is not DWORD-aligned", pe_exception::bad_dos_header);
+
+	//Seek to NT headers
+	file.seekg(dos_header_.e_lfanew);
+	if(file.bad() || file.fail())
+		throw pe_exception("Cannot reach IMAGE_NT_HEADERS", pe_exception::image_nt_headers_not_found);
+
+	//Read NT headers (without the data directory array, which is read separately below)
+	file.read(get_nt_headers_ptr(), get_sizeof_nt_header() - sizeof(image_data_directory) * image_numberof_directory_entries);
+	if(file.bad() || file.eof())
+		throw pe_exception("Error reading IMAGE_NT_HEADERS", pe_exception::error_reading_image_nt_headers);
+
+	//Check PE signature
+	if(get_pe_signature() != 0x4550) //"PE"
+		throw pe_exception("Incorrect PE signature", pe_exception::pe_signature_incorrect);
+
+	//Check number of directories (clamp it to the maximum supported count)
+	if(get_number_of_rvas_and_sizes() > image_numberof_directory_entries)
+		set_number_of_rvas_and_sizes(image_numberof_directory_entries);
+
+	if(get_number_of_rvas_and_sizes() > 0)
+	{
+		//Read data directory headers, if any
+		file.read(get_nt_headers_ptr() + (get_sizeof_nt_header() - sizeof(image_data_directory) * image_numberof_directory_entries), sizeof(image_data_directory) * get_number_of_rvas_and_sizes());
+		if(file.bad() || file.eof())
+			throw pe_exception("Error reading DATA_DIRECTORY headers", pe_exception::error_reading_data_directories);
+	}
+
+	//Check section number
+	//Images with zero section number accepted
+	if(get_number_of_sections() > maximum_number_of_sections)
+		throw pe_exception("Incorrect number of sections", pe_exception::section_number_incorrect);
+
+	//Check PE magic
+	if(get_magic() != get_needed_magic())
+		throw pe_exception("Incorrect PE signature", pe_exception::pe_signature_incorrect);
+
+	//Check section alignment
+	if(!pe_utils::is_power_of_2(get_section_alignment()))
+		throw pe_exception("Incorrect section alignment", pe_exception::incorrect_section_alignment);
+
+	//Check file alignment
+	if(!pe_utils::is_power_of_2(get_file_alignment()))
+		throw pe_exception("Incorrect file alignment", pe_exception::incorrect_file_alignment);
+
+	if(get_file_alignment() != get_section_alignment() && (get_file_alignment() < minimum_file_alignment || get_file_alignment() > get_section_alignment()))
+		throw pe_exception("Incorrect file alignment", pe_exception::incorrect_file_alignment);
+
+	//Check size of image
+	if(pe_utils::align_up(get_size_of_image(), get_section_alignment()) == 0)
+		throw pe_exception("Incorrect size of image", pe_exception::incorrect_size_of_image);
+	
+	//Read rich data overlay / DOS stub (if any): everything between IMAGE_DOS_HEADER and the NT headers
+	if(static_cast<uint32_t>(dos_header_.e_lfanew) > sizeof(image_dos_header))
+	{
+		rich_overlay_.resize(dos_header_.e_lfanew - sizeof(image_dos_header));
+		file.seekg(sizeof(image_dos_header));
+		file.read(&rich_overlay_[0], dos_header_.e_lfanew - sizeof(image_dos_header));
+		if(file.bad() || file.eof())
+			throw pe_exception("Error reading 'Rich' & 'DOS stub' overlay", pe_exception::error_reading_overlay);
+	}
+
+	//Calculate first section raw position
+	//Sum is safe here
+	uint32_t first_section = dos_header_.e_lfanew + get_size_of_optional_header() + sizeof(image_file_header) + sizeof(uint32_t) /* Signature */;
+
+	if(get_number_of_sections() > 0)
+	{
+		//Go to first section
+		file.seekg(first_section);
+		if(file.bad() || file.fail())
+			throw pe_exception("Cannot reach section headers", pe_exception::image_section_headers_not_found);
+	}
+
+	uint32_t last_raw_size = 0; //SizeOfRawData (as read from file, before any fix-up) of the most recent section that has raw data
+
+	//Read all sections
+	for(int i = 0; i < get_number_of_sections(); i++)
+	{
+		section s;
+		//Read section header
+		file.read(reinterpret_cast<char*>(&s.get_raw_header()), sizeof(image_section_header));
+		if(file.bad() || file.eof())
+			throw pe_exception("Error reading section header", pe_exception::error_reading_section_header);
+
+		//Save next section header position
+		std::streamoff next_sect = file.tellg();
+
+		//Check section virtual and raw sizes
+		if(!s.get_size_of_raw_data() && !s.get_virtual_size())
+			throw pe_exception("Virtual and Physical sizes of section can't be 0 at the same time", pe_exception::zero_section_sizes);
+
+		//Check for adequate values of section fields
+		if(!pe_utils::is_sum_safe(s.get_virtual_address(), s.get_virtual_size()) || s.get_virtual_size() > pe_utils::two_gb
+			|| !pe_utils::is_sum_safe(s.get_pointer_to_raw_data(), s.get_size_of_raw_data()) || s.get_size_of_raw_data() > pe_utils::two_gb)
+			throw pe_exception("Incorrect section address or size", pe_exception::section_incorrect_addr_or_size);
+
+		if(s.get_size_of_raw_data() != 0)
+		{
+			//If section has raw data
+
+			//If section raw data size is greater than virtual, fix it (the unmodified size is remembered first for the overlay check)
+			last_raw_size = s.get_size_of_raw_data();
+			if(pe_utils::align_up(s.get_size_of_raw_data(), get_file_alignment()) > pe_utils::align_up(s.get_virtual_size(), get_section_alignment()))
+				s.set_size_of_raw_data(s.get_virtual_size());
+
+			//Check virtual and raw section sizes and addresses
+			if(s.get_virtual_address() + pe_utils::align_up(s.get_virtual_size(), get_section_alignment()) > pe_utils::align_up(get_size_of_image(), get_section_alignment())
+				||
+				pe_utils::align_down(s.get_pointer_to_raw_data(), get_file_alignment()) + s.get_size_of_raw_data() > static_cast<uint32_t>(filesize))
+				throw pe_exception("Incorrect section address or size", pe_exception::section_incorrect_addr_or_size);
+
+			//Seek to section raw data
+			file.seekg(pe_utils::align_down(s.get_pointer_to_raw_data(), get_file_alignment()));
+			if(file.bad() || file.fail())
+				throw pe_exception("Cannot reach section data", pe_exception::image_section_data_not_found);
+
+			//Read section raw data
+			s.get_raw_data().resize(s.get_size_of_raw_data());
+			file.read(&s.get_raw_data()[0], s.get_size_of_raw_data());
+			if(file.bad() || file.fail())
+				throw pe_exception("Error reading section data", pe_exception::image_section_data_not_found);
+		}
+
+		//Check virtual address and size of section
+		if(s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()) > pe_utils::align_up(get_size_of_image(), get_section_alignment()))
+			throw pe_exception("Incorrect section address or size", pe_exception::section_incorrect_addr_or_size);
+
+		//Save section
+		sections_.push_back(s);
+
+		//Seek to the next section header
+		file.seekg(next_sect);
+	}
+
+	//Check size of headers: SizeOfHeaders can't be larger than first section VA
+	if(!sections_.empty() && get_size_of_headers() > sections_.front().get_virtual_address())
+		throw pe_exception("Incorrect size of headers", pe_exception::incorrect_size_of_headers);
+
+	//If image has two or more sections
+	if(sections_.size() >= 2)
+	{
+		//Check that each section starts exactly where the previous one ends (virtual address + aligned virtual size)
+		for(section_list::const_iterator i = sections_.begin() + 1; i != sections_.end(); ++i)
+		{
+			if((*i).get_virtual_address() != (*(i - 1)).get_virtual_address() + (*(i - 1)).get_aligned_virtual_size(get_section_alignment()))
+				throw pe_exception("Section table is incorrect", pe_exception::image_section_table_incorrect);
+		}
+	}
+
+	//Check if image has overlay in the end of file
+	has_overlay_ = !sections_.empty() && filesize > static_cast<std::streamoff>(sections_.back().get_pointer_to_raw_data() + last_raw_size);
+
+	{
+		//Additionally, read data from the beginning of istream to size of headers
+		file.seekg(0);
+		uint32_t size_of_headers = std::min<uint32_t>(get_size_of_headers(), static_cast<uint32_t>(filesize));
+
+		if(!sections_.empty())
+		{
+			for(section_list::const_iterator i = sections_.begin(); i != sections_.end(); ++i)
+			{
+				if(!(*i).empty())
+				{
+					size_of_headers = std::min<uint32_t>(get_size_of_headers(), (*i).get_pointer_to_raw_data());
+					break;
+				}
+			}
+		}
+
+		full_headers_data_.resize(size_of_headers);
+		file.read(&full_headers_data_[0], size_of_headers);
+		if(file.bad() || file.eof())
+			throw pe_exception("Error reading file", pe_exception::error_reading_file);
+	}
+
+	//Moreover, if there's debug directory, read its raw data for some debug info types
+	while(read_debug_raw_data && has_debug()) //"while" serves as a breakable block: every path that completes the body ends with "break"
+	{
+		try
+		{
+			//Check the length in bytes of the section containing debug directory
+			if(section_data_length_from_rva(get_directory_rva(image_directory_entry_debug), get_directory_rva(image_directory_entry_debug), section_data_virtual, true) < sizeof(image_debug_directory))
+				break;
+
+			unsigned long current_pos = get_directory_rva(image_directory_entry_debug);
+
+			//First IMAGE_DEBUG_DIRECTORY table
+			image_debug_directory directory = section_data_from_rva<image_debug_directory>(current_pos, section_data_virtual, true);
+
+			//Iterate over all IMAGE_DEBUG_DIRECTORY directories
+			while(directory.PointerToRawData
+				&& current_pos < get_directory_rva(image_directory_entry_debug) + get_directory_size(image_directory_entry_debug))
+			{
+				//If we have something to read
+				if((directory.Type == image_debug_type_codeview
+					|| directory.Type == image_debug_type_misc
+					|| directory.Type == image_debug_type_coff)
+					&& directory.SizeOfData)
+				{
+					std::string data;
+					data.resize(directory.SizeOfData);
+					file.seekg(directory.PointerToRawData);
+					file.read(&data[0], directory.SizeOfData);
+					if(file.bad() || file.eof())
+						throw pe_exception("Error reading file", pe_exception::error_reading_file);
+
+					debug_data_.insert(std::make_pair(directory.PointerToRawData, data));
+				}
+
+				//Go to next debug entry
+				current_pos += sizeof(image_debug_directory);
+				directory = section_data_from_rva<image_debug_directory>(current_pos, section_data_virtual, true);
+			}
+
+			break;
+		}
+		catch(const pe_exception&)
+		{
+			//Don't throw any exception here, if debug info is corrupted or incorrect
+			break;
+		}
+		catch(const std::bad_alloc&)
+		{
+			//Don't throw any exception here, if debug info size is too large to allocate
+			break;
+		}
+	}
+}
+
+//Returns PE type of this image
+pe_type pe_base::get_pe_type() const
+{
+	return props_->get_pe_type();
+}
+
+//Returns PE type (PE or PE+) from pe_type enumeration (minimal correctness checks); stream position and exception mask are restored before returning or rethrowing
+pe_type pe_base::get_pe_type(std::istream& file)
+{
+	//Save state of the istream
+	std::ios_base::iostate state = file.exceptions();
+	std::streamoff old_offset = file.tellg();
+	image_nt_headers32 nt_headers;
+	image_dos_header header;
+
+	try
+	{
+		//Read dos header (stream exceptions are disabled, so that errors are reported via pe_exception only)
+		file.exceptions(std::ios::goodbit);
+		read_dos_header(file, header);
+
+		//Seek to the NT headers start
+		file.seekg(header.e_lfanew);
+		if(file.bad() || file.fail())
+			throw pe_exception("Cannot reach IMAGE_NT_HEADERS", pe_exception::image_nt_headers_not_found);
+
+		//Read NT headers (we're using 32-bit version, because there are no significant differences between 32 and 64 bit version structures)
+		file.read(reinterpret_cast<char*>(&nt_headers), sizeof(image_nt_headers32) - sizeof(image_data_directory) * image_numberof_directory_entries);
+		if(file.bad() || file.eof())
+			throw pe_exception("Error reading IMAGE_NT_HEADERS", pe_exception::error_reading_image_nt_headers);
+
+		//Check NT headers signature
+		if(nt_headers.Signature != 0x4550) //"PE"
+			throw pe_exception("Incorrect PE signature", pe_exception::pe_signature_incorrect);
+
+		//Check NT headers magic
+		if(nt_headers.OptionalHeader.Magic != image_nt_optional_hdr32_magic && nt_headers.OptionalHeader.Magic != image_nt_optional_hdr64_magic)
+			throw pe_exception("Incorrect PE signature", pe_exception::pe_signature_incorrect);
+	}
+	catch(const std::exception&)
+	{
+		//If something went wrong, restore istream state; flags are cleared first, because seekg() does nothing and exceptions() may throw while failbit is set
+		file.clear();
+		file.seekg(old_offset);
+		file.exceptions(state);
+		//Rethrow exception
+		throw;
+	}
+
+	//Restore stream state (clear flags, then position, then the caller's exception mask)
+	file.clear();
+	file.seekg(old_offset);
+	file.exceptions(state);
+
+	//Determine PE type and return it
+	return nt_headers.OptionalHeader.Magic == image_nt_optional_hdr64_magic ? pe_type_64 : pe_type_32;
+}
+
+//Returns true if image has overlay data at the end of file
+bool pe_base::has_overlay() const
+{
+	return has_overlay_;
+}
+
+//Clears PE characteristics flag
+void pe_base::clear_characteristics_flags(uint16_t flags)
+{
+	set_characteristics(get_characteristics() & ~flags);
+}
+
+//Sets PE characteristics flag
+void pe_base::set_characteristics_flags(uint16_t flags)
+{
+	set_characteristics(get_characteristics() | flags);
+}
+
+//Returns true if at least one bit of "flag" is set in PE characteristics
+bool pe_base::check_characteristics_flag(uint16_t flag) const
+{
+	return (get_characteristics() & flag) != 0;
+}
+
+//Returns subsystem value
+uint16_t pe_base::get_subsystem() const
+{
+	return props_->get_subsystem();
+}
+
+//Sets subsystem value
+void pe_base::set_subsystem(uint16_t subsystem)
+{
+	props_->set_subsystem(subsystem);
+}
+
+//Returns true if image has console subsystem
+bool pe_base::is_console() const
+{
+	return get_subsystem() == image_subsystem_windows_cui;
+}
+
+//Returns true if image has Windows GUI subsystem
+bool pe_base::is_gui() const
+{
+	return get_subsystem() == image_subsystem_windows_gui;
+}
+
+//Sets required operating system version
+void pe_base::set_os_version(uint16_t major, uint16_t minor)
+{
+	props_->set_os_version(major, minor);
+}
+
+//Returns required operating system version (minor word)
+uint16_t pe_base::get_minor_os_version() const
+{
+	return props_->get_minor_os_version();
+}
+
+//Returns required operating system version (major word)
+uint16_t pe_base::get_major_os_version() const
+{
+	return props_->get_major_os_version();
+}
+
+//Sets required subsystem version
+void pe_base::set_subsystem_version(uint16_t major, uint16_t minor)
+{
+	props_->set_subsystem_version(major, minor);
+}
+
+//Returns required subsystem version (minor word)
+uint16_t pe_base::get_minor_subsystem_version() const
+{
+	return props_->get_minor_subsystem_version();
+}
+
+//Returns required subsystem version (major word)
+uint16_t pe_base::get_major_subsystem_version() const
+{
+	return props_->get_major_subsystem_version();
+}
+
+//Returns corresponding section data pointer from VA inside section "s" for PE32 (checks bounds)
+char* pe_base::section_data_from_va(section& s, uint32_t va) //Always returns raw data
+{
+	return section_data_from_rva(s, va_to_rva(va));
+}
+
+//Returns corresponding section data pointer from VA inside section "s" for PE32 (checks bounds)
+const char* pe_base::section_data_from_va(const section& s, uint32_t va, section_data_type datatype) const
+{
+	return section_data_from_rva(s, va_to_rva(va), datatype);
+}
+
+//Returns corresponding section data pointer from VA inside section for PE32
+char* pe_base::section_data_from_va(uint32_t va, bool include_headers) //Always returns raw data
+{
+	return section_data_from_rva(va_to_rva(va), include_headers);
+}
+
+//Returns corresponding section data pointer from VA inside section for PE32
+const char* pe_base::section_data_from_va(uint32_t va, section_data_type datatype, bool include_headers) const
+{
+	return section_data_from_rva(va_to_rva(va), datatype, include_headers);
+}
+
+//Returns corresponding section data pointer from VA inside section "s" for PE32/PE64 (checks bounds)
+char* pe_base::section_data_from_va(section& s, uint64_t va)  //Always returns raw data
+{
+	return section_data_from_rva(s, va_to_rva(va));
+}
+
+//Returns corresponding section data pointer from VA inside section "s" for PE32/PE64 (checks bounds)
+const char* pe_base::section_data_from_va(const section& s, uint64_t va, section_data_type datatype) const
+{
+	return section_data_from_rva(s, va_to_rva(va), datatype);
+}
+
+//Returns corresponding section data pointer from VA inside section for PE32/PE64
+char* pe_base::section_data_from_va(uint64_t va, bool include_headers)  //Always returns raw data
+{
+	return section_data_from_rva(va_to_rva(va), include_headers);
+}
+
+//Returns corresponding section data pointer from VA inside section for PE32/PE64
+const char* pe_base::section_data_from_va(uint64_t va, section_data_type datatype, bool include_headers) const
+{
+	return section_data_from_rva(va_to_rva(va), datatype, include_headers);
+}
+
+//Returns section from VA inside it for PE32
+section& pe_base::section_from_va(uint32_t va)
+{
+	return section_from_rva(va_to_rva(va));
+}
+
+//Returns section from VA inside it for PE32/PE64
+section& pe_base::section_from_va(uint64_t va)
+{
+	return section_from_rva(va_to_rva(va));
+}
+
+//Returns section from VA inside it for PE32 (const version)
+const section& pe_base::section_from_va(uint32_t va) const
+{
+	return section_from_rva(va_to_rva(va));
+}
+
+//Returns section from VA inside it for PE32/PE64 (const version)
+const section& pe_base::section_from_va(uint64_t va) const
+{
+	return section_from_rva(va_to_rva(va));
+}
+
+uint32_t pe_base::va_to_rva(uint32_t va, bool bound_check) const
+{
+	return props_->va_to_rva(va, bound_check);
+}
+
+uint32_t pe_base::va_to_rva(uint64_t va, bool bound_check) const
+{
+	return props_->va_to_rva(va, bound_check);
+}
+
+uint32_t pe_base::rva_to_va_32(uint32_t rva) const
+{
+	return props_->rva_to_va_32(rva);
+}
+
+uint64_t pe_base::rva_to_va_64(uint32_t rva) const
+{
+	return props_->rva_to_va_64(rva);
+}
+
+//Relative Virtual Address (RVA) to Virtual Address (VA) conversion for PE32
+void pe_base::rva_to_va(uint32_t rva, uint32_t& va) const
+{
+	va = rva_to_va_32(rva);
+}
+
+//Relative Virtual Address (RVA) to Virtual Address (VA) conversion for PE32/PE64
+void pe_base::rva_to_va(uint32_t rva, uint64_t& va) const
+{
+	va = rva_to_va_64(rva);
+}
+
+//Returns section from file offset (4gb max)
+section& pe_base::section_from_file_offset(uint32_t offset)
+{
+	return *file_offset_to_section(offset);
+}
+
+//Returns section from file offset (4gb max)
+const section& pe_base::section_from_file_offset(uint32_t offset) const
+{
+	return *file_offset_to_section(offset);
+}
+
+//Returns section and offset (raw data only) from its start from RVA
+const std::pair<uint32_t, const section*> pe_base::section_and_offset_from_rva(uint32_t rva) const
+{
+	const section& s = section_from_rva(rva);
+	return std::make_pair(rva - s.get_virtual_address(), &s);
+}
+
+//Returns DLL Characteristics
+uint16_t pe_base::get_dll_characteristics() const
+{
+	return props_->get_dll_characteristics();
+}
+
+//Sets DLL Characteristics
+void pe_base::set_dll_characteristics(uint16_t characteristics)
+{
+	props_->set_dll_characteristics(characteristics);
+}
+
+//Returns size of headers
+uint32_t pe_base::get_size_of_headers() const
+{
+	return props_->get_size_of_headers();
+}
+
+//Returns size of optional header
+uint16_t pe_base::get_size_of_optional_header() const
+{
+	return props_->get_size_of_optional_header();
+}
+
+//Returns PE signature
+uint32_t pe_base::get_pe_signature() const
+{
+	return props_->get_pe_signature();
+}
+
+//Returns magic value
+uint32_t pe_base::get_magic() const
+{
+	return props_->get_magic();
+}
+
+//Returns image base for PE32
+void pe_base::get_image_base(uint32_t& base) const
+{
+	base = get_image_base_32();
+}
+
+//Returns image base as a 32-bit value
+uint32_t pe_base::get_image_base_32() const
+{
+	return props_->get_image_base_32();
+}
+
+//Returns image base as a 64-bit value
+uint64_t pe_base::get_image_base_64() const
+{
+	return props_->get_image_base_64();
+}
+
+//RVA to RAW file offset conversion (4gb max)
+uint32_t pe_base::rva_to_file_offset(uint32_t rva) const
+{
+	//Maybe, RVA is inside PE headers
+	if(rva < get_size_of_headers())
+		return rva;
+
+	const section& s = section_from_rva(rva);
+	return s.get_pointer_to_raw_data() + rva - s.get_virtual_address();
+}
+
+//RAW file offset to RVA conversion (4gb max)
+uint32_t pe_base::file_offset_to_rva(uint32_t offset) const
+{
+	//Maybe, offset is inside PE headers
+	if(offset < get_size_of_headers())
+		return offset;
+
+	const section_list::const_iterator it = file_offset_to_section(offset);
+	return offset - (*it).get_pointer_to_raw_data() + (*it).get_virtual_address();
+}
+
+//RAW file offset to section conversion helper (4gb max), const version
+section_list::const_iterator pe_base::file_offset_to_section(uint32_t offset) const
+{
+	section_list::const_iterator it = std::find_if(sections_.begin(), sections_.end(), section_by_raw_offset(offset));
+	if(it == sections_.end())
+		throw pe_exception("No section found by presented file offset", pe_exception::no_section_found);
+
+	return it;
+}
+
+//RAW file offset to section conversion helper (4gb max)
+section_list::iterator pe_base::file_offset_to_section(uint32_t offset)
+{
+	section_list::iterator it = std::find_if(sections_.begin(), sections_.end(), section_by_raw_offset(offset));
+	if(it == sections_.end())
+		throw pe_exception("No section found by presented file offset", pe_exception::no_section_found);
+	
+	return it;
+}
+
+//RVA from section raw data offset
+uint32_t pe_base::rva_from_section_offset(const section& s, uint32_t raw_offset_from_section_start)
+{
+	return s.get_virtual_address() + raw_offset_from_section_start;
+}
+
+//Returns image base for PE32/PE64
+void pe_base::get_image_base(uint64_t& base) const
+{
+	base = get_image_base_64();
+}
+
+//Sets new image base
+void pe_base::set_image_base(uint32_t base)
+{
+	props_->set_image_base(base);
+}
+
+void pe_base::set_image_base_64(uint64_t base)
+{
+	props_->set_image_base_64(base);
+}
+
+//Sets heap size commit for PE32 and PE64 respectively
+void pe_base::set_heap_size_commit(uint32_t size)
+{
+	props_->set_heap_size_commit(size);
+}
+
+void pe_base::set_heap_size_commit(uint64_t size)
+{
+	props_->set_heap_size_commit(size);
+}
+
+//Sets heap size reserve for PE32 and PE64 respectively
+void pe_base::set_heap_size_reserve(uint32_t size)
+{
+	props_->set_heap_size_reserve(size);
+}
+
+void pe_base::set_heap_size_reserve(uint64_t size)
+{
+	props_->set_heap_size_reserve(size);
+}
+
+//Sets stack size commit for PE32 and PE64 respectively
+void pe_base::set_stack_size_commit(uint32_t size)
+{
+	props_->set_stack_size_commit(size);
+}
+
+void pe_base::set_stack_size_commit(uint64_t size)
+{
+	props_->set_stack_size_commit(size);
+}
+
+//Sets stack size reserve for PE32 and PE64 respectively
+void pe_base::set_stack_size_reserve(uint32_t size)
+{
+	props_->set_stack_size_reserve(size);
+}
+
+void pe_base::set_stack_size_reserve(uint64_t size)
+{
+	props_->set_stack_size_reserve(size);
+}
+
+//Returns heap size commit for PE32 and PE64 respectively
+uint32_t pe_base::get_heap_size_commit_32() const
+{
+	return props_->get_heap_size_commit_32();
+}
+
+uint64_t pe_base::get_heap_size_commit_64() const
+{
+	return props_->get_heap_size_commit_64();
+}
+
+//Returns heap size reserve for PE32 and PE64 respectively
+uint32_t pe_base::get_heap_size_reserve_32() const
+{
+	return props_->get_heap_size_reserve_32();
+}
+
+uint64_t pe_base::get_heap_size_reserve_64() const
+{
+	return props_->get_heap_size_reserve_64();
+}
+
+//Returns stack size commit for PE32 and PE64 respectively
+uint32_t pe_base::get_stack_size_commit_32() const
+{
+	return props_->get_stack_size_commit_32();
+}
+
+uint64_t pe_base::get_stack_size_commit_64() const
+{
+	return props_->get_stack_size_commit_64();
+}
+
+//Returns stack size reserve for PE32 and PE64 respectively
+uint32_t pe_base::get_stack_size_reserve_32() const
+{
+	return props_->get_stack_size_reserve_32();
+}
+
+uint64_t pe_base::get_stack_size_reserve_64() const
+{
+	return props_->get_stack_size_reserve_64();
+}
+
+//Returns heap size commit for PE32
+void pe_base::get_heap_size_commit(uint32_t& size) const
+{
+	size = get_heap_size_commit_32();
+}
+
+//Returns heap size commit for PE32/PE64
+void pe_base::get_heap_size_commit(uint64_t& size) const
+{
+	size = get_heap_size_commit_64();
+}
+
+//Returns heap size reserve for PE32
+void pe_base::get_heap_size_reserve(uint32_t& size) const
+{
+	size = get_heap_size_reserve_32();
+}
+
+//Returns heap size reserve for PE32/PE64
+void pe_base::get_heap_size_reserve(uint64_t& size) const
+{
+	size = get_heap_size_reserve_64();
+}
+
+//Returns stack size commit for PE32
+void pe_base::get_stack_size_commit(uint32_t& size) const
+{
+	size = get_stack_size_commit_32();
+}
+
+//Returns stack size commit for PE32/PE64
+void pe_base::get_stack_size_commit(uint64_t& size) const
+{
+	size = get_stack_size_commit_64();
+}
+
+//Returns stack size reserve for PE32
+void pe_base::get_stack_size_reserve(uint32_t& size) const
+{
+	size = get_stack_size_reserve_32();
+}
+
+//Returns stack size reserve for PE32/PE64
+void pe_base::get_stack_size_reserve(uint64_t& size) const
+{
+	size = get_stack_size_reserve_64();
+}
+
+//Realigns file (changes file alignment)
+void pe_base::realign_file(uint32_t new_file_alignment)
+{
+	//Checks alignment for correctness
+	set_file_alignment(new_file_alignment);
+	realign_all_sections();
+}
+
+//Helper function to recalculate RAW and virtual section sizes and strip it, if necessary
+void pe_base::recalculate_section_sizes(section& s, bool auto_strip)
+{
+	prepare_section(s); //Recalculate section raw addresses
+
+	//Strip RAW size of section, if it is the last one (NOTE(review): as written, the condition holds when "s" is NOT the object stored last in sections_ - confirm intent)
+	//For all others it must be file-aligned and calculated by prepare_section() call
+	if(auto_strip && !(sections_.empty() || &s == &*(sections_.end() - 1)))
+	{
+		//Strip ending raw data nullbytes to optimize size
+		std::string& raw_data = s.get_raw_data();
+		if(!raw_data.empty())
+		{
+			std::string::size_type i = raw_data.length();
+			for(; i != 1; --i) //Stops at 1, so at least one byte of raw data is always kept
+			{
+				if(raw_data[i - 1] != 0)
+					break;
+			}
+			
+			raw_data.resize(i);
+		}
+
+		s.set_size_of_raw_data(static_cast<uint32_t>(raw_data.length()));
+	}
+
+	//Can occur only for last section
+	if(pe_utils::align_up(s.get_virtual_size(), get_section_alignment()) < pe_utils::align_up(s.get_size_of_raw_data(), get_file_alignment()))
+		set_section_virtual_size(s, pe_utils::align_up(s.get_size_of_raw_data(), get_section_alignment())); //Recalculate section virtual size
+}
+
+//Returns data from the beginning of image
+//Size = SizeOfHeaders, capped by read_pe() to the file size or to the raw data offset of the first non-empty section
+const std::string& pe_base::get_full_headers_data() const
+{
+	return full_headers_data_;
+}
+
+const pe_base::debug_data_list& pe_base::get_raw_debug_data_list() const
+{
+	return debug_data_;
+}
+
+//Sets number of sections
+void pe_base::set_number_of_sections(uint16_t number)
+{
+	props_->set_number_of_sections(number);
+}
+
+//Sets size of image
+void pe_base::set_size_of_image(uint32_t size)
+{
+	props_->set_size_of_image(size);
+}
+
+//Sets size of headers
+void pe_base::set_size_of_headers(uint32_t size)
+{
+	props_->set_size_of_headers(size);
+}
+
+//Sets size of optional headers
+void pe_base::set_size_of_optional_header(uint16_t size)
+{
+	props_->set_size_of_optional_header(size);
+}
+
+//Returns nt headers data pointer
+char* pe_base::get_nt_headers_ptr()
+{
+	return props_->get_nt_headers_ptr();
+}
+
+//Returns nt headers data pointer
+const char* pe_base::get_nt_headers_ptr() const
+{
+	return props_->get_nt_headers_ptr();
+}
+
+//Returns sizeof() nt headers
+uint32_t pe_base::get_sizeof_nt_header() const
+{
+	return props_->get_sizeof_nt_header();
+}
+
+//Returns sizeof() optional headers
+uint32_t pe_base::get_sizeof_opt_headers() const
+{
+	return props_->get_sizeof_opt_headers();
+}
+
+//Sets file alignment (no checks)
+void pe_base::set_file_alignment_unchecked(uint32_t alignment)
+{
+	props_->set_file_alignment_unchecked(alignment);
+}
+
+//Sets base of code
+void pe_base::set_base_of_code(uint32_t base)
+{
+	props_->set_base_of_code(base);
+}
+
+//Returns base of code
+uint32_t pe_base::get_base_of_code() const
+{
+	return props_->get_base_of_code();
+}
+
+//Returns needed magic of image
+uint32_t pe_base::get_needed_magic() const
+{
+	return props_->get_needed_magic();
+}
+}
diff --git a/tools/pe_bliss/pe_base.h b/tools/pe_bliss/pe_base.h
new file mode 100644
index 0000000000..b5416cf1e2
--- /dev/null
+++ b/tools/pe_bliss/pe_base.h
@@ -0,0 +1,544 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <vector>
+#include <istream>
+#include <ostream>
+#include <map>
+#include "pe_exception.h"
+#include "pe_structures.h"
+#include "utils.h"
+#include "pe_section.h"
+#include "pe_properties.h"
+
+//Please don't remove this information from header
+//PEBliss 1.0.0
+//(c) DX 2011 - 2012, http://kaimi.ru
+//Free to use for commertial and non-commertial purposes, modification and distribution
+
+// == more important ==
+//TODO: compact import rebuilder
+//TODO: remove sections in the middle
+//== less important ==
+//TODO: relocations that take more than one element (seems to be not possible in Windows PE, but anyway)
+//TODO: delay import directory
+//TODO: write message tables
+//TODO: write string tables
+//TODO: read security information
+//TODO: read full .NET information
+
+namespace pe_bliss
+{
+//Portable executable class
+class pe_base
+{
+public: //CONSTRUCTORS
+	//Constructor from stream
+	pe_base(std::istream& file, const pe_properties& props, bool read_debug_raw_data = true);
+
+	//Constructor of empty PE-file
+	explicit pe_base(const pe_properties& props, uint32_t section_alignment = 0x1000, bool dll = false, uint16_t subsystem = pe_win::image_subsystem_windows_gui);
+
+	pe_base(const pe_base& pe);
+	pe_base& operator=(const pe_base& pe);
+
+public:
+	~pe_base();
+
+public: //STUB
+	//Strips stub MSVS overlay, if any
+	void strip_stub_overlay();
+	//Fills stub MSVS overlay with specified byte
+	void fill_stub_overlay(char c);
+	//Sets stub MSVS overlay
+	void set_stub_overlay(const std::string& data);
+	//Returns stub overlay contents
+	const std::string& get_stub_overlay() const;
+
+
+public: //DIRECTORIES
+	//Returns true if directory exists
+	bool directory_exists(uint32_t id) const;
+	//Removes directory
+	void remove_directory(uint32_t id);
+
+	//Returns directory RVA
+	uint32_t get_directory_rva(uint32_t id) const;
+	//Returns directory size
+	uint32_t get_directory_size(uint32_t id) const;
+
+	//Sets directory RVA (just a value of PE header, no moving occurs)
+	void set_directory_rva(uint32_t id, uint32_t rva);
+	//Sets directory size (just a value of PE header, no moving occurs)
+	void set_directory_size(uint32_t id, uint32_t size);
+
+	//Strips only zero DATA_DIRECTORY entries to count = min_count
+	//Returns resulting number of data directories
+	//strip_iat_directory - if true, even not empty IAT directory will be stripped
+	uint32_t strip_data_directories(uint32_t min_count = 1, bool strip_iat_directory = true);
+
+	//Returns true if image has import directory
+	bool has_imports() const;
+	//Returns true if image has export directory
+	bool has_exports() const;
+	//Returns true if image has resource directory
+	bool has_resources() const;
+	//Returns true if image has security directory
+	bool has_security() const;
+	//Returns true if image has relocations
+	bool has_reloc() const;
+	//Returns true if image has TLS directory
+	bool has_tls() const;
+	//Returns true if image has config directory
+	bool has_config() const;
+	//Returns true if image has bound import directory
+	bool has_bound_import() const;
+	//Returns true if image has delay import directory
+	bool has_delay_import() const;
+	//Returns true if image has COM directory
+	bool is_dotnet() const;
+	//Returns true if image has exception directory
+	bool has_exception_directory() const;
+	//Returns true if image has debug directory
+	bool has_debug() const;
+
+	//Returns subsystem value
+	uint16_t get_subsystem() const;
+	//Sets subsystem value
+	void set_subsystem(uint16_t subsystem);
+	//Returns true if image has console subsystem
+	bool is_console() const;
+	//Returns true if image has Windows GUI subsystem
+	bool is_gui() const;
+
+	//Sets required operating system version
+	void set_os_version(uint16_t major, uint16_t minor);
+	//Returns required operating system version (minor word)
+	uint16_t get_minor_os_version() const;
+	//Returns required operating system version (major word)
+	uint16_t get_major_os_version() const;
+
+	//Sets required subsystem version
+	void set_subsystem_version(uint16_t major, uint16_t minor);
+	//Returns required subsystem version (minor word)
+	uint16_t get_minor_subsystem_version() const;
+	//Returns required subsystem version (major word)
+	uint16_t get_major_subsystem_version() const;
+
+public: //PE HEADER
+	//Returns DOS header
+	const pe_win::image_dos_header& get_dos_header() const;
+	pe_win::image_dos_header& get_dos_header();
+
+	//Returns PE header start (e_lfanew)
+	int32_t get_pe_header_start() const;
+
+	//Returns file alignment
+	uint32_t get_file_alignment() const;
+	//Sets file alignment, checking the correctness of its value
+	void set_file_alignment(uint32_t alignment);
+
+	//Returns size of image
+	uint32_t get_size_of_image() const;
+
+	//Returns image entry point
+	uint32_t get_ep() const;
+	//Sets image entry point (just a value of PE header)
+	void set_ep(uint32_t new_ep);
+
+	//Returns number of RVA and sizes (number of DATA_DIRECTORY entries)
+	uint32_t get_number_of_rvas_and_sizes() const;
+	//Sets number of RVA and sizes (number of DATA_DIRECTORY entries)
+	void set_number_of_rvas_and_sizes(uint32_t number);
+
+	//Returns PE characteristics
+	uint16_t get_characteristics() const;
+	//Sets PE characteristics (a value inside header)
+	void set_characteristics(uint16_t ch);
+	//Clears PE characteristics flag
+	void clear_characteristics_flags(uint16_t flags);
+	//Sets PE characteristics flag
+	void set_characteristics_flags(uint16_t flags);
+	//Returns true if PE characteristics flag set
+	bool check_characteristics_flag(uint16_t flag) const;
+	
+	//Returns DLL Characteristics
+	uint16_t get_dll_characteristics() const;
+	//Sets DLL Characteristics
+	void set_dll_characteristics(uint16_t characteristics);
+
+	//Returns size of headers
+	uint32_t get_size_of_headers() const;
+	//Returns size of optional header
+	uint16_t get_size_of_optional_header() const;
+
+	//Returns PE signature
+	uint32_t get_pe_signature() const;
+
+	//Returns magic value
+	uint32_t get_magic() const;
+
+	//Returns image base for PE32 and PE64 respectively
+	uint32_t get_image_base_32() const;
+	void get_image_base(uint32_t& base) const;
+	//Returns image base (64-bit value, for PE64)
+	uint64_t get_image_base_64() const;
+	void get_image_base(uint64_t& base) const;
+
+	//Sets new image base
+	void set_image_base(uint32_t base);
+	void set_image_base_64(uint64_t base);
+
+	//Sets heap size commit for PE32 and PE64 respectively
+	void set_heap_size_commit(uint32_t size);
+	void set_heap_size_commit(uint64_t size);
+	//Sets heap size reserve for PE32 and PE64 respectively
+	void set_heap_size_reserve(uint32_t size);
+	void set_heap_size_reserve(uint64_t size);
+	//Sets stack size commit for PE32 and PE64 respectively
+	void set_stack_size_commit(uint32_t size);
+	void set_stack_size_commit(uint64_t size);
+	//Sets stack size reserve for PE32 and PE64 respectively
+	void set_stack_size_reserve(uint32_t size);
+	void set_stack_size_reserve(uint64_t size);
+
+	//Returns heap size commit for PE32 and PE64 respectively
+	uint32_t get_heap_size_commit_32() const;
+	void get_heap_size_commit(uint32_t& size) const;
+	uint64_t get_heap_size_commit_64() const;
+	void get_heap_size_commit(uint64_t& size) const;
+	//Returns heap size reserve for PE32 and PE64 respectively
+	uint32_t get_heap_size_reserve_32() const;
+	void get_heap_size_reserve(uint32_t& size) const;
+	uint64_t get_heap_size_reserve_64() const;
+	void get_heap_size_reserve(uint64_t& size) const;
+	//Returns stack size commit for PE32 and PE64 respectively
+	uint32_t get_stack_size_commit_32() const;
+	void get_stack_size_commit(uint32_t& size) const;
+	uint64_t get_stack_size_commit_64() const;
+	void get_stack_size_commit(uint64_t& size) const;
+	//Returns stack size reserve for PE32 and PE64 respectively
+	uint32_t get_stack_size_reserve_32() const;
+	void get_stack_size_reserve(uint32_t& size) const;
+	uint64_t get_stack_size_reserve_64() const;
+	void get_stack_size_reserve(uint64_t& size) const;
+
+	//Updates virtual size of image corresponding to section virtual sizes
+	void update_image_size();
+
+	//Returns checksum of PE file from header
+	uint32_t get_checksum() const;
+	//Sets checksum of PE file
+	void set_checksum(uint32_t checksum);
+	
+	//Returns timestamp of PE file from header
+	uint32_t get_time_date_stamp() const;
+	//Sets timestamp of PE file
+	void set_time_date_stamp(uint32_t timestamp);
+	
+	//Returns Machine field value of PE file from header
+	uint16_t get_machine() const;
+	//Sets Machine field value of PE file
+	void set_machine(uint16_t machine);
+
+	//Returns data from the beginning of image
+	//Size = SizeOfHeaders
+	const std::string& get_full_headers_data() const;
+	
+	typedef std::multimap<uint32_t, std::string> debug_data_list;
+	//Returns raw list of debug data
+	const debug_data_list& get_raw_debug_data_list() const;
+	
+	//Reads and checks DOS header
+	static void read_dos_header(std::istream& file, pe_win::image_dos_header& header);
+	
+	//Returns sizeof() nt headers
+	uint32_t get_sizeof_nt_header() const;
+	//Returns sizeof() optional headers
+	uint32_t get_sizeof_opt_headers() const;
+	//Returns raw nt headers data pointer
+	const char* get_nt_headers_ptr() const;
+	
+	//Sets size of headers (to NT headers)
+	void set_size_of_headers(uint32_t size);
+	//Sets size of optional headers (to NT headers)
+	void set_size_of_optional_header(uint16_t size);
+	
+	//Sets base of code
+	void set_base_of_code(uint32_t base);
+	//Returns base of code
+	uint32_t get_base_of_code() const;
+
+public: //ADDRESS CONVERTIONS
+	//Virtual Address (VA) to Relative Virtual Address (RVA) conversions
+	//for PE32 and PE64 respectively
+	//bound_check checks integer overflow
+	uint32_t va_to_rva(uint32_t va, bool bound_check = true) const;
+	uint32_t va_to_rva(uint64_t va, bool bound_check = true) const;
+
+	//Relative Virtual Address (RVA) to Virtual Address (VA) conversions
+	//for PE32 and PE64 respectively
+	uint32_t rva_to_va_32(uint32_t rva) const;
+	void rva_to_va(uint32_t rva, uint32_t& va) const;
+	uint64_t rva_to_va_64(uint32_t rva) const;
+	void rva_to_va(uint32_t rva, uint64_t& va) const;
+
+	//RVA to RAW file offset conversion (4gb max)
+	uint32_t rva_to_file_offset(uint32_t rva) const;
+	//RAW file offset to RVA conversion (4gb max)
+	uint32_t file_offset_to_rva(uint32_t offset) const;
+
+	//RVA from section raw data offset
+	static uint32_t rva_from_section_offset(const section& s, uint32_t raw_offset_from_section_start);
+
+public: //IMAGE SECTIONS
+	//Returns number of sections from PE header
+	uint16_t get_number_of_sections() const;
+
+	//Updates number of sections in PE header
+	uint16_t update_number_of_sections();
+
+	//Returns section alignment
+	uint32_t get_section_alignment() const;
+
+	//Returns section list
+	section_list& get_image_sections();
+	const section_list& get_image_sections() const;
+
+	//Realigns all sections, if you made any changes to sections or alignments
+	void realign_all_sections();
+	//Realigns section with specified index
+	void realign_section(uint32_t index);
+
+	//Returns section from RVA inside it
+	section& section_from_rva(uint32_t rva);
+	const section& section_from_rva(uint32_t rva) const;
+	//Returns section from directory ID
+	section& section_from_directory(uint32_t directory_id);
+	const section& section_from_directory(uint32_t directory_id) const;
+	//Returns section from VA inside it for PE32 and PE64 respectively
+	section& section_from_va(uint32_t va);
+	const section& section_from_va(uint32_t va) const;
+	section& section_from_va(uint64_t va);
+	const section& section_from_va(uint64_t va) const;
+	//Returns section from file offset (4gb max)
+	section& section_from_file_offset(uint32_t offset);
+	const section& section_from_file_offset(uint32_t offset) const;
+
+	//Returns section TOTAL RAW/VIRTUAL data length from RVA inside section
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	uint32_t section_data_length_from_rva(uint32_t rva, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	//Returns section TOTAL RAW/VIRTUAL data length from VA inside section for PE32 and PE64 respectively
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	uint32_t section_data_length_from_va(uint32_t va, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	uint32_t section_data_length_from_va(uint64_t va, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+
+	//Returns section remaining RAW/VIRTUAL data length from RVA to the end of section "s" (checks bounds)
+	uint32_t section_data_length_from_rva(const section& s, uint32_t rva_inside, section_data_type datatype = section_data_raw) const;
+	//Returns section remaining RAW/VIRTUAL data length from VA to the end of section "s" for PE32 and PE64 respectively (checks bounds)
+	uint32_t section_data_length_from_va(const section& s, uint64_t va_inside, section_data_type datatype = section_data_raw) const;
+	uint32_t section_data_length_from_va(const section& s, uint32_t va_inside, section_data_type datatype = section_data_raw) const;
+
+	//Returns section remaining RAW/VIRTUAL data length from RVA "rva_inside" to the end of section containing RVA "rva"
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	uint32_t section_data_length_from_rva(uint32_t rva, uint32_t rva_inside, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	//Returns section remaining RAW/VIRTUAL data length from VA "va_inside" to the end of section containing VA "va" for PE32 and PE64 respectively
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	uint32_t section_data_length_from_va(uint32_t va, uint32_t va_inside, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	uint32_t section_data_length_from_va(uint64_t va, uint64_t va_inside, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	//Returns corresponding section data pointer from RVA inside section
+	char* section_data_from_rva(uint32_t rva, bool include_headers = false);
+	const char* section_data_from_rva(uint32_t rva, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	//Returns corresponding section data pointer from VA inside section for PE32 and PE64 respectively
+	char* section_data_from_va(uint32_t va, bool include_headers = false);
+	const char* section_data_from_va(uint32_t va, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+	char* section_data_from_va(uint64_t va, bool include_headers = false);
+	const char* section_data_from_va(uint64_t va, section_data_type datatype = section_data_raw, bool include_headers = false) const;
+
+	//Returns corresponding section data pointer from RVA inside section "s" (checks bounds)
+	char* section_data_from_rva(section& s, uint32_t rva);
+	const char* section_data_from_rva(const section& s, uint32_t rva, section_data_type datatype = section_data_raw) const;
+	//Returns corresponding section data pointer from VA inside section "s" for PE32 and PE64 respectively (checks bounds)
+	char* section_data_from_va(section& s, uint32_t va); //Always returns raw data
+	const char* section_data_from_va(const section& s, uint32_t va, section_data_type datatype = section_data_raw) const;
+	char* section_data_from_va(section& s, uint64_t va); //Always returns raw data
+	const char* section_data_from_va(const section& s, uint64_t va, section_data_type datatype = section_data_raw) const;
+
+	//Reads a value of type T at RVA "rva" inside section "s" and returns it by value (checks bounds, checks sizes, the most safe function)
+	template<typename T>
+	T section_data_from_rva(const section& s, uint32_t rva, section_data_type datatype = section_data_raw) const
+	{
+		//Guard clause: RVA must lie in the aligned virtual range of "s" and pe_utils::is_sum_safe(rva, sizeof(T)) must hold
+		if(rva < s.get_virtual_address() || rva >= s.get_virtual_address() + s.get_aligned_virtual_size(get_section_alignment()) || !pe_utils::is_sum_safe(rva, sizeof(T)))
+			throw pe_exception("RVA not found inside section", pe_exception::rva_not_exists);
+
+		const std::string& contents = datatype == section_data_raw ? s.get_raw_data() : s.get_virtual_data(get_section_alignment());
+		//No underflow check needed: arithmetic is unsigned and rva >= section start was verified above
+		if(rva - s.get_virtual_address() + sizeof(T) > contents.size())
+			throw pe_exception("RVA and requested data size does not exist inside section", pe_exception::rva_not_exists);
+
+		//Reinterpret the bytes at the section-relative position as T and copy them out
+		return *reinterpret_cast<const T*>(contents.data() + rva - s.get_virtual_address());
+	}
+
+	//Reads a value of type T at RVA "rva" and returns it by value (checks rva, checks sizes, the most safe function)
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	template<typename T>
+	T section_data_from_rva(uint32_t rva, section_data_type datatype = section_data_raw, bool include_headers = false) const
+	{
+		//Serve the read from the saved headers when it fits entirely inside them ("<=": a value ending exactly at the last header byte is in bounds)
+		if(include_headers && pe_utils::is_sum_safe(rva, sizeof(T)) && (rva + sizeof(T) <= full_headers_data_.length()))
+			return *reinterpret_cast<const T*>(&full_headers_data_[rva]);
+
+		const section& s = section_from_rva(rva);
+		const std::string& data = datatype == section_data_raw ? s.get_raw_data() : s.get_virtual_data(get_section_alignment());
+		//Don't check for underflow here, comparison is unsigned
+		if(data.size() < rva - s.get_virtual_address() + sizeof(T))
+			throw pe_exception("RVA and requested data size does not exist inside section", pe_exception::rva_not_exists);
+
+		return *reinterpret_cast<const T*>(data.data() + rva - s.get_virtual_address());
+	}
+
+	//Returns (by value) the T stored at VA "va" inside section "s": converts the 32-bit VA with va_to_rva and defers all checks to section_data_from_rva<T>
+	template<typename T>
+	T section_data_from_va(const section& s, uint32_t va, section_data_type datatype = section_data_raw) const
+	{
+		return section_data_from_rva<T>(s, va_to_rva(va), datatype);
+	}
+
+	template<typename T>
+	T section_data_from_va(const section& s, uint64_t va, section_data_type datatype = section_data_raw) const
+	{
+		return section_data_from_rva<T>(s, va_to_rva(va), datatype); //64-bit VA overload: convert to RVA, then read from section "s" by value
+	}
+
+	//Returns (by value) the T stored at 32-bit VA "va": converts it with va_to_rva and defers lookup and checks to section_data_from_rva<T>
+	//If include_headers = true, data from the beginning of PE file to SizeOfHeaders will be searched, too
+	template<typename T>
+	T section_data_from_va(uint32_t va, section_data_type datatype = section_data_raw, bool include_headers = false) const
+	{
+		return section_data_from_rva<T>(va_to_rva(va), datatype, include_headers);
+	}
+
+	template<typename T>
+	T section_data_from_va(uint64_t va, section_data_type datatype = section_data_raw, bool include_headers = false) const
+	{
+		return section_data_from_rva<T>(va_to_rva(va), datatype, include_headers); //64-bit VA overload: convert to RVA, then read by value
+	}
+
+	//Returns section and offset (raw data only) from its start from RVA
+	const std::pair<uint32_t, const section*> section_and_offset_from_rva(uint32_t rva) const;
+
+	//Sets virtual size of section "s"
+	//Section must be free (not bound to any image)
+	//or the last section of this image
+	//Function calls update_image_size automatically in second case
+	void set_section_virtual_size(section& s, uint32_t vsize);
+
+	//Represents section expand type for expand_section function
+	enum section_expand_type
+	{
+		expand_section_raw, //Section raw data size will be expanded
+		expand_section_virtual //Section virtual data size will be expanded
+	};
+
+	//Expands section raw or virtual size to hold data from specified RVA with specified size
+	//Section must be free (not bound to any image)
+	//or the last section of this image
+	//Returns true if section was expanded
+	bool expand_section(section& s, uint32_t needed_rva, uint32_t needed_size, section_expand_type expand);
+
+	//Adds section to image
+	//Returns last section
+	section& add_section(section s);
+	//Prepares section to later add it to image (checks and recalculates virtual and raw section size)
+	//Section must be prepared by this function before calling add_section
+	void prepare_section(section& s);
+
+	//Returns true if section "s" is already attached to this PE file
+	bool section_attached(const section& s) const;
+
+
+public: //IMAGE
+	//Returns PE type (PE or PE+) from pe_type enumeration (minimal correctness checks)
+	static pe_type get_pe_type(std::istream& file);
+	//Returns PE type of this image
+	pe_type get_pe_type() const;
+
+	//Returns true if image has overlay data at the end of file
+	bool has_overlay() const;
+
+	//Realigns file (changes file alignment)
+	void realign_file(uint32_t new_file_alignment);
+	
+	//Helper function to recalculate RAW and virtual section sizes and strip it, if necessary
+	//auto_strip = strip section, if necessary
+	void recalculate_section_sizes(section& s, bool auto_strip);
+
+	// ========== END OF PUBLIC MEMBERS AND STRUCTURES ========== //
+private:
+	//Image DOS header
+	pe_win::image_dos_header dos_header_;
+	//Rich (stub) overlay data (for MSVS)
+	std::string rich_overlay_;
+	//List of image sections
+	section_list sections_;
+	//True if image has overlay
+	bool has_overlay_;
+	//Raw SizeOfHeaders-sized data from the beginning of image
+	std::string full_headers_data_;
+	//Raw debug data for all directories
+	//PointerToRawData; Data
+	debug_data_list debug_data_;
+	//PE or PE+ related properties
+	pe_properties* props_;
+
+	//Reads and checks DOS header
+	void read_dos_header(std::istream& file);
+
+	//Reads and checks PE headers and section headers, data
+	void read_pe(std::istream& file, bool read_debug_raw_data);
+
+	//Sets number of sections
+	void set_number_of_sections(uint16_t number);
+	//Sets size of image
+	void set_size_of_image(uint32_t size);
+	//Sets file alignment (no checks)
+	void set_file_alignment_unchecked(uint32_t alignment);
+	//Returns needed magic of image
+	uint32_t get_needed_magic() const;
+	//Returns nt headers data pointer
+	char* get_nt_headers_ptr();
+
+private:
+	static const uint16_t maximum_number_of_sections = 0x60;
+	static const uint32_t minimum_file_alignment = 512;
+
+private:
+	//RAW file offset to section conversion helpers (4gb max)
+	section_list::const_iterator file_offset_to_section(uint32_t offset) const;
+	section_list::iterator file_offset_to_section(uint32_t offset);
+};
+}
diff --git a/tools/pe_bliss/pe_bliss.h b/tools/pe_bliss/pe_bliss.h
new file mode 100644
index 0000000000..1a8b430284
--- /dev/null
+++ b/tools/pe_bliss/pe_bliss.h
@@ -0,0 +1,39 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "pe_base.h"
+#include "pe_rebuilder.h"
+#include "pe_factory.h"
+#include "pe_bound_import.h"
+#include "pe_debug.h"
+#include "pe_dotnet.h"
+#include "pe_exception_directory.h"
+#include "pe_exports.h"
+#include "pe_imports.h"
+#include "pe_load_config.h"
+#include "pe_relocations.h"
+#include "pe_resources.h"
+#include "pe_rich_data.h"
+#include "pe_tls.h"
+#include "pe_properties_generic.h"
+#include "pe_checksum.h"
+#include "entropy.h"
diff --git a/tools/pe_bliss/pe_bliss_godot.cpp b/tools/pe_bliss/pe_bliss_godot.cpp
new file mode 100644
index 0000000000..8297aa1045
--- /dev/null
+++ b/tools/pe_bliss/pe_bliss_godot.cpp
@@ -0,0 +1,118 @@
+#include "pe_bliss/pe_bliss.h"
+#include "pe_bliss/pe_bliss_resources.h"
+#include "core/ustring.h"
+#include "core/dvector.h"
+#include "os/file_access.h"
+
+using namespace pe_bliss;
+
+//Rewrites the resources of the Windows executable at p_path: replaces the
+//version-info resource (and the icon resources, when icon_content is not empty) and
+//re-appends the overlay data found after the last section of the original file.
+//version_major/version_minor are packed into the file and product version fields;
+//the String arguments fill the matching version-info strings (product_name and
+//godot_version are skipped when empty).
+//Returns an empty String on success or an error message on failure.
+String pe_bliss_add_resrc(const char* p_path, int version_major, int version_minor,
+												String& company_name, String& file_description,
+												String& legal_copyright, String& version_text, 
+												String& product_name, String& godot_version,
+												DVector<uint8_t>& icon_content) {
+	try
+	{
+		pe_base image(pe_factory::create_pe(p_path));
+		
+		const section_list& pe_sections = image.get_image_sections();
+		uint32_t end_of_pe = 0;
+		DVector<uint8_t> overlay_data;
+		if(image.has_overlay())
+		{
+			//Save the overlay first: rebuild_pe() below is given the same path, and the overlay is appended again afterwards
+			end_of_pe = pe_sections.back().get_pointer_to_raw_data() + pe_sections.back().get_size_of_raw_data();
+			FileAccess *dst=FileAccess::open(p_path,FileAccess::READ);
+			if (!dst) //Bail out before the file is rebuilt instead of silently dropping the overlay
+				return String("Error In Add rsrc Section : cannot read overlay data");
+			overlay_data.resize(dst->get_len()-end_of_pe);
+			dst->seek(end_of_pe);
+			DVector<uint8_t>::Write overlay_data_write = overlay_data.write();
+			dst->get_buffer(overlay_data_write.ptr(),overlay_data.size());
+			dst->close();
+			memdelete(dst);
+		}
+		resource_directory root;
+		if(image.has_resources())
+		{
+			root = resource_directory(get_resources(image));
+		}
+		pe_resource_manager res(root);
+		if(image.has_resources())
+		{
+			//Drop the resource types that are regenerated below (icons only when a new icon is supplied)
+			if(icon_content.size()) {
+				if(res.resource_exists(pe_resource_viewer::resource_icon))
+					res.remove_resource_type(pe_resource_viewer::resource_icon);
+				if(res.resource_exists(pe_resource_viewer::resource_icon_group))
+					res.remove_resource_type(pe_resource_viewer::resource_icon_group);
+			}
+			if(res.resource_exists(pe_resource_viewer::resource_version))
+				res.remove_resource_type(pe_resource_viewer::resource_version);
+		}
+		file_version_info file_info;
+		file_info.set_file_os(file_version_info::file_os_nt_win32);
+		file_info.set_file_type(file_version_info::file_type_application);
+		//Major goes to the high word, minor to the low word; masking keeps an out-of-range minor from corrupting major
+		const unsigned int ver = ((unsigned int)(version_major & 0xFFFF) << 16) | (unsigned int)(version_minor & 0xFFFF);
+		file_info.set_file_version_ms(ver);
+		file_info.set_file_version_ls(0x00000000);
+		file_info.set_product_version_ms(ver);
+		file_info.set_product_version_ls(0x00000000);
+		lang_string_values_map strings;
+		translation_values_map translations;
+		version_info_editor version(strings, translations);
+		version.add_translation(version_info_editor::default_language_translation);
+		version.set_company_name(company_name.c_str());
+		version.set_file_description(file_description.c_str());
+		if (!product_name.empty()) {
+			version.set_internal_name((product_name+String(".exe")).c_str());
+			version.set_original_filename((product_name+String(".exe")).c_str());
+			version.set_product_name(product_name.c_str());
+		}
+		version.set_legal_copyright(legal_copyright.c_str());
+		version.set_product_version(version_text.c_str());
+		if(!godot_version.empty()) version.set_property(L"Godot Engine Version", godot_version.c_str() );
+		resource_version_info_writer(res).set_version_info(file_info, strings, translations, 1033, 1200);
+		if(icon_content.size()) {
+			//Copy the icon bytes in one go through a single Read accessor instead of per-element indexing
+			DVector<uint8_t>::Read icon_read = icon_content.read();
+			std::string icon(reinterpret_cast<const char*>(icon_read.ptr()), icon_content.size());
+			resource_cursor_icon_writer(res).add_icon(icon, L"MAIN_ICON", 1033);
+		}
+		if(image.has_resources())
+		{
+			rebuild_resources(image, root, image.section_from_directory(pe_win::image_directory_entry_resource));
+		} else {
+			//No resource directory yet: add a new readable ".rsrc" section to hold it
+			section new_resources;
+			new_resources.get_raw_data().resize(1);
+			new_resources.set_name(".rsrc");
+			new_resources.readable(true);
+			section& attached_section = image.add_section(new_resources);
+			rebuild_resources(image, root, attached_section);
+		}
+		rebuild_pe(image, p_path);
+		if(image.has_overlay() && end_of_pe) {
+			FileAccess *dst=FileAccess::open(p_path,FileAccess::READ_WRITE);
+			if (!dst) //The saved overlay could not be appended back: report it instead of returning success
+				return String("Error In Add rsrc Section : cannot restore overlay data");
+			dst->seek_end();
+			DVector<uint8_t>::Read overlay_data_read = overlay_data.read();
+			dst->store_buffer(overlay_data_read.ptr(),overlay_data.size());
+			dst->close();
+			memdelete(dst);
+		}
+		return String();
+	} catch(const pe_exception& e) {
+		String ret("Error In Add rsrc Section : ");
+		return ret + String(e.what());
+	}
+}
diff --git a/tools/pe_bliss/pe_bliss_godot.h b/tools/pe_bliss/pe_bliss_godot.h
new file mode 100644
index 0000000000..0365ca9eaf
--- /dev/null
+++ b/tools/pe_bliss/pe_bliss_godot.h
@@ -0,0 +1,7 @@
+//Rebuilds the resources (version info and, when icon_content is not empty, the icon) of the Windows executable at p_path.
+//Returns an empty String on success or an error message; this header has no includes, so String and DVector must be declared by the includer.
+String pe_bliss_add_resrc(const char* p_path, int version_major, int version_minor,
+												String& company_name, String& file_description,
+												String& legal_copyright, String& version_text, 
+												String& product_name, String& godot_version,
+												DVector<uint8_t>& icon_content);
diff --git a/tools/pe_bliss/pe_bliss_resources.h b/tools/pe_bliss/pe_bliss_resources.h
new file mode 100644
index 0000000000..60369f8011
--- /dev/null
+++ b/tools/pe_bliss/pe_bliss_resources.h
@@ -0,0 +1,36 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "file_version_info.h"
+#include "message_table.h"
+#include "pe_resource_manager.h"
+#include "pe_resource_viewer.h"
+#include "version_info_editor.h"
+#include "version_info_viewer.h"
+#include "resource_bitmap_reader.h"
+#include "resource_bitmap_writer.h"
+#include "resource_cursor_icon_reader.h"
+#include "resource_cursor_icon_writer.h"
+#include "resource_version_info_reader.h"
+#include "resource_version_info_writer.h"
+#include "resource_string_table_reader.h"
+#include "resource_message_list_reader.h"
diff --git a/tools/pe_bliss/pe_bound_import.cpp b/tools/pe_bliss/pe_bound_import.cpp
new file mode 100644
index 0000000000..4b54b36105
--- /dev/null
+++ b/tools/pe_bliss/pe_bound_import.cpp
@@ -0,0 +1,311 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_bound_import.h"
+#include "utils.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//BOUND IMPORT
+//Default constructor
+bound_import_ref::bound_import_ref()
+	:timestamp_(0)
+{}
+
+//Constructor from data
+bound_import_ref::bound_import_ref(const std::string& module_name, uint32_t timestamp)
+	:module_name_(module_name), timestamp_(timestamp)
+{}
+
+//Returns imported module name
+const std::string& bound_import_ref::get_module_name() const
+{
+	return module_name_;
+}
+
+//Returns bound import date and time stamp
+uint32_t bound_import_ref::get_timestamp() const
+{
+	return timestamp_;
+}
+
+//Sets module name
+void bound_import_ref::set_module_name(const std::string& module_name)
+{
+	module_name_ = module_name;
+}
+
+//Sets timestamp
+void bound_import_ref::set_timestamp(uint32_t timestamp)
+{
+	timestamp_ = timestamp;
+}
+
+//Default constructor
+bound_import::bound_import()
+	:timestamp_(0)
+{}
+
+//Constructor from data
+bound_import::bound_import(const std::string& module_name, uint32_t timestamp)
+	:module_name_(module_name), timestamp_(timestamp)
+{}
+
+//Returns imported module name
+const std::string& bound_import::get_module_name() const
+{
+	return module_name_;
+}
+
+//Returns bound import date and time stamp
+uint32_t bound_import::get_timestamp() const
+{
+	return timestamp_;
+}
+
+//Returns bound references count (number of entries in the module references list)
+size_t bound_import::get_module_ref_count() const
+{
+	return refs_.size();
+}
+
+//Returns module references
+const bound_import::ref_list& bound_import::get_module_ref_list() const
+{
+	return refs_;
+}
+
+//Adds module reference
+void bound_import::add_module_ref(const bound_import_ref& ref)
+{
+	refs_.push_back(ref);
+}
+
+//Clears module references list
+void bound_import::clear_module_refs()
+{
+	refs_.clear();
+}
+
+//Returns module references
+bound_import::ref_list& bound_import::get_module_ref_list()
+{
+	return refs_;
+}
+
+//Sets module name
+void bound_import::set_module_name(const std::string& module_name)
+{
+	module_name_ = module_name;
+}
+
+//Sets timestamp
+void bound_import::set_timestamp(uint32_t timestamp)
+{
+	timestamp_ = timestamp;
+}
+
+const bound_import_module_list get_bound_import_module_list(const pe_base& pe)
+{
+	//Parses the bound import directory of the image: returns one bound_import (with its module refs) per descriptor, or an empty list if the image has no bound imports; throws pe_exception(incorrect_bound_import_directory) on malformed data
+	bound_import_module_list ret;
+
+	//If image has no bound imports, return the empty list
+	if(!pe.has_bound_import())
+		return ret;
+
+	uint32_t bound_import_data_len =
+		pe.section_data_length_from_rva(pe.get_directory_rva(image_directory_entry_bound_import), pe.get_directory_rva(image_directory_entry_bound_import), section_data_raw, true); //Presumably the raw bytes available starting at the directory RVA - TODO confirm against pe_base
+
+	if(bound_import_data_len < pe.get_directory_size(image_directory_entry_bound_import)) //Declared directory size must fit into the available data
+		throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+	
+	const char* bound_import_data = pe.section_data_from_rva(pe.get_directory_rva(image_directory_entry_bound_import), section_data_raw, true);
+
+	//At least one whole descriptor must be available before the first one is dereferenced below
+	if(bound_import_data_len < sizeof(image_bound_import_descriptor))
+		throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+	//Current byte offset inside bound_import_data
+	unsigned long current_pos = 0;
+	//First bound import descriptor
+	//We're working with raw data here, so every offset is checked by hand (no section helpers available)
+	const image_bound_import_descriptor* descriptor = reinterpret_cast<const image_bound_import_descriptor*>(&bound_import_data[current_pos]);
+
+	//Enumerate until a descriptor with zero OffsetModuleName is met
+	while(descriptor->OffsetModuleName)
+	{
+		//Check module name offset (it is used below as an offset from the start of bound_import_data)
+		if(descriptor->OffsetModuleName >= bound_import_data_len)
+			throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+		//Check module name for null-termination within the remaining data
+		if(!pe_utils::is_null_terminated(&bound_import_data[descriptor->OffsetModuleName], bound_import_data_len - descriptor->OffsetModuleName))
+			throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+		//Create bound import descriptor structure
+		bound_import elem(&bound_import_data[descriptor->OffsetModuleName], descriptor->TimeDateStamp);
+
+		//Guard the size arithmetic below: bound the ref count and check the sum with current_pos via is_sum_safe
+		if(descriptor->NumberOfModuleForwarderRefs >= pe_utils::max_dword / sizeof(image_bound_forwarder_ref)
+			|| !pe_utils::is_sum_safe(current_pos, 2 /* this descriptor and the next one */ * sizeof(image_bound_import_descriptor) + descriptor->NumberOfModuleForwarderRefs * sizeof(image_bound_forwarder_ref)))
+			throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+		//Move after current descriptor ("descriptor" itself still points to the current one)
+		current_pos += sizeof(image_bound_import_descriptor);
+
+		//Enumerate referenced bound import descriptors
+		for(unsigned long i = 0; i != descriptor->NumberOfModuleForwarderRefs; ++i)
+		{
+			//They're just after parent descriptor
+			//Check that the whole ref structure lies inside the data
+			if(current_pos + sizeof(image_bound_forwarder_ref) > bound_import_data_len)
+				throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+			//Get IMAGE_BOUND_FORWARDER_REF pointer
+			const image_bound_forwarder_ref* ref_descriptor = reinterpret_cast<const image_bound_forwarder_ref*>(&bound_import_data[current_pos]);
+
+			//Check referenced module name offset
+			if(ref_descriptor->OffsetModuleName >= bound_import_data_len)
+				throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+			//And its null-termination
+			if(!pe_utils::is_null_terminated(&bound_import_data[ref_descriptor->OffsetModuleName], bound_import_data_len - ref_descriptor->OffsetModuleName))
+				throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+			//Add referenced module to current bound import structure
+			elem.add_module_ref(bound_import_ref(&bound_import_data[ref_descriptor->OffsetModuleName], ref_descriptor->TimeDateStamp));
+
+			//Move after referenced bound import descriptor
+			current_pos += sizeof(image_bound_forwarder_ref);
+		}
+
+		//Check that the next descriptor (possibly the terminating one) lies inside the data
+		if(current_pos + sizeof(image_bound_import_descriptor) > bound_import_data_len)
+			throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+
+		//Move to next bound import descriptor
+		descriptor = reinterpret_cast<const image_bound_import_descriptor*>(&bound_import_data[current_pos]);
+
+		//Save created descriptor structure and references
+		ret.push_back(elem);
+	}
+
+	//Return result
+	return ret;
+}
+
+//Rebuilds the bound import directory inside imports_section and returns its image_directory (RVA and size); throws pe_exception if the section is not attached or has insufficient space. imports - bound imported modules list
+//imports_section - section where bound import directory will be placed (must be attached to PE image)
+//offset_from_section_start - offset from imports_section raw data start (aligned up to sizeof(uint32_t) before use)
+//save_to_pe_header - if true, new bound import directory information will be saved to PE image headers
+//auto_strip_last_section - if true and bound imports are placed in the last section, it will be automatically stripped
+const image_directory rebuild_bound_imports(pe_base& pe, const bound_import_module_list& imports, section& imports_section, uint32_t offset_from_section_start, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	//Check that imports_section is attached to this PE image
+	if(!pe.section_attached(imports_section))
+		throw pe_exception("Bound import section must be attached to PE file", pe_exception::section_is_not_attached);
+
+	uint32_t directory_pos = pe_utils::align_up(offset_from_section_start, sizeof(uint32_t));
+	uint32_t needed_size = sizeof(image_bound_import_descriptor) /* Ending null descriptor */;
+	uint32_t needed_size_for_strings = 0;
+
+	//Calculate needed size for bound import data (structures first, then all module name strings)
+	for(bound_import_module_list::const_iterator it = imports.begin(); it != imports.end(); ++it)
+	{
+		const bound_import& import = *it;
+		needed_size += sizeof(image_bound_import_descriptor);
+		needed_size_for_strings += static_cast<uint32_t>((*it).get_module_name().length()) + 1 /* nullbyte */;
+
+		const bound_import::ref_list& refs = import.get_module_ref_list();
+		for(bound_import::ref_list::const_iterator ref_it = refs.begin(); ref_it != refs.end(); ++ref_it)
+		{
+			needed_size_for_strings += static_cast<uint32_t>((*ref_it).get_module_name().length()) + 1 /* nullbyte */;
+			needed_size += sizeof(image_bound_forwarder_ref);
+		}
+	}
+	
+	needed_size += needed_size_for_strings;
+	
+	//Check if imports_section is last one. If it's not, check if there's enough place for bound import data
+	if(&imports_section != &*(pe.get_image_sections().end() - 1) && 
+		(imports_section.empty() || pe_utils::align_up(imports_section.get_size_of_raw_data(), pe.get_file_alignment()) < needed_size + directory_pos))
+		throw pe_exception("Insufficient space for bound import directory", pe_exception::insufficient_space);
+
+	std::string& raw_data = imports_section.get_raw_data();
+
+	//This will be done only if imports_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + directory_pos)
+		raw_data.resize(needed_size + directory_pos); //Expand section raw data
+	
+	uint32_t current_pos_for_structures = directory_pos;
+	uint32_t current_pos_for_strings = current_pos_for_structures + needed_size - needed_size_for_strings; //Strings start right after the terminating null descriptor
+
+	for(bound_import_module_list::const_iterator it = imports.begin(); it != imports.end(); ++it)
+	{
+		const bound_import& import = *it;
+		image_bound_import_descriptor descriptor;
+		descriptor.NumberOfModuleForwarderRefs = static_cast<uint16_t>(import.get_module_ref_list().size()); //NOTE(review): silently truncated if there are more than 65535 refs
+		descriptor.OffsetModuleName = static_cast<uint16_t>(current_pos_for_strings - directory_pos); //Offset from directory start; NOTE(review): silently truncated if it exceeds 65535
+		descriptor.TimeDateStamp = import.get_timestamp();
+
+		memcpy(&raw_data[current_pos_for_structures], &descriptor, sizeof(descriptor));
+		current_pos_for_structures += sizeof(descriptor);
+		
+		size_t length = import.get_module_name().length() + 1 /* nullbyte */;
+		memcpy(&raw_data[current_pos_for_strings], import.get_module_name().c_str(), length);
+		current_pos_for_strings += static_cast<uint32_t>(length);
+
+		const bound_import::ref_list& refs = import.get_module_ref_list();
+		for(bound_import::ref_list::const_iterator ref_it = refs.begin(); ref_it != refs.end(); ++ref_it)
+		{
+			const bound_import_ref& ref = *ref_it;
+			image_bound_forwarder_ref ref_descriptor = {0};
+			ref_descriptor.OffsetModuleName = static_cast<uint16_t>(current_pos_for_strings - directory_pos);
+			ref_descriptor.TimeDateStamp = ref.get_timestamp();
+
+			memcpy(&raw_data[current_pos_for_structures], &ref_descriptor, sizeof(ref_descriptor));
+			current_pos_for_structures += sizeof(ref_descriptor);
+
+			length = ref.get_module_name().length() + 1 /* nullbyte */;
+			memcpy(&raw_data[current_pos_for_strings], ref.get_module_name().c_str(), length);
+			current_pos_for_strings += static_cast<uint32_t>(length);
+		}
+	}
+	memset(&raw_data[current_pos_for_structures], 0, sizeof(image_bound_import_descriptor)); //Write the ending null descriptor explicitly: raw_data may hold stale bytes here when it was not expanded above
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(imports_section, auto_strip_last_section);
+	
+	image_directory ret(pe.rva_from_section_offset(imports_section, directory_pos), needed_size);
+
+	//If auto-rewrite of PE headers is required
+	if(save_to_pe_header)
+	{
+		pe.set_directory_rva(image_directory_entry_bound_import, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_bound_import, ret.get_size());
+	}
+
+	return ret;
+}
+}
diff --git a/tools/pe_bliss/pe_bound_import.h b/tools/pe_bliss/pe_bound_import.h
new file mode 100644
index 0000000000..667e28792e
--- /dev/null
+++ b/tools/pe_bliss/pe_bound_import.h
@@ -0,0 +1,108 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include <string>
+#include "pe_structures.h"
+#include "pe_base.h"
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Class representing bound import reference
+class bound_import_ref
+{
+public:
+	//Default constructor
+	bound_import_ref();
+	//Constructor from data
+	bound_import_ref(const std::string& module_name, uint32_t timestamp);
+
+	//Returns imported module name
+	const std::string& get_module_name() const;
+	//Returns bound import date and time stamp
+	uint32_t get_timestamp() const;
+
+public: //Setters
+	//Sets module name
+	void set_module_name(const std::string& module_name);
+	//Sets timestamp
+	void set_timestamp(uint32_t timestamp);
+
+private:
+	std::string module_name_; //Imported module name
+	uint32_t timestamp_; //Bound import timestamp
+};
+
+//Class representing image bound import information: one bound module plus its list of module references
+class bound_import
+{
+public:
+	typedef std::vector<bound_import_ref> ref_list; //List of module references
+
+public:
+	//Default constructor (zero timestamp, no references)
+	bound_import();
+	//Constructor from data
+	bound_import(const std::string& module_name, uint32_t timestamp);
+
+	//Returns imported module name
+	const std::string& get_module_name() const;
+	//Returns bound import date and time stamp
+	uint32_t get_timestamp() const;
+
+	//Returns bound references count
+	size_t get_module_ref_count() const;
+	//Returns module references (read-only)
+	const ref_list& get_module_ref_list() const;
+
+public: //Setters
+	//Sets module name
+	void set_module_name(const std::string& module_name);
+	//Sets timestamp
+	void set_timestamp(uint32_t timestamp);
+
+	//Adds module reference (appended to the end of the list)
+	void add_module_ref(const bound_import_ref& ref);
+	//Clears module references list
+	void clear_module_refs();
+	//Returns module references (modifiable)
+	ref_list& get_module_ref_list();
+
+private:
+	std::string module_name_; //Imported module name
+	uint32_t timestamp_; //Bound import timestamp
+	ref_list refs_; //Module references list
+};
+
+typedef std::vector<bound_import> bound_import_module_list;
+
+//Returns bound import information (list of bound imported modules of the image)
+const bound_import_module_list get_bound_import_module_list(const pe_base& pe);
+
+//Bound import directory rebuilder. imports - bound imported modules list
+//imports_section - section where bound import directory will be placed (must be attached to PE image)
+//offset_from_section_start - offset from imports_section raw data start
+//save_to_pe_header - if true, new bound import directory information will be saved to PE image headers
+//auto_strip_last_section - if true and bound imports are placed in the last section, it will be automatically stripped
+const image_directory rebuild_bound_imports(pe_base& pe, const bound_import_module_list& imports, section& imports_section, uint32_t offset_from_section_start = 0, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+}
diff --git a/tools/pe_bliss/pe_checksum.cpp b/tools/pe_bliss/pe_checksum.cpp
new file mode 100644
index 0000000000..5971a33c90
--- /dev/null
+++ b/tools/pe_bliss/pe_checksum.cpp
@@ -0,0 +1,103 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_checksum.h"
+#include "pe_structures.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Calculate checksum of image: sums the file as 32-bit words (skipping the header "CheckSum" field), folds the sum to 16 bits and adds the file size. The stream position and exception mask are restored before returning or rethrowing
+uint32_t calculate_checksum(std::istream& file)
+{
+	//Save istream state (exception mask and current position)
+	std::ios_base::iostate state = file.exceptions();
+	std::streamoff old_offset = file.tellg();
+
+	//Checksum value (64-bit accumulator so that carries out of 32 bits can be folded back)
+	unsigned long long checksum = 0;
+
+	try
+	{
+		image_dos_header header;
+
+		file.exceptions(std::ios::goodbit); //Disable stream exceptions: a short final read is expected
+
+		//Read DOS header
+		pe_base::read_dos_header(file, header);
+
+		//Calculate PE checksum
+		file.seekg(0);
+		unsigned long long top = 0xFFFFFFFF;
+		top++;
+
+		//"CheckSum" field position in optional PE headers - it's always 64 for PE and PE+
+		static const unsigned long checksum_pos_in_optional_headers = 64;
+		//Calculate real PE headers "CheckSum" field position
+		//Sum is safe here
+		unsigned long pe_checksum_pos = header.e_lfanew + sizeof(image_file_header) + sizeof(uint32_t) + checksum_pos_in_optional_headers;
+
+		//Calculate checksum for each DWORD of file
+		std::streamoff filesize = pe_utils::get_file_size(file);
+		for(long long i = 0; i < filesize; i += 4)
+		{
+			uint32_t dw = 0; //Exactly 4 bytes on every platform ("unsigned long" is 8 bytes on LP64 and would consume two DWORDs per step)
+
+			//Read DWORD from file (a short final read leaves the missing high bytes zero)
+			file.read(reinterpret_cast<char*>(&dw), sizeof(dw));
+			//Skip "CheckSum" DWORD
+			if(i == pe_checksum_pos)
+				continue;
+
+			//Calculate checksum, folding the carry back into the low 32 bits
+			checksum = (checksum & 0xffffffff) + dw + (checksum >> 32);
+			if(checksum > top)
+				checksum = (checksum & 0xffffffff) + (checksum >> 32);
+		}
+
+		//Finish checksum: fold to 16 bits
+		checksum = (checksum & 0xffff) + (checksum >> 16);
+		checksum = (checksum) + (checksum >> 16);
+		checksum = checksum & 0xffff;
+
+		checksum += static_cast<unsigned long>(filesize);
+	}
+	catch(const std::exception&)
+	{
+		//If something went wrong, restore istream state: clear error flags first, otherwise seekg() is a no-op and exceptions() may throw
+		file.clear();
+		file.seekg(old_offset);
+		file.exceptions(state);
+		//Rethrow
+		throw;
+	}
+
+	//Restore istream state: clear error flags first (a short final read sets failbit), then position, then exception mask
+	file.clear();
+	file.seekg(old_offset);
+	file.exceptions(state);
+
+	//Return checksum
+	return static_cast<uint32_t>(checksum);
+}
+}
diff --git a/tools/pe_bliss/pe_checksum.h b/tools/pe_bliss/pe_checksum.h
new file mode 100644
index 0000000000..a568d5d369
--- /dev/null
+++ b/tools/pe_bliss/pe_checksum.h
@@ -0,0 +1,30 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <istream>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+//Calculate checksum of image (performs no checks on PE structures)
+uint32_t calculate_checksum(std::istream& file);
+}
diff --git a/tools/pe_bliss/pe_debug.cpp b/tools/pe_bliss/pe_debug.cpp
new file mode 100644
index 0000000000..a0ed3f5af1
--- /dev/null
+++ b/tools/pe_bliss/pe_debug.cpp
@@ -0,0 +1,865 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_debug.h"
+#include "utils.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+//DEBUG
+//Default constructor
+debug_info::debug_info()
+	:characteristics_(0),
+	time_stamp_(0),
+	major_version_(0), minor_version_(0),
+	type_(0),
+	size_of_data_(0),
+	address_of_raw_data_(0),
+	pointer_to_raw_data_(0),
+	advanced_info_type_(advanced_info_none)
+{}
+
+//Constructor from data
+debug_info::debug_info(const image_debug_directory& debug)
+	:characteristics_(debug.Characteristics),
+	time_stamp_(debug.TimeDateStamp),
+	major_version_(debug.MajorVersion), minor_version_(debug.MinorVersion),
+	type_(debug.Type),
+	size_of_data_(debug.SizeOfData),
+	address_of_raw_data_(debug.AddressOfRawData),
+	pointer_to_raw_data_(debug.PointerToRawData),
+	advanced_info_type_(advanced_info_none)
+{}
+
+//Returns debug characteristics
+uint32_t debug_info::get_characteristics() const
+{
+	return characteristics_;
+}
+
+//Returns debug datetimestamp
+uint32_t debug_info::get_time_stamp() const
+{
+	return time_stamp_;
+}
+
+//Returns major version
+uint32_t debug_info::get_major_version() const
+{
+	return major_version_;
+}
+
+//Returns minor version
+uint32_t debug_info::get_minor_version() const
+{
+	return minor_version_;
+}
+
+//Returns type of debug info (unchecked)
+uint32_t debug_info::get_type_raw() const
+{
+	return type_;
+}
+
+//Returns type of debug info from debug_info_type enumeration; raw types with no matching case map to debug_type_unknown
+debug_info::debug_info_type debug_info::get_type() const
+{
+	//Determine debug type from the raw type_ value
+	switch(type_)
+	{
+	case image_debug_type_coff:
+		return debug_type_coff;
+
+	case image_debug_type_codeview:
+		return debug_type_codeview;
+
+	case image_debug_type_fpo:
+		return debug_type_fpo;
+
+	case image_debug_type_misc:
+		return debug_type_misc;
+
+	case image_debug_type_exception:
+		return debug_type_exception;
+
+	case image_debug_type_fixup:
+		return debug_type_fixup;
+
+	case image_debug_type_omap_to_src:
+		return debug_type_omap_to_src;
+
+	case image_debug_type_omap_from_src:
+		return debug_type_omap_from_src;
+
+	case image_debug_type_borland:
+		return debug_type_borland;
+
+	case image_debug_type_clsid:
+		return debug_type_clsid;
+
+	case image_debug_type_reserved10:
+		return debug_type_reserved10;
+	}
+
+	return debug_type_unknown; //No case matched
+}
+
+//Returns size of debug data (internal, .pdb or other file doesn't count)
+uint32_t debug_info::get_size_of_data() const
+{
+	return size_of_data_;
+}
+
+//Returns RVA of debug info when mapped to memory or zero, if info is not mapped
+uint32_t debug_info::get_rva_of_raw_data() const
+{
+	return address_of_raw_data_;
+}
+
+//Returns raw file pointer to raw data
+uint32_t debug_info::get_pointer_to_raw_data() const
+{
+	return pointer_to_raw_data_;
+}
+
+//Copy constructor: copies all plain fields and deep-copies the advanced debug info (if any) of info
+debug_info::debug_info(const debug_info& info)
+	:characteristics_(info.characteristics_),
+	time_stamp_(info.time_stamp_),
+	major_version_(info.major_version_), minor_version_(info.minor_version_),
+	type_(info.type_),
+	size_of_data_(info.size_of_data_),
+	address_of_raw_data_(info.address_of_raw_data_),
+	pointer_to_raw_data_(info.pointer_to_raw_data_),
+	advanced_info_type_(info.advanced_info_type_)
+{
+	copy_advanced_info(info); //Frees first: presumably safe because advanced_debug_info_ is default-constructed to a zero pointer - confirm in pe_debug.h
+}
+
+//Copy assignment operator: replaces all fields and the advanced debug info with deep copies of info's; safe for self-assignment
+debug_info& debug_info::operator=(const debug_info& info)
+{
+	//copy_advanced_info() frees our advanced info before reading info's, so on self-assignment it must be skipped or the data would be lost
+	if(this != &info) copy_advanced_info(info);
+	characteristics_ = info.characteristics_;
+	time_stamp_ = info.time_stamp_;
+	major_version_ = info.major_version_;
+	minor_version_ = info.minor_version_;
+	type_ = info.type_;
+	size_of_data_ = info.size_of_data_;
+	address_of_raw_data_ = info.address_of_raw_data_;
+	pointer_to_raw_data_ = info.pointer_to_raw_data_;
+	advanced_info_type_ = info.advanced_info_type_;
+
+	return *this;
+}
+
+//Default constructor
+debug_info::advanced_info::advanced_info()
+	:adv_pdb_7_0_info(0) //Zero pointer to advanced data
+{}
+
+//Returns true if advanced debug info is present
+bool debug_info::advanced_info::is_present() const
+{
+	return adv_pdb_7_0_info != 0;
+}
+
+//Helper for advanced debug information copying: frees our current advanced info, then deep-copies info's (if any) and adopts its type. Must not be called with info == *this (the free happens before the copy)
+void debug_info::copy_advanced_info(const debug_info& info)
+{
+	free_present_advanced_info(); //Also resets advanced_info_type_ to advanced_info_none
+
+	switch(info.advanced_info_type_) //Allocate a copy of the structure matching the source type
+	{
+	case advanced_info_pdb_7_0:
+		advanced_debug_info_.adv_pdb_7_0_info = new pdb_7_0_info(*info.advanced_debug_info_.adv_pdb_7_0_info);
+		break;
+	case advanced_info_pdb_2_0:
+		advanced_debug_info_.adv_pdb_2_0_info = new pdb_2_0_info(*info.advanced_debug_info_.adv_pdb_2_0_info);
+		break;
+	case advanced_info_misc:
+		advanced_debug_info_.adv_misc_info = new misc_debug_info(*info.advanced_debug_info_.adv_misc_info);
+		break;
+	case advanced_info_coff:
+		advanced_debug_info_.adv_coff_info = new coff_debug_info(*info.advanced_debug_info_.adv_coff_info);
+		break;
+	default: //No advanced info in the source: nothing to copy
+		break;
+	}
+
+	advanced_info_type_ = info.advanced_info_type_;
+}
+
+//Helper for clearing any present advanced debug information (leaves a zero pointer and advanced_info_none type)
+void debug_info::free_present_advanced_info()
+{
+	switch(advanced_info_type_) //The type tag tells which union member owns the allocation
+	{
+	case advanced_info_pdb_7_0:
+		delete advanced_debug_info_.adv_pdb_7_0_info;
+		break;
+	case advanced_info_pdb_2_0:
+		delete advanced_debug_info_.adv_pdb_2_0_info;
+		break;
+	case advanced_info_misc:
+		delete advanced_debug_info_.adv_misc_info;
+		break;
+	case advanced_info_coff:
+		delete advanced_debug_info_.adv_coff_info;
+		break;
+	default:
+		break; //Nothing is deleted for the other types
+	}
+
+	advanced_debug_info_.adv_pdb_7_0_info = 0; //All union members are pointers, so zeroing this one is used to reset the whole union
+	advanced_info_type_ = advanced_info_none;
+}
+
+//Destructor
+debug_info::~debug_info()
+{
+	free_present_advanced_info();
+}
+
+//Sets advanced debug information
+void debug_info::set_advanced_debug_info(const pdb_7_0_info& info)
+{
+	free_present_advanced_info();
+	advanced_debug_info_.adv_pdb_7_0_info = new pdb_7_0_info(info);
+	advanced_info_type_ = advanced_info_pdb_7_0;
+}
+
+void debug_info::set_advanced_debug_info(const pdb_2_0_info& info)
+{
+	free_present_advanced_info();
+	advanced_debug_info_.adv_pdb_2_0_info = new pdb_2_0_info(info);
+	advanced_info_type_ = advanced_info_pdb_2_0;
+}
+
+void debug_info::set_advanced_debug_info(const misc_debug_info& info)
+{
+	free_present_advanced_info();
+	advanced_debug_info_.adv_misc_info = new misc_debug_info(info);
+	advanced_info_type_ = advanced_info_misc;
+}
+
+void debug_info::set_advanced_debug_info(const coff_debug_info& info)
+{
+	free_present_advanced_info();
+	advanced_debug_info_.adv_coff_info = new coff_debug_info(info);
+	advanced_info_type_ = advanced_info_coff;
+}
+
+//Returns advanced debug information type
+debug_info::advanced_info_type debug_info::get_advanced_info_type() const
+{
+	return advanced_info_type_;
+}
+
+//Returns advanced debug information or throws an exception,
+//if requested information type is not contained by structure
+template<>
+const pdb_7_0_info debug_info::get_advanced_debug_info<pdb_7_0_info>() const
+{
+	if(advanced_info_type_ != advanced_info_pdb_7_0)
+		throw pe_exception("Debug info structure does not contain PDB 7.0 data", pe_exception::advanced_debug_information_request_error);
+
+	return *advanced_debug_info_.adv_pdb_7_0_info;
+}
+
+template<>
+const pdb_2_0_info debug_info::get_advanced_debug_info<pdb_2_0_info>() const
+{
+	if(advanced_info_type_ != advanced_info_pdb_2_0)
+		throw pe_exception("Debug info structure does not contain PDB 2.0 data", pe_exception::advanced_debug_information_request_error);
+
+	return *advanced_debug_info_.adv_pdb_2_0_info;
+}
+
+template<>
+const misc_debug_info debug_info::get_advanced_debug_info<misc_debug_info>() const
+{
+	if(advanced_info_type_ != advanced_info_misc)
+		throw pe_exception("Debug info structure does not contain MISC data", pe_exception::advanced_debug_information_request_error);
+
+	return *advanced_debug_info_.adv_misc_info;
+}
+
+template<>
+const coff_debug_info debug_info::get_advanced_debug_info<coff_debug_info>() const
+{
+	if(advanced_info_type_ != advanced_info_coff)
+		throw pe_exception("Debug info structure does not contain COFF data", pe_exception::advanced_debug_information_request_error);
+
+	return *advanced_debug_info_.adv_coff_info;
+}
+
+//Sets advanced debug information type, if no advanced info structure available (any present advanced data is freed; other types are ignored)
+void debug_info::set_advanced_info_type(advanced_info_type type)
+{
+	free_present_advanced_info(); //Also resets advanced_info_type_ to advanced_info_none, so the requested type must be tested below
+	if(type >= advanced_info_codeview_4_0) //Don't set info type for those types, which have advanced info structures
+		advanced_info_type_ = type;
+}
+
+//Default constructor
+pdb_7_0_info::pdb_7_0_info()
+	:age_(0)
+{
+	memset(&guid_, 0, sizeof(guid_));
+}
+
+//Constructor from data
+pdb_7_0_info::pdb_7_0_info(const CV_INFO_PDB70* info)
+	:age_(info->Age), guid_(info->Signature),
+	pdb_file_name_(reinterpret_cast<const char*>(info->PdbFileName)) //Must be checked before for null-termination
+{}
+
+//Returns debug PDB 7.0 structure GUID
+const guid pdb_7_0_info::get_guid() const
+{
+	return guid_;
+}
+
+//Returns age of build
+uint32_t pdb_7_0_info::get_age() const
+{
+	return age_;
+}
+
+//Returns PDB file name / path
+const std::string& pdb_7_0_info::get_pdb_file_name() const
+{
+	return pdb_file_name_;
+}
+
+//Default constructor
+pdb_2_0_info::pdb_2_0_info()
+	:age_(0), signature_(0)
+{}
+
+//Constructor from data
+pdb_2_0_info::pdb_2_0_info(const CV_INFO_PDB20* info)
+	:age_(info->Age), signature_(info->Signature),
+	pdb_file_name_(reinterpret_cast<const char*>(info->PdbFileName)) //Must be checked before for null-termination
+{}
+
+//Returns debug PDB 2.0 structure signature
+uint32_t pdb_2_0_info::get_signature() const
+{
+	return signature_;
+}
+
+//Returns age of build
+uint32_t pdb_2_0_info::get_age() const
+{
+	return age_;
+}
+
+//Returns PDB file name / path
+const std::string& pdb_2_0_info::get_pdb_file_name() const
+{
+	return pdb_file_name_;
+}
+
+//Default constructor
+misc_debug_info::misc_debug_info()
+	:data_type_(0), unicode_(false)
+{}
+
+//Constructor from data (info must point to a record with at least info->Length readable bytes)
+misc_debug_info::misc_debug_info(const image_debug_misc* info)
+	:data_type_(info->DataType), unicode_(info->Unicode ? true : false)
+{
+	//IMAGE_DEBUG_MISC::Data must be checked before!
+	const size_t data_size = info->Length >= sizeof(image_debug_misc) - 1 ? info->Length - (sizeof(image_debug_misc) - 1 /* BYTE[1] in the end of structure */) : 0; //Zero instead of unsigned wrap-around for too short records
+	if(info->Unicode)
+	{
+#ifdef PE_BLISS_WINDOWS
+		debug_data_unicode_ = std::wstring(reinterpret_cast<const wchar_t*>(info->Data), data_size / 2);
+#else
+		debug_data_unicode_ = pe_utils::from_ucs2(u16string(reinterpret_cast<const unicode16_t*>(info->Data), data_size / 2));
+#endif
+		pe_utils::strip_nullbytes(debug_data_unicode_); //Strip nullbytes in the end of string
+	}
+	else
+	{
+		debug_data_ansi_ = std::string(reinterpret_cast<const char*>(info->Data), data_size);
+		pe_utils::strip_nullbytes(debug_data_ansi_); //Strip nullbytes in the end of string
+	}
+}
+
+//Returns debug data type
+uint32_t misc_debug_info::get_data_type() const
+{
+	return data_type_;
+}
+
+//Returns true if data type is exe name
+bool misc_debug_info::is_exe_name() const
+{
+	return data_type_ == image_debug_misc_exename;
+}
+
+//Returns true if debug data is UNICODE
+bool misc_debug_info::is_unicode() const
+{
+	return unicode_;
+}
+
+//Returns debug data (ANSI)
+const std::string& misc_debug_info::get_data_ansi() const
+{
+	return debug_data_ansi_;
+}
+
+//Returns debug data (UNICODE)
+const std::wstring& misc_debug_info::get_data_unicode() const
+{
+	return debug_data_unicode_;
+}
+
+//Default constructor
+coff_debug_info::coff_debug_info()
+	:number_of_symbols_(0),
+	lva_to_first_symbol_(0),
+	number_of_line_numbers_(0),
+	lva_to_first_line_number_(0),
+	rva_to_first_byte_of_code_(0),
+	rva_to_last_byte_of_code_(0),
+	rva_to_first_byte_of_data_(0),
+	rva_to_last_byte_of_data_(0)
+{}
+
+//Constructor from data
+coff_debug_info::coff_debug_info(const image_coff_symbols_header* info)
+	:number_of_symbols_(info->NumberOfSymbols),
+	lva_to_first_symbol_(info->LvaToFirstSymbol),
+	number_of_line_numbers_(info->NumberOfLinenumbers),
+	lva_to_first_line_number_(info->LvaToFirstLinenumber),
+	rva_to_first_byte_of_code_(info->RvaToFirstByteOfCode),
+	rva_to_last_byte_of_code_(info->RvaToLastByteOfCode),
+	rva_to_first_byte_of_data_(info->RvaToFirstByteOfData),
+	rva_to_last_byte_of_data_(info->RvaToLastByteOfData)
+{}
+
+//Returns number of symbols
+uint32_t coff_debug_info::get_number_of_symbols() const
+{
+	return number_of_symbols_;
+}
+
+//Returns virtual address of the first symbol
+uint32_t coff_debug_info::get_lva_to_first_symbol() const
+{
+	return lva_to_first_symbol_;
+}
+
+//Returns number of line-number entries
+uint32_t coff_debug_info::get_number_of_line_numbers() const
+{
+	return number_of_line_numbers_;
+}
+
+//Returns virtual address of the first line-number entry
+uint32_t coff_debug_info::get_lva_to_first_line_number() const
+{
+	return lva_to_first_line_number_;
+}
+
+//Returns relative virtual address of the first byte of code
+uint32_t coff_debug_info::get_rva_to_first_byte_of_code() const
+{
+	return rva_to_first_byte_of_code_;
+}
+
+//Returns relative virtual address of the last byte of code
+uint32_t coff_debug_info::get_rva_to_last_byte_of_code() const
+{
+	return rva_to_last_byte_of_code_;
+}
+
+//Returns relative virtual address of the first byte of data
+uint32_t coff_debug_info::get_rva_to_first_byte_of_data() const
+{
+	return rva_to_first_byte_of_data_;
+}
+
+//Returns relative virtual address of the last byte of data
+uint32_t coff_debug_info::get_rva_to_last_byte_of_data() const
+{
+	return rva_to_last_byte_of_data_;
+}
+
+//Returns COFF symbols list
+const coff_debug_info::coff_symbols_list& coff_debug_info::get_symbols() const
+{
+	return symbols_;
+}
+
+//Adds COFF symbol
+void coff_debug_info::add_symbol(const coff_symbol& sym)
+{
+	symbols_.push_back(sym);
+}
+
+//Default constructor
+coff_debug_info::coff_symbol::coff_symbol()
+	:storage_class_(0),
+	index_(0),
+	section_number_(0), rva_(0),
+	type_(0),
+	is_filename_(false)
+{}
+
+//Returns storage class
+uint32_t coff_debug_info::coff_symbol::get_storage_class() const
+{
+	return storage_class_;
+}
+
+//Returns symbol index
+uint32_t coff_debug_info::coff_symbol::get_index() const
+{
+	return index_;
+}
+
+//Returns section number
+uint32_t coff_debug_info::coff_symbol::get_section_number() const
+{
+	return section_number_;
+}
+
+//Returns RVA
+uint32_t coff_debug_info::coff_symbol::get_rva() const
+{
+	return rva_;
+}
+
+//Returns true if structure contains file name
+bool coff_debug_info::coff_symbol::is_file() const
+{
+	return is_filename_;
+}
+
+//Returns text data (symbol or file name)
+const std::string& coff_debug_info::coff_symbol::get_symbol() const
+{
+	return name_;
+}
+
+//Sets storage class
+void coff_debug_info::coff_symbol::set_storage_class(uint32_t storage_class)
+{
+	storage_class_ = storage_class;
+}
+
+//Sets symbol index
+void coff_debug_info::coff_symbol::set_index(uint32_t index)
+{
+	index_ = index;
+}
+
+//Sets section number
+void coff_debug_info::coff_symbol::set_section_number(uint32_t section_number)
+{
+	section_number_ = section_number;
+}
+
+//Sets RVA
+void coff_debug_info::coff_symbol::set_rva(uint32_t rva)
+{
+	rva_ = rva;
+}
+
+//Sets file name
+void coff_debug_info::coff_symbol::set_file_name(const std::string& file_name)
+{
+	name_ = file_name;
+	is_filename_ = true;
+}
+
+//Sets symbol name
+void coff_debug_info::coff_symbol::set_symbol_name(const std::string& symbol_name)
+{
+	name_ = symbol_name;
+	is_filename_ = false;
+}
+
+//Returns type
+uint16_t coff_debug_info::coff_symbol::get_type() const
+{
+	return type_;
+}
+
+//Sets type
+void coff_debug_info::coff_symbol::set_type(uint16_t type)
+{
+	type_ = type;
+}
+
+//Returns debug information list (empty if there is no debug directory); throws pe_exception with incorrect_debug_directory id on malformed data
+const debug_info_list get_debug_information(const pe_base& pe)
+{
+	debug_info_list ret;
+
+	//If there's no debug directory, return empty list
+	if(!pe.has_debug())
+		return ret;
+
+	//Check the length in bytes of the section containing debug directory
+	if(pe.section_data_length_from_rva(pe.get_directory_rva(image_directory_entry_debug), pe.get_directory_rva(image_directory_entry_debug), section_data_virtual, true)
+		< sizeof(image_debug_directory))
+		throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+	unsigned long current_pos = pe.get_directory_rva(image_directory_entry_debug);
+
+	//First IMAGE_DEBUG_DIRECTORY table
+	image_debug_directory directory = pe.section_data_from_rva<image_debug_directory>(current_pos, section_data_virtual, true);
+
+	if(!pe_utils::is_sum_safe(pe.get_directory_rva(image_directory_entry_debug), pe.get_directory_size(image_directory_entry_debug)))
+		throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+	//Iterate over all IMAGE_DEBUG_DIRECTORY directories
+	while(directory.PointerToRawData
+		&& current_pos < pe.get_directory_rva(image_directory_entry_debug) + pe.get_directory_size(image_directory_entry_debug))
+	{
+		//Create debug information structure
+		debug_info info(directory);
+
+		//Find raw debug data
+		const pe_base::debug_data_list& debug_datas = pe.get_raw_debug_data_list();
+		pe_base::debug_data_list::const_iterator it = debug_datas.find(directory.PointerToRawData);
+		if(it != debug_datas.end()) //If it exists, we'll do some detailed debug info research
+		{
+			const std::string& debug_data = (*it).second;
+			switch(directory.Type)
+			{
+			case image_debug_type_coff:
+				{
+					//Check data length
+					if(debug_data.length() < sizeof(image_coff_symbols_header))
+						throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+					//Get coff header structure pointer
+					const image_coff_symbols_header* coff = reinterpret_cast<const image_coff_symbols_header*>(debug_data.data());
+
+					//Check possible overflows
+					if(coff->NumberOfSymbols >= pe_utils::max_dword / sizeof(image_symbol)
+						|| !pe_utils::is_sum_safe(coff->NumberOfSymbols * sizeof(image_symbol), coff->LvaToFirstSymbol))
+						throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+					//Check data length again
+					if(debug_data.length() < coff->NumberOfSymbols * sizeof(image_symbol) + coff->LvaToFirstSymbol)
+						throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+					//Create COFF debug info structure
+					coff_debug_info coff_info(coff);
+
+					//Enumerate debug symbols data
+					for(uint32_t i = 0; i < coff->NumberOfSymbols; ++i)
+					{
+						//Safe sum (checked above)
+						const image_symbol* sym = reinterpret_cast<const image_symbol*>(debug_data.data() + i * sizeof(image_symbol) + coff->LvaToFirstSymbol);
+
+						coff_debug_info::coff_symbol symbol;
+						symbol.set_index(i); //Save symbol index
+						symbol.set_storage_class(sym->StorageClass); //Save storage class
+						symbol.set_type(sym->Type); //Save symbol type
+
+						//Check data length again
+						if(!pe_utils::is_sum_safe(i, sym->NumberOfAuxSymbols)
+							|| (i + sym->NumberOfAuxSymbols) > coff->NumberOfSymbols
+							|| debug_data.length() < (i + 1) * sizeof(image_symbol) + coff->LvaToFirstSymbol + sym->NumberOfAuxSymbols * sizeof(image_symbol))
+							throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+						//If symbol is filename
+						if(sym->StorageClass == image_sym_class_file)
+						{
+							//Save file name, it is situated just after this IMAGE_SYMBOL structure (same base offset as sym; range was checked above)
+							std::string file_name(reinterpret_cast<const char*>(debug_data.data() + coff->LvaToFirstSymbol + (i + 1) * sizeof(image_symbol)), sym->NumberOfAuxSymbols * sizeof(image_symbol));
+							pe_utils::strip_nullbytes(file_name);
+							symbol.set_file_name(file_name);
+
+							//Save symbol info
+							coff_info.add_symbol(symbol);
+
+							//Move to next symbol
+							i += sym->NumberOfAuxSymbols;
+							continue;
+						}
+
+						//Dump some other symbols
+						if(((sym->StorageClass == image_sym_class_static)
+							&& (sym->NumberOfAuxSymbols == 0)
+							&& (sym->SectionNumber == 1))
+							||
+							((sym->StorageClass == image_sym_class_external)
+							&& ISFCN(sym->Type)
+							&& (sym->SectionNumber > 0))
+							)
+						{
+							//Save RVA and section number
+							symbol.set_section_number(sym->SectionNumber);
+							symbol.set_rva(sym->Value);
+
+							//If symbol has short name
+							if(sym->N.Name.Short)
+							{
+								//Copy and save symbol name
+								char name_buff[9];
+								memcpy(name_buff, sym->N.ShortName, 8);
+								name_buff[8] = '\0';
+								symbol.set_symbol_name(name_buff);
+							}
+							else
+							{
+								//Symbol has long name
+
+								//Check possible overflows
+								if(!pe_utils::is_sum_safe(coff->LvaToFirstSymbol + coff->NumberOfSymbols * sizeof(image_symbol), sym->N.Name.Long))
+									throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+								//Here we have an offset to the string table
+								uint32_t symbol_offset = coff->LvaToFirstSymbol + coff->NumberOfSymbols * sizeof(image_symbol) + sym->N.Name.Long;
+
+								//Check data length
+								if(debug_data.length() < symbol_offset)
+									throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+								//Check symbol name for null-termination
+								if(!pe_utils::is_null_terminated(debug_data.data() + symbol_offset, debug_data.length() - symbol_offset))
+									throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+								//Save symbol name
+								symbol.set_symbol_name(debug_data.data() + symbol_offset);
+							}
+
+							//Save symbol info
+							coff_info.add_symbol(symbol);
+
+							//Move to next symbol
+							i += sym->NumberOfAuxSymbols;
+							continue;
+						}
+					}
+
+					info.set_advanced_debug_info(coff_info);
+				}
+				break;
+
+			case image_debug_type_codeview:
+				{
+					//Check data length (size of the structure itself, not of a pointer to it, to stay platform independent)
+					if(debug_data.length() < sizeof(OMFSignature))
+						throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+					//Get POMFSignature structure pointer from the very beginning of debug data
+					const OMFSignature* sig = reinterpret_cast<const OMFSignature*>(debug_data.data());
+					if(!memcmp(sig->Signature, "RSDS", 4))
+					{
+						//Signature is "RSDS" - PDB 7.0
+
+						//Check data length
+						if(debug_data.length() < sizeof(CV_INFO_PDB70))
+							throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+						const CV_INFO_PDB70* pdb_data = reinterpret_cast<const CV_INFO_PDB70*>(debug_data.data());
+
+						//Check PDB file name null-termination
+						if(!pe_utils::is_null_terminated(pdb_data->PdbFileName, debug_data.length() - (sizeof(CV_INFO_PDB70) - 1 /* BYTE of filename in structure */)))
+							throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+						info.set_advanced_debug_info(pdb_7_0_info(pdb_data));
+					}
+					else if(!memcmp(sig->Signature, "NB10", 4))
+					{
+						//Signature is "NB10" - PDB 2.0
+
+						//Check data length
+						if(debug_data.length() < sizeof(CV_INFO_PDB20))
+							throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+						const CV_INFO_PDB20* pdb_data = reinterpret_cast<const CV_INFO_PDB20*>(debug_data.data());
+
+						//Check PDB file name null-termination
+						if(!pe_utils::is_null_terminated(pdb_data->PdbFileName, debug_data.length() - (sizeof(CV_INFO_PDB20) - 1 /* BYTE of filename in structure */)))
+							throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+						info.set_advanced_debug_info(pdb_2_0_info(pdb_data));
+					}
+					else if(!memcmp(sig->Signature, "NB09", 4))
+					{
+						//CodeView 4.0, no structures available
+						info.set_advanced_info_type(debug_info::advanced_info_codeview_4_0);
+					}
+					else if(!memcmp(sig->Signature, "NB11", 4))
+					{
+						//CodeView 5.0, no structures available
+						info.set_advanced_info_type(debug_info::advanced_info_codeview_5_0);
+					}
+					else if(!memcmp(sig->Signature, "NB05", 4))
+					{
+						//Other CodeView, no structures available
+						info.set_advanced_info_type(debug_info::advanced_info_codeview);
+					}
+				}
+
+				break;
+
+			case image_debug_type_misc:
+				{
+					//Check data length
+					if(debug_data.length() < sizeof(image_debug_misc))
+						throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+					//Get misc structure pointer
+					const image_debug_misc* misc_data = reinterpret_cast<const image_debug_misc*>(debug_data.data());
+
+					//Check misc data length: misc_debug_info computes Length - sizeof(image_debug_misc) + 1, which must not wrap around
+					if(misc_data->Length < sizeof(image_debug_misc) - 1 || debug_data.length() < misc_data->Length /* Total length of record */)
+						throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+					//Save advanced information
+					info.set_advanced_debug_info(misc_debug_info(misc_data));
+				}
+				break;
+			}
+		}
+
+		//Save debug information structure
+		ret.push_back(info);
+
+		//Check possible overflow
+		if(!pe_utils::is_sum_safe(current_pos, sizeof(image_debug_directory)))
+			throw pe_exception("Incorrect debug directory", pe_exception::incorrect_debug_directory);
+
+		//Go to next debug entry
+		current_pos += sizeof(image_debug_directory);
+		directory = pe.section_data_from_rva<image_debug_directory>(current_pos, section_data_virtual, true);
+	}
+
+	return ret;
+}
+}
diff --git a/tools/pe_bliss/pe_debug.h b/tools/pe_bliss/pe_debug.h
new file mode 100644
index 0000000000..73a7e6860d
--- /dev/null
+++ b/tools/pe_bliss/pe_debug.h
@@ -0,0 +1,324 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include "pe_structures.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+//Class representing advanced RSDS (PDB 7.0) information
+class pdb_7_0_info
+{
+public:
+	//Default constructor
+	pdb_7_0_info();
+	//Constructor from data
+	explicit pdb_7_0_info(const pe_win::CV_INFO_PDB70* info);
+
+	//Returns debug PDB 7.0 structure GUID
+	const pe_win::guid get_guid() const;
+	//Returns age of build
+	uint32_t get_age() const;
+	//Returns PDB file name / path
+	const std::string& get_pdb_file_name() const;
+
+private:
+	uint32_t age_;
+	pe_win::guid guid_;
+	std::string pdb_file_name_;
+};
+
+//Class representing advanced NB10 (PDB 2.0) information
+class pdb_2_0_info
+{
+public:
+	//Default constructor
+	pdb_2_0_info();
+	//Constructor from data
+	explicit pdb_2_0_info(const pe_win::CV_INFO_PDB20* info);
+
+	//Returns debug PDB 2.0 structure signature
+	uint32_t get_signature() const;
+	//Returns age of build
+	uint32_t get_age() const;
+	//Returns PDB file name / path
+	const std::string& get_pdb_file_name() const;
+
+private:
+	uint32_t age_;
+	uint32_t signature_;
+	std::string pdb_file_name_;
+};
+
+//Class representing advanced misc (IMAGE_DEBUG_TYPE_MISC) info
+class misc_debug_info
+{
+public:
+	//Default constructor
+	misc_debug_info();
+	//Constructor from data
+	explicit misc_debug_info(const pe_win::image_debug_misc* info);
+
+	//Returns debug data type
+	uint32_t get_data_type() const;
+	//Returns true if data type is exe name
+	bool is_exe_name() const;
+
+	//Returns true if debug data is UNICODE
+	bool is_unicode() const;
+	//Returns debug data (ANSI or UNICODE)
+	const std::string& get_data_ansi() const;
+	const std::wstring& get_data_unicode() const;
+
+private:
+	uint32_t data_type_;
+	bool unicode_;
+	std::string debug_data_ansi_;
+	std::wstring debug_data_unicode_;
+};
+
+//Class representing COFF (IMAGE_DEBUG_TYPE_COFF) debug info
+class coff_debug_info
+{
+public:
+	//Structure representing COFF symbol
+	struct coff_symbol
+	{
+	public:
+		//Default constructor
+		coff_symbol();
+
+		//Returns storage class
+		uint32_t get_storage_class() const;
+		//Returns symbol index
+		uint32_t get_index() const;
+		//Returns section number
+		uint32_t get_section_number() const;
+		//Returns RVA
+		uint32_t get_rva() const;
+		//Returns type
+		uint16_t get_type() const;
+
+		//Returns true if structure contains file name
+		bool is_file() const;
+		//Returns text data (symbol or file name)
+		const std::string& get_symbol() const;
+
+	public: //These functions do not change anything inside image, they are used by PE class
+		//Sets storage class
+		void set_storage_class(uint32_t storage_class);
+		//Sets symbol index
+		void set_index(uint32_t index);
+		//Sets section number
+		void set_section_number(uint32_t section_number);
+		//Sets RVA
+		void set_rva(uint32_t rva);
+		//Sets type
+		void set_type(uint16_t type);
+
+		//Sets file name
+		void set_file_name(const std::string& file_name);
+		//Sets symbol name
+		void set_symbol_name(const std::string& symbol_name);
+
+	private:
+		uint32_t storage_class_;
+		uint32_t index_;
+		uint32_t section_number_, rva_;
+		uint16_t type_;
+		bool is_filename_;
+		std::string name_;
+	};
+
+public:
+	typedef std::vector<coff_symbol> coff_symbols_list;
+
+public:
+	//Default constructor
+	coff_debug_info();
+	//Constructor from data
+	explicit coff_debug_info(const pe_win::image_coff_symbols_header* info);
+
+	//Returns number of symbols
+	uint32_t get_number_of_symbols() const;
+	//Returns virtual address of the first symbol
+	uint32_t get_lva_to_first_symbol() const;
+	//Returns number of line-number entries
+	uint32_t get_number_of_line_numbers() const;
+	//Returns virtual address of the first line-number entry
+	uint32_t get_lva_to_first_line_number() const;
+	//Returns relative virtual address of the first byte of code
+	uint32_t get_rva_to_first_byte_of_code() const;
+	//Returns relative virtual address of the last byte of code
+	uint32_t get_rva_to_last_byte_of_code() const;
+	//Returns relative virtual address of the first byte of data
+	uint32_t get_rva_to_first_byte_of_data() const;
+	//Returns relative virtual address of the last byte of data
+	uint32_t get_rva_to_last_byte_of_data() const;
+
+	//Returns COFF symbols list
+	const coff_symbols_list& get_symbols() const;
+
+public: //These functions do not change anything inside image, they are used by PE class
+	//Adds COFF symbol
+	void add_symbol(const coff_symbol& sym);
+
+private:
+	uint32_t number_of_symbols_;
+	uint32_t lva_to_first_symbol_;
+	uint32_t number_of_line_numbers_;
+	uint32_t lva_to_first_line_number_;
+	uint32_t rva_to_first_byte_of_code_;
+	uint32_t rva_to_last_byte_of_code_;
+	uint32_t rva_to_first_byte_of_data_;
+	uint32_t rva_to_last_byte_of_data_;
+
+private:
+	coff_symbols_list symbols_;
+};
+
+//Class representing debug information
+class debug_info
+{
+public:
+	//Enumeration of debug information types
+	enum debug_info_type
+	{
+		debug_type_unknown,
+		debug_type_coff,
+		debug_type_codeview,
+		debug_type_fpo,
+		debug_type_misc,
+		debug_type_exception,
+		debug_type_fixup,
+		debug_type_omap_to_src,
+		debug_type_omap_from_src,
+		debug_type_borland,
+		debug_type_reserved10,
+		debug_type_clsid
+	};
+
+public:
+	//Enumeration of advanced debug information types
+	enum advanced_info_type
+	{
+		advanced_info_none, //No advanced info
+		advanced_info_pdb_7_0, //PDB 7.0
+		advanced_info_pdb_2_0, //PDB 2.0
+		advanced_info_misc, //MISC debug info
+		advanced_info_coff, //COFF debug info
+		//No advanced info structures available for types below
+		advanced_info_codeview_4_0, //CodeView 4.0
+		advanced_info_codeview_5_0, //CodeView 5.0
+		advanced_info_codeview //CodeView
+	};
+
+public:
+	//Default constructor
+	debug_info();
+	//Constructor from data
+	explicit debug_info(const pe_win::image_debug_directory& debug);
+	//Copy constructor
+	debug_info(const debug_info& info);
+	//Copy assignment operator
+	debug_info& operator=(const debug_info& info);
+	//Destructor
+	~debug_info();
+
+	//Returns debug characteristics
+	uint32_t get_characteristics() const;
+	//Returns debug datetimestamp
+	uint32_t get_time_stamp() const;
+	//Returns major version
+	uint32_t get_major_version() const;
+	//Returns minor version
+	uint32_t get_minor_version() const;
+	//Returns type of debug info (unchecked)
+	uint32_t get_type_raw() const;
+	//Returns type of debug info from debug_info_type enumeration
+	debug_info_type get_type() const;
+	//Returns size of debug data (internal, .pdb or other file doesn't count)
+	uint32_t get_size_of_data() const;
+	//Returns RVA of debug info when mapped to memory or zero, if info is not mapped
+	uint32_t get_rva_of_raw_data() const;
+	//Returns raw file pointer to raw data
+	uint32_t get_pointer_to_raw_data() const;
+
+	//Returns advanced debug information type
+	advanced_info_type get_advanced_info_type() const;
+	//Returns advanced debug information or throws an exception,
+	//if requested information type is not contained by structure
+	template<typename AdvancedInfo>
+	const AdvancedInfo get_advanced_debug_info() const;
+
+public: //These functions do not change anything inside image, they are used by PE class
+	//Sets advanced debug information
+	void set_advanced_debug_info(const pdb_7_0_info& info);
+	void set_advanced_debug_info(const pdb_2_0_info& info);
+	void set_advanced_debug_info(const misc_debug_info& info);
+	void set_advanced_debug_info(const coff_debug_info& info);
+
+	//Sets advanced debug information type, if no advanced info structure available
+	void set_advanced_info_type(advanced_info_type type);
+
+private:
+	uint32_t characteristics_;
+	uint32_t time_stamp_;
+	uint32_t major_version_, minor_version_;
+	uint32_t type_;
+	uint32_t size_of_data_;
+	uint32_t address_of_raw_data_; //RVA when mapped or 0
+	uint32_t pointer_to_raw_data_; //RAW file offset
+
+	//Union containing advanced debug information pointer
+	union advanced_info
+	{
+	public:
+		//Default constructor
+		advanced_info();
+
+		//Returns true if advanced debug info is present
+		bool is_present() const;
+
+	public:
+		pdb_7_0_info* adv_pdb_7_0_info;
+		pdb_2_0_info* adv_pdb_2_0_info;
+		misc_debug_info* adv_misc_info;
+		coff_debug_info* adv_coff_info;
+	};
+
+	//Helper for advanced debug information copying
+	void copy_advanced_info(const debug_info& info);
+	//Helper for clearing any present advanced debug information
+	void free_present_advanced_info();
+
+	advanced_info advanced_debug_info_;
+	//Advanced information type
+	advanced_info_type advanced_info_type_;
+};
+
+typedef std::vector<debug_info> debug_info_list;
+
+//Returns debug information list
+const debug_info_list get_debug_information(const pe_base& pe);
+}
diff --git a/tools/pe_bliss/pe_directory.cpp b/tools/pe_bliss/pe_directory.cpp
new file mode 100644
index 0000000000..13ad2afc5d
--- /dev/null
+++ b/tools/pe_bliss/pe_directory.cpp
@@ -0,0 +1,59 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Creates a directory descriptor with zero RVA and zero size
+image_directory::image_directory()
+	: rva_(0), size_(0)
+{}
+
+//Creates a directory descriptor from the given RVA and size
+image_directory::image_directory(uint32_t rva, uint32_t size)
+	: rva_(rva), size_(size)
+{}
+
+//Accessor: stored directory RVA
+uint32_t image_directory::get_rva() const
+{
+	return this->rva_;
+}
+
+//Accessor: stored directory size
+uint32_t image_directory::get_size() const
+{
+	return this->size_;
+}
+
+//Mutator: replaces stored directory RVA
+void image_directory::set_rva(uint32_t rva)
+{
+	this->rva_ = rva;
+}
+
+//Mutator: replaces stored directory size
+void image_directory::set_size(uint32_t size)
+{
+	this->size_ = size;
+}
+}
diff --git a/tools/pe_bliss/pe_directory.h b/tools/pe_bliss/pe_directory.h
new file mode 100644
index 0000000000..a7b1ea7a5f
--- /dev/null
+++ b/tools/pe_bliss/pe_directory.h
@@ -0,0 +1,50 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+//Class representing image directory data: a plain (RVA, size) pair
+class image_directory
+{
+public:
+	//Default constructor: RVA and size are both set to zero
+	image_directory();
+	//Constructor from data: stores the passed RVA and size as is
+	image_directory(uint32_t rva, uint32_t size);
+
+	//Returns stored RVA
+	uint32_t get_rva() const;
+	//Returns stored size
+	uint32_t get_size() const;
+
+	//Sets RVA (no validation is performed)
+	void set_rva(uint32_t rva);
+	//Sets size (no validation is performed)
+	void set_size(uint32_t size);
+
+private:
+	uint32_t rva_;
+	uint32_t size_;
+};
+}
diff --git a/tools/pe_bliss/pe_dotnet.cpp b/tools/pe_bliss/pe_dotnet.cpp
new file mode 100644
index 0000000000..f34a76eae8
--- /dev/null
+++ b/tools/pe_bliss/pe_dotnet.cpp
@@ -0,0 +1,186 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_dotnet.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//.NET: default constructor, zero-fills the stored COR20 header
+basic_dotnet_info::basic_dotnet_info()
+{
+	memset(&header_, 0, sizeof header_);
+}
+
+//Constructor from data: keeps a copy of the passed COR20 header
+basic_dotnet_info::basic_dotnet_info(const image_cor20_header& header)
+	: header_(header)
+{}
+
+//Major runtime version, as stored in the header
+uint16_t basic_dotnet_info::get_major_runtime_version() const
+{
+	return this->header_.MajorRuntimeVersion;
+}
+
+//Minor runtime version, as stored in the header
+uint16_t basic_dotnet_info::get_minor_runtime_version() const
+{
+	return this->header_.MinorRuntimeVersion;
+}
+
+//RVA of metadata (symbol table and startup information)
+uint32_t basic_dotnet_info::get_rva_of_metadata() const
+{
+	return this->header_.MetaData.VirtualAddress;
+}
+
+//Size of metadata (symbol table and startup information)
+uint32_t basic_dotnet_info::get_size_of_metadata() const
+{
+	return this->header_.MetaData.Size;
+}
+
+//Raw header flags
+uint32_t basic_dotnet_info::get_flags() const
+{
+	return this->header_.Flags;
+}
+
+//True when comimage_flags_native_entrypoint is set (entry point is native)
+bool basic_dotnet_info::is_native_entry_point() const
+{
+	return (this->header_.Flags & comimage_flags_native_entrypoint) != 0;
+}
+
+//True when comimage_flags_32bitrequired is set (32 bit required)
+bool basic_dotnet_info::is_32bit_required() const
+{
+	return (this->header_.Flags & comimage_flags_32bitrequired) != 0;
+}
+
+//True when comimage_flags_il_library is set (image is IL library)
+bool basic_dotnet_info::is_il_library() const
+{
+	return (this->header_.Flags & comimage_flags_il_library) != 0;
+}
+
+//True when comimage_flags_ilonly is set (image uses IL only)
+bool basic_dotnet_info::is_il_only() const
+{
+	return (this->header_.Flags & comimage_flags_ilonly) != 0;
+}
+
+//Entry point RVA when the entry point is native,
+//entry point managed token when the entry point is managed
+uint32_t basic_dotnet_info::get_entry_point_rva_or_token() const
+{
+	return this->header_.EntryPointToken;
+}
+
+//RVA of managed resources
+uint32_t basic_dotnet_info::get_rva_of_resources() const
+{
+	return this->header_.Resources.VirtualAddress;
+}
+
+//Size of managed resources
+uint32_t basic_dotnet_info::get_size_of_resources() const
+{
+	return this->header_.Resources.Size;
+}
+
+//RVA of strong name signature
+uint32_t basic_dotnet_info::get_rva_of_strong_name_signature() const
+{
+	return this->header_.StrongNameSignature.VirtualAddress;
+}
+
+//Size of strong name signature
+uint32_t basic_dotnet_info::get_size_of_strong_name_signature() const
+{
+	return this->header_.StrongNameSignature.Size;
+}
+
+//RVA of code manager table
+uint32_t basic_dotnet_info::get_rva_of_code_manager_table() const
+{
+	return this->header_.CodeManagerTable.VirtualAddress;
+}
+
+//Size of code manager table
+uint32_t basic_dotnet_info::get_size_of_code_manager_table() const
+{
+	return this->header_.CodeManagerTable.Size;
+}
+
+//RVA of VTable fixups
+uint32_t basic_dotnet_info::get_rva_of_vtable_fixups() const
+{
+	return this->header_.VTableFixups.VirtualAddress;
+}
+
+//Size of VTable fixups
+uint32_t basic_dotnet_info::get_size_of_vtable_fixups() const
+{
+	return this->header_.VTableFixups.Size;
+}
+
+//RVA of export address table jumps
+uint32_t basic_dotnet_info::get_rva_of_export_address_table_jumps() const
+{
+	return this->header_.ExportAddressTableJumps.VirtualAddress;
+}
+
+//Size of export address table jumps
+uint32_t basic_dotnet_info::get_size_of_export_address_table_jumps() const
+{
+	return this->header_.ExportAddressTableJumps.Size;
+}
+
+//RVA of managed native header
+//(precompiled header info, usually set to zero, for internal use)
+uint32_t basic_dotnet_info::get_rva_of_managed_native_header() const
+{
+	return this->header_.ManagedNativeHeader.VirtualAddress;
+}
+
+//Size of managed native header
+//(precompiled header info, usually set to zero, for internal use)
+uint32_t basic_dotnet_info::get_size_of_managed_native_header() const
+{
+	return this->header_.ManagedNativeHeader.Size;
+}
+
+//Reads basic .NET information from the image
+//Throws pe_exception (image_does_not_have_managed_code) if pe.is_dotnet() is false
+const basic_dotnet_info get_basic_dotnet_info(const pe_base& pe)
+{
+	//Images without managed code are rejected with an exception
+	if(!pe.is_dotnet())
+		throw pe_exception("Image does not have managed code", pe_exception::image_does_not_have_managed_code);
+
+	//Read the COR20 header located at the COM descriptor directory RVA and wrap it
+	return basic_dotnet_info(pe.section_data_from_rva<image_cor20_header>(pe.get_directory_rva(image_directory_entry_com_descriptor), section_data_virtual, true));
+}
+}
diff --git a/tools/pe_bliss/pe_dotnet.h b/tools/pe_bliss/pe_dotnet.h
new file mode 100644
index 0000000000..96b0ac7d0a
--- /dev/null
+++ b/tools/pe_bliss/pe_dotnet.h
@@ -0,0 +1,97 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "pe_structures.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+//Class representing basic .NET header information (wraps a copy of image_cor20_header)
+class basic_dotnet_info
+{
+public:
+	//Default constructor: the stored header is zero-filled
+	basic_dotnet_info();
+	//Constructor from data: copies the passed header
+	explicit basic_dotnet_info(const pe_win::image_cor20_header& header);
+
+	//Returns major runtime version
+	uint16_t get_major_runtime_version() const;
+	//Returns minor runtime version
+	uint16_t get_minor_runtime_version() const;
+
+	//Returns RVA of metadata (symbol table and startup information)
+	uint32_t get_rva_of_metadata() const;
+	//Returns size of metadata (symbol table and startup information)
+	uint32_t get_size_of_metadata() const;
+
+	//Returns raw header flags (the is_* helpers below test single bits of this value)
+	uint32_t get_flags() const;
+
+	//Returns true if entry point is native
+	bool is_native_entry_point() const;
+	//Returns true if 32 bit required
+	bool is_32bit_required() const;
+	//Returns true if image is IL library
+	bool is_il_library() const;
+	//Returns true if image uses IL only
+	bool is_il_only() const;
+
+	//Returns entry point RVA (if entry point is native)
+	//Returns entry point managed token (if entry point is managed)
+	uint32_t get_entry_point_rva_or_token() const;
+
+	//Returns RVA of managed resources
+	uint32_t get_rva_of_resources() const;
+	//Returns size of managed resources
+	uint32_t get_size_of_resources() const;
+	//Returns RVA of strong name signature
+	uint32_t get_rva_of_strong_name_signature() const;
+	//Returns size of strong name signature
+	uint32_t get_size_of_strong_name_signature() const;
+	//Returns RVA of code manager table
+	uint32_t get_rva_of_code_manager_table() const;
+	//Returns size of code manager table
+	uint32_t get_size_of_code_manager_table() const;
+	//Returns RVA of VTable fixups
+	uint32_t get_rva_of_vtable_fixups() const;
+	//Returns size of VTable fixups
+	uint32_t get_size_of_vtable_fixups() const;
+	//Returns RVA of export address table jumps
+	uint32_t get_rva_of_export_address_table_jumps() const;
+	//Returns size of export address table jumps
+	uint32_t get_size_of_export_address_table_jumps() const;
+	//Returns RVA of managed native header
+	//(precompiled header info, usually set to zero, for internal use)
+	uint32_t get_rva_of_managed_native_header() const;
+	//Returns size of managed native header
+	//(precompiled header info, usually set to zero, for internal use)
+	uint32_t get_size_of_managed_native_header() const;
+
+private:
+	pe_win::image_cor20_header header_;
+};
+
+//Returns basic .NET information
+//If image does not have managed code (is not a .NET image), throws pe_exception
+const basic_dotnet_info get_basic_dotnet_info(const pe_base& pe);
+}
diff --git a/tools/pe_bliss/pe_exception.cpp b/tools/pe_bliss/pe_exception.cpp
new file mode 100644
index 0000000000..3161f93599
--- /dev/null
+++ b/tools/pe_bliss/pe_exception.cpp
@@ -0,0 +1,40 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_exception.h"
+
+namespace pe_bliss
+{
+//Constructor from C string: text goes to std::runtime_error, id is stored
+pe_exception::pe_exception(const char* text, exception_id id)
+	: std::runtime_error(text), id_(id)
+{}
+//Constructor from std::string: same as above
+pe_exception::pe_exception(const std::string& text, exception_id id)
+	: std::runtime_error(text), id_(id)
+{}
+
+//Accessor: ID passed at construction
+pe_exception::exception_id pe_exception::get_id() const
+{
+	return this->id_;
+}
+}
diff --git a/tools/pe_bliss/pe_exception.h b/tools/pe_bliss/pe_exception.h
new file mode 100644
index 0000000000..2b58a95772
--- /dev/null
+++ b/tools/pe_bliss/pe_exception.h
@@ -0,0 +1,130 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <exception>
+#include <stdexcept>
+
+namespace pe_bliss
+{
+//PE exception class: std::runtime_error carrying an additional exception_id code
+class pe_exception : public std::runtime_error
+{
+public:
+	//Exception IDs (unknown_error is the default used by the constructors)
+	enum exception_id
+	{
+		unknown_error,
+		bad_pe_file,
+		bad_dos_header,
+		image_nt_headers_not_found,
+		error_reading_image_nt_headers,
+		error_reading_data_directories,
+		error_reading_file,
+		pe_signature_incorrect,
+		incorrect_number_of_rva_and_sizes,
+		error_changing_section_virtual_size,
+		section_number_incorrect,
+		section_table_incorrect,
+		incorrect_section_alignment,
+		incorrect_file_alignment,
+		incorrect_size_of_image,
+		incorrect_size_of_headers,
+		image_section_headers_not_found,
+		zero_section_sizes,
+		section_incorrect_addr_or_size,
+		section_not_found,
+		image_section_data_not_found,
+		no_section_found,
+		image_section_table_incorrect,
+		directory_does_not_exist,
+		rva_not_exists,
+		error_reading_section_header,
+		error_reading_overlay,
+		incorrect_address_conversion,
+
+		incorrect_export_directory,
+		incorrect_import_directory,
+		incorrect_relocation_directory,
+		incorrect_tls_directory,
+		incorrect_config_directory,
+		incorrect_bound_import_directory,
+		incorrect_resource_directory,
+		incorrect_exception_directory,
+		incorrect_debug_directory,
+
+		resource_directory_entry_error,
+		resource_directory_entry_not_found,
+		resource_data_entry_not_found,
+		resource_incorrect_bitmap,
+		resource_incorrect_icon,
+		resource_incorrect_cursor,
+		resource_incorrect_string_table,
+		resource_string_not_found,
+		resource_incorrect_message_table,
+		resource_incorrect_version_info,
+
+		advanced_debug_information_request_error,
+		image_does_not_have_managed_code,
+
+		section_is_empty,
+		data_is_empty,
+		stream_is_bad,
+
+		section_is_not_attached,
+		insufficient_space,
+
+		cannot_rebase_relocations,
+
+		exports_list_is_empty,
+		duplicate_exported_function_ordinal,
+		duplicate_exported_function_name,
+
+		version_info_string_does_not_exist,
+
+		no_more_sections_can_be_added,
+
+		no_icon_group_found,
+		no_cursor_group_found,
+
+		encoding_convertion_error,
+
+		error_expanding_section,
+
+		cannot_rebuild_image
+	};
+
+public:
+	//Class constructors: text is forwarded to std::runtime_error, id defaults to unknown_error
+	explicit pe_exception(const char* text, exception_id id = unknown_error);
+	explicit pe_exception(const std::string& text, exception_id id = unknown_error);
+
+	//Returns exception ID from exception_id enumeration (the value passed to the constructor)
+	exception_id get_id() const;
+
+	//Destructor; throw() keeps it no-throw like the std::runtime_error destructor it overrides
+	virtual ~pe_exception() throw()
+	{}
+
+private:
+	exception_id id_;
+};
+}
diff --git a/tools/pe_bliss/pe_exception_directory.cpp b/tools/pe_bliss/pe_exception_directory.cpp
new file mode 100644
index 0000000000..1813f02021
--- /dev/null
+++ b/tools/pe_bliss/pe_exception_directory.cpp
@@ -0,0 +1,177 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_exception_directory.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//EXCEPTION DIRECTORY (exists on PE+ only)
+//Default constructor: every field is zero
+exception_entry::exception_entry()
+	: begin_address_(0), end_address_(0), unwind_info_address_(0),
+	unwind_info_version_(0),
+	flags_(0),
+	size_of_prolog_(0),
+	count_of_codes_(0),
+	frame_register_(0),
+	frame_offset_(0)
+{}
+
+//Constructor from data: copies fields of the runtime function entry and its unwind info
+exception_entry::exception_entry(const image_runtime_function_entry& entry, const unwind_info& unwind_info)
+	: begin_address_(entry.BeginAddress), end_address_(entry.EndAddress), unwind_info_address_(entry.UnwindInfoAddress),
+	unwind_info_version_(unwind_info.Version),
+	flags_(unwind_info.Flags),
+	size_of_prolog_(unwind_info.SizeOfProlog),
+	count_of_codes_(unwind_info.CountOfCodes),
+	frame_register_(unwind_info.FrameRegister),
+	frame_offset_(unwind_info.FrameOffset)
+{}
+
+//Starting address of function, affected by exception unwinding
+uint32_t exception_entry::get_begin_address() const
+{
+	return this->begin_address_;
+}
+
+//Ending address of function, affected by exception unwinding
+uint32_t exception_entry::get_end_address() const
+{
+	return this->end_address_;
+}
+
+//Unwind info address
+uint32_t exception_entry::get_unwind_info_address() const
+{
+	return this->unwind_info_address_;
+}
+
+//UNWIND_INFO structure version
+uint8_t exception_entry::get_unwind_info_version() const
+{
+	return this->unwind_info_version_;
+}
+
+//Raw unwind info flags
+uint8_t exception_entry::get_flags() const
+{
+	return this->flags_;
+}
+
+//True when unw_flag_ehandler is set: the function has an exception handler
+//that should be called when looking for functions that need to examine exceptions
+bool exception_entry::has_exception_handler() const
+{
+	return (this->flags_ & unw_flag_ehandler) != 0;
+}
+
+//True when unw_flag_uhandler is set: the function has a termination handler
+//that should be called when unwinding an exception
+bool exception_entry::has_termination_handler() const
+{
+	return (this->flags_ & unw_flag_uhandler) != 0;
+}
+
+//True when unw_flag_chaininfo is set: unwind info is not the primary one for the procedure
+bool exception_entry::is_chaininfo() const
+{
+	return (this->flags_ & unw_flag_chaininfo) != 0;
+}
+
+//Size of function prolog
+uint8_t exception_entry::get_size_of_prolog() const
+{
+	return this->size_of_prolog_;
+}
+
+//Number of unwind slots
+uint8_t exception_entry::get_number_of_unwind_slots() const
+{
+	return this->count_of_codes_;
+}
+
+//True if the function uses frame pointer (frame register number is nonzero)
+bool exception_entry::uses_frame_pointer() const
+{
+	return this->frame_register_ != 0;
+}
+
+//Number of the nonvolatile register used as the frame pointer,
+//using the same encoding for the operation info field of UNWIND_CODE nodes
+uint8_t exception_entry::get_frame_pointer_register_number() const
+{
+	return this->frame_register_;
+}
+
+//The scaled offset from RSP that is applied to the FP reg when it is established.
+//The actual FP reg is set to RSP + 16 * this number, allowing offsets from 0 to 240
+uint8_t exception_entry::get_scaled_rsp_offset() const
+{
+	return this->frame_offset_;
+}
+
+//Collects exception directory entries of the image (exists on PE+ only)
+//Unwind opcodes are not listed, because their format and list are subject to change
+const exception_entry_list get_exception_directory_data(const pe_base& pe)
+{
+	exception_entry_list entries;
+
+	//No exception directory in the image: the result is an empty list
+	if(!pe.has_exception_directory())
+		return entries;
+
+	//The section holding the directory must have room for at least one runtime function entry
+	if(pe.section_data_length_from_rva(pe.get_directory_rva(image_directory_entry_exception), pe.get_directory_rva(image_directory_entry_exception), section_data_virtual, true)
+		< sizeof(image_runtime_function_entry))
+		throw pe_exception("Incorrect exception directory", pe_exception::incorrect_exception_directory);
+
+	unsigned long entry_rva = pe.get_directory_rva(image_directory_entry_exception);
+
+	//Structures are required to be DWORD-aligned
+	if((entry_rva % sizeof(uint32_t)) != 0)
+		throw pe_exception("Incorrect exception directory", pe_exception::incorrect_exception_directory);
+
+	//First IMAGE_RUNTIME_FUNCTION_ENTRY of the table
+	image_runtime_function_entry runtime_function = pe.section_data_from_rva<image_runtime_function_entry>(entry_rva, section_data_virtual, true);
+	//NOTE(review): the loop ends only on a zero BeginAddress, directory size is not consulted - confirm this is intended
+	//todo: virtual addresses BeginAddress and EndAddress are not checked to be inside image
+	while(runtime_function.BeginAddress != 0)
+	{
+		//A function can not end before it begins
+		if(runtime_function.EndAddress < runtime_function.BeginAddress)
+			throw pe_exception("Incorrect exception directory", pe_exception::incorrect_exception_directory);
+
+		//Read unwind information referenced by the entry
+		unwind_info unwind = pe.section_data_from_rva<unwind_info>(runtime_function.UnwindInfoAddress, section_data_virtual, true);
+
+		//Build exception entry and append it to the result
+		entries.push_back(exception_entry(runtime_function, unwind));
+
+		//Advance to the next runtime function entry
+		entry_rva += sizeof(image_runtime_function_entry);
+		runtime_function = pe.section_data_from_rva<image_runtime_function_entry>(entry_rva, section_data_virtual, true);
+	}
+
+	return entries;
+}
+}
diff --git a/tools/pe_bliss/pe_exception_directory.h b/tools/pe_bliss/pe_exception_directory.h
new file mode 100644
index 0000000000..6f4fc2298b
--- /dev/null
+++ b/tools/pe_bliss/pe_exception_directory.h
@@ -0,0 +1,88 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include "pe_structures.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+//Class representing exception directory entry (runtime function entry plus its unwind info header)
+class exception_entry
+{
+public:
+	//Default constructor: all fields are zero
+	exception_entry();
+	//Constructor from data: copies fields of the runtime function entry and of its unwind info
+	exception_entry(const pe_win::image_runtime_function_entry& entry, const pe_win::unwind_info& unwind_info);
+
+	//Returns starting address of function, affected by exception unwinding
+	uint32_t get_begin_address() const;
+	//Returns ending address of function, affected by exception unwinding
+	uint32_t get_end_address() const;
+	//Returns unwind info address
+	uint32_t get_unwind_info_address() const;
+
+	//Returns UNWIND_INFO structure version
+	uint8_t get_unwind_info_version() const;
+
+	//Returns unwind info flags (the three helpers below test single bits of this value)
+	uint8_t get_flags() const;
+	//The function has an exception handler that should be called
+	//when looking for functions that need to examine exceptions
+	bool has_exception_handler() const;
+	//The function has a termination handler that should be called
+	//when unwinding an exception
+	bool has_termination_handler() const;
+	//The unwind info structure is not the primary one for the procedure
+	bool is_chaininfo() const;
+
+	//Returns size of function prolog
+	uint8_t get_size_of_prolog() const;
+
+	//Returns number of unwind slots
+	uint8_t get_number_of_unwind_slots() const;
+
+	//If the function uses frame pointer (true when the frame register number is nonzero)
+	bool uses_frame_pointer() const;
+	//Number of the nonvolatile register used as the frame pointer,
+	//using the same encoding for the operation info field of UNWIND_CODE nodes
+	uint8_t get_frame_pointer_register_number() const;
+	//The scaled offset from RSP that is applied to the FP reg when it is established.
+	//The actual FP reg is set to RSP + 16 * this number, allowing offsets from 0 to 240
+	uint8_t get_scaled_rsp_offset() const;
+
+private:
+	uint32_t begin_address_, end_address_, unwind_info_address_;
+	uint8_t unwind_info_version_;
+	uint8_t flags_;
+	uint8_t size_of_prolog_;
+	uint8_t count_of_codes_;
+	uint8_t frame_register_, frame_offset_;
+};
+
+typedef std::vector<exception_entry> exception_entry_list;
+
+//Returns exception directory data (exists on PE+ only); the list is empty if the image has no exception directory
+//Unwind opcodes are not listed, because their format and list are subject to change
+const exception_entry_list get_exception_directory_data(const pe_base& pe);
+}
diff --git a/tools/pe_bliss/pe_exports.cpp b/tools/pe_bliss/pe_exports.cpp
new file mode 100644
index 0000000000..c2ad895554
--- /dev/null
+++ b/tools/pe_bliss/pe_exports.cpp
@@ -0,0 +1,700 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <set>
+#include <algorithm>
+#include <string.h>
+#include "pe_exports.h"
+#include "utils.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//EXPORTS
+//Default constructor
+exported_function::exported_function()
+	:ordinal_(0), rva_(0), has_name_(false), name_ordinal_(0), forward_(false)
+{}
+
+//Returns ordinal of function (actually, ordinal = hint + ordinal base)
+uint16_t exported_function::get_ordinal() const
+{
+	return ordinal_;
+}
+
+//Returns RVA of function
+uint32_t exported_function::get_rva() const
+{
+	return rva_;
+}
+
+//Returns name of function
+const std::string& exported_function::get_name() const
+{
+	return name_;
+}
+
+//Returns true if function has name and name ordinal
+bool exported_function::has_name() const
+{
+	return has_name_;
+}
+
+//Returns name ordinal of function
+uint16_t exported_function::get_name_ordinal() const
+{
+	return name_ordinal_;
+}
+
+//Returns true if function is forwarded to other library
+bool exported_function::is_forwarded() const
+{
+	return forward_;
+}
+
+//Returns the name of forwarded function
+const std::string& exported_function::get_forwarded_name() const
+{
+	return forward_name_;
+}
+
+//Sets ordinal of function
+void exported_function::set_ordinal(uint16_t ordinal)
+{
+	ordinal_ = ordinal;
+}
+
+//Sets RVA of function
+void exported_function::set_rva(uint32_t rva)
+{
+	rva_ = rva;
+}
+
+//Sets name of function (or clears it, if empty name is passed)
+void exported_function::set_name(const std::string& name)
+{
+	name_ = name;
+	has_name_ = !name.empty();
+}
+
+//Sets name ordinal
+void exported_function::set_name_ordinal(uint16_t name_ordinal)
+{
+	name_ordinal_ = name_ordinal;
+}
+
+//Sets forwarded function name (or clears it, if empty name is passed)
+void exported_function::set_forwarded_name(const std::string& name)
+{
+	forward_name_ = name;
+	forward_ = !name.empty();
+}
+
+//Default constructor
+export_info::export_info()
+	:characteristics_(0),
+	timestamp_(0),
+	major_version_(0),
+	minor_version_(0),
+	ordinal_base_(0),
+	number_of_functions_(0),
+	number_of_names_(0),
+	address_of_functions_(0),
+	address_of_names_(0),
+	address_of_name_ordinals_(0)
+{}
+
+//Returns characteristics
+uint32_t export_info::get_characteristics() const
+{
+	return characteristics_;
+}
+
+//Returns timestamp
+uint32_t export_info::get_timestamp() const
+{
+	return timestamp_;
+}
+
+//Returns major version
+uint16_t export_info::get_major_version() const
+{
+	return major_version_;
+}
+
+//Returns minor version
+uint16_t export_info::get_minor_version() const
+{
+	return minor_version_;
+}
+
+//Returns DLL name
+const std::string& export_info::get_name() const
+{
+	return name_;
+}
+
+//Returns ordinal base
+uint32_t export_info::get_ordinal_base() const
+{
+	return ordinal_base_;
+}
+
+//Returns number of functions
+uint32_t export_info::get_number_of_functions() const
+{
+	return number_of_functions_;
+}
+
+//Returns number of function names
+uint32_t export_info::get_number_of_names() const
+{
+	return number_of_names_;
+}
+
+//Returns RVA of function address table
+uint32_t export_info::get_rva_of_functions() const
+{
+	return address_of_functions_;
+}
+
+//Returns RVA of function name address table
+uint32_t export_info::get_rva_of_names() const
+{
+	return address_of_names_;
+}
+
+//Returns RVA of name ordinals table
+uint32_t export_info::get_rva_of_name_ordinals() const
+{
+	return address_of_name_ordinals_;
+}
+
+//Sets characteristics
+void export_info::set_characteristics(uint32_t characteristics)
+{
+	characteristics_ = characteristics;
+}
+
+//Sets timestamp
+void export_info::set_timestamp(uint32_t timestamp)
+{
+	timestamp_ = timestamp;
+}
+
+//Sets major version
+void export_info::set_major_version(uint16_t major_version)
+{
+	major_version_ = major_version;
+}
+
+//Sets minor version
+void export_info::set_minor_version(uint16_t minor_version)
+{
+	minor_version_ = minor_version;
+}
+
+//Sets DLL name
+void export_info::set_name(const std::string& name)
+{
+	name_ = name;
+}
+
+//Sets ordinal base
+void export_info::set_ordinal_base(uint32_t ordinal_base)
+{
+	ordinal_base_ = ordinal_base;
+}
+
+//Sets number of functions
+void export_info::set_number_of_functions(uint32_t number_of_functions)
+{
+	number_of_functions_ = number_of_functions;
+}
+
+//Sets number of function names
+void export_info::set_number_of_names(uint32_t number_of_names)
+{
+	number_of_names_ = number_of_names;
+}
+
+//Sets RVA of function address table
+void export_info::set_rva_of_functions(uint32_t rva_of_functions)
+{
+	address_of_functions_ = rva_of_functions;
+}
+
+//Sets RVA of function name address table
+void export_info::set_rva_of_names(uint32_t rva_of_names)
+{
+	address_of_names_ = rva_of_names;
+}
+
+//Sets RVA of name ordinals table
+void export_info::set_rva_of_name_ordinals(uint32_t rva_of_name_ordinals)
+{
+	address_of_name_ordinals_ = rva_of_name_ordinals;
+}
+
+const exported_functions_list get_exported_functions(const pe_base& pe, export_info* info);
+
+//Returns array of exported functions
+const exported_functions_list get_exported_functions(const pe_base& pe)
+{
+	return get_exported_functions(pe, 0);
+}
+
+//Returns array of exported functions and information about export
+const exported_functions_list get_exported_functions(const pe_base& pe, export_info& info)
+{
+	return get_exported_functions(pe, &info);
+}
+
+//Helper: sorts exported function list by ordinals
+struct ordinal_sorter
+{
+public:
+		bool operator()(const exported_function& func1, const exported_function& func2) const;
+};
+
+//Returns array of exported functions; also fills "info" with export directory data if info != 0
+const exported_functions_list get_exported_functions(const pe_base& pe, export_info* info)
+{
+	//Returned exported functions info array
+	std::vector<exported_function> ret;
+
+	if(pe.has_exports())
+	{
+		//Check the length in bytes of the section containing export directory
+		if(pe.section_data_length_from_rva(pe.get_directory_rva(image_directory_entry_export),
+			pe.get_directory_rva(image_directory_entry_export), section_data_virtual, true)
+			< sizeof(image_export_directory))
+			throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+		image_export_directory exports = pe.section_data_from_rva<image_export_directory>(pe.get_directory_rva(image_directory_entry_export), section_data_virtual, true);
+
+		unsigned long max_name_length;
+
+		if(info)
+		{
+			//Save some export info data
+			info->set_characteristics(exports.Characteristics);
+			info->set_major_version(exports.MajorVersion);
+			info->set_minor_version(exports.MinorVersion);
+
+			//Get byte count that we have for dll name
+			if((max_name_length = pe.section_data_length_from_rva(exports.Name, exports.Name, section_data_virtual, true)) < 2)
+				throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+			//Get dll name pointer
+			const char* dll_name = pe.section_data_from_rva(exports.Name, section_data_virtual, true);
+
+			//Check for null-termination
+			if(!pe_utils::is_null_terminated(dll_name, max_name_length))
+				throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+			//Save the rest of export information data
+			info->set_name(dll_name);
+			info->set_number_of_functions(exports.NumberOfFunctions);
+			info->set_number_of_names(exports.NumberOfNames);
+			info->set_ordinal_base(exports.Base);
+			info->set_rva_of_functions(exports.AddressOfFunctions);
+			info->set_rva_of_names(exports.AddressOfNames);
+			info->set_rva_of_name_ordinals(exports.AddressOfNameOrdinals);
+			info->set_timestamp(exports.TimeDateStamp);
+		}
+
+		if(!exports.NumberOfFunctions)
+			return ret;
+
+		//Check IMAGE_EXPORT_DIRECTORY fields
+		if(exports.NumberOfNames > exports.NumberOfFunctions)
+			throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+		//Check table pointers and make sure the RVA arithmetic used below cannot overflow
+		if((!exports.AddressOfNameOrdinals && exports.AddressOfNames) ||
+			(exports.AddressOfNameOrdinals && !exports.AddressOfNames) ||
+			!exports.AddressOfFunctions
+			|| exports.NumberOfFunctions >= pe_utils::max_dword / sizeof(uint32_t)
+			|| exports.NumberOfNames > pe_utils::max_dword / sizeof(uint32_t)
+			|| !pe_utils::is_sum_safe(exports.AddressOfFunctions, exports.NumberOfFunctions * sizeof(uint32_t))
+			|| !pe_utils::is_sum_safe(exports.AddressOfNames, exports.NumberOfNames * sizeof(uint32_t))
+			|| !pe_utils::is_sum_safe(exports.AddressOfNameOrdinals, exports.NumberOfNames * sizeof(uint16_t))
+			|| !pe_utils::is_sum_safe(pe.get_directory_rva(image_directory_entry_export), pe.get_directory_size(image_directory_entry_export)))
+			throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+		//Check that there are enough bytes to hold AddressOfFunctions table
+		if(pe.section_data_length_from_rva(exports.AddressOfFunctions, exports.AddressOfFunctions, section_data_virtual, true)
+			< exports.NumberOfFunctions * sizeof(uint32_t))
+			throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+		if(exports.AddressOfNames)
+		{
+			//Check that there are enough bytes to hold name and ordinal tables
+			if(pe.section_data_length_from_rva(exports.AddressOfNameOrdinals, exports.AddressOfNameOrdinals, section_data_virtual, true)
+				< exports.NumberOfNames * sizeof(uint16_t))
+				throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+			if(pe.section_data_length_from_rva(exports.AddressOfNames, exports.AddressOfNames, section_data_virtual, true)
+				< exports.NumberOfNames * sizeof(uint32_t))
+				throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+		}
+		
+		for(uint32_t ordinal = 0; ordinal < exports.NumberOfFunctions; ordinal++)
+		{
+			//Get function address
+			//Sum and multiplication are safe (checked above)
+			uint32_t rva = pe.section_data_from_rva<uint32_t>(exports.AddressOfFunctions + ordinal * sizeof(uint32_t), section_data_virtual, true);
+
+			//Zero RVA means this ordinal slot is unused, skip it
+			if(!rva)
+				continue;
+
+			exported_function func;
+			func.set_rva(rva);
+
+			if(!pe_utils::is_sum_safe(exports.Base, ordinal) || exports.Base + ordinal > pe_utils::max_word)
+				throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+			func.set_ordinal(static_cast<uint16_t>(ordinal + exports.Base));
+
+			//Scan for function name ordinal
+			for(uint32_t i = 0; i < exports.NumberOfNames; i++)
+			{
+				uint16_t ordinal2 = pe.section_data_from_rva<uint16_t>(exports.AddressOfNameOrdinals + i * sizeof(uint16_t), section_data_virtual, true);
+
+				//If function has name (and name ordinal)
+				if(ordinal == ordinal2)
+				{
+					//Get function name
+					//Sum and multiplication are safe (checked above)
+					uint32_t function_name_rva = pe.section_data_from_rva<uint32_t>(exports.AddressOfNames + i * sizeof(uint32_t), section_data_virtual, true);
+
+					//Get byte count that we have for function name
+					if((max_name_length = pe.section_data_length_from_rva(function_name_rva, function_name_rva, section_data_virtual, true)) < 2)
+						throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+					//Get function name pointer
+					const char* func_name = pe.section_data_from_rva(function_name_rva, section_data_virtual, true);
+
+					//Check for null-termination
+					if(!pe_utils::is_null_terminated(func_name, max_name_length))
+						throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+					//Save function info
+					func.set_name(func_name);
+					func.set_name_ordinal(ordinal2);
+
+					//RVA past the directory header but inside the export directory is a forwarder string (checked for named exports only)
+					if(rva >= pe.get_directory_rva(image_directory_entry_export) + sizeof(image_export_directory) &&
+						rva < pe.get_directory_rva(image_directory_entry_export) + pe.get_directory_size(image_directory_entry_export))
+					{
+						if((max_name_length = pe.section_data_length_from_rva(rva, rva, section_data_virtual, true)) < 2)
+							throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+						//Get forwarded function name pointer
+						const char* forwarded_func_name = pe.section_data_from_rva(rva, section_data_virtual, true);
+
+						//Check for null-termination
+						if(!pe_utils::is_null_terminated(forwarded_func_name, max_name_length))
+							throw pe_exception("Incorrect export directory", pe_exception::incorrect_export_directory);
+
+						//Set the name of forwarded function
+						func.set_forwarded_name(forwarded_func_name);
+					}
+
+					break;
+				}
+			}
+
+			//Add function info to output array
+			ret.push_back(func);
+		}
+	}
+
+	return ret;
+}
+
+//Helper export functions
+//Returns pair: <ordinal base for supplied functions; maximum ordinal value for supplied functions>
+const std::pair<uint16_t, uint16_t> get_export_ordinal_limits(const exported_functions_list& exports)
+{
+	if(exports.empty())
+		return std::make_pair(0, 0);
+
+	uint16_t max_ordinal = 0; //Maximum ordinal number
+	uint16_t ordinal_base = pe_utils::max_word; //Minimum ordinal value
+	for(exported_functions_list::const_iterator it = exports.begin(); it != exports.end(); ++it)
+	{
+		const exported_function& func = (*it);
+
+		//Calculate maximum and minimum ordinal numbers
+		max_ordinal = std::max<uint16_t>(max_ordinal, func.get_ordinal());
+		ordinal_base = std::min<uint16_t>(ordinal_base, func.get_ordinal());
+	}
+
+	return std::make_pair(ordinal_base, max_ordinal);
+}
+
+//Checks if exported function name already exists
+bool exported_name_exists(const std::string& function_name, const exported_functions_list& exports)
+{
+	for(exported_functions_list::const_iterator it = exports.begin(); it != exports.end(); ++it)
+	{
+		if((*it).has_name() && (*it).get_name() == function_name)
+			return true;
+	}
+
+	return false;
+}
+
+//Checks if exported function ordinal already exists
+bool exported_ordinal_exists(uint16_t ordinal, const exported_functions_list& exports)
+{
+	for(exported_functions_list::const_iterator it = exports.begin(); it != exports.end(); ++it)
+	{
+		if((*it).get_ordinal() == ordinal)
+			return true;
+	}
+
+	return false;
+}
+
+//Helper: sorts exported function list by ordinals
+bool ordinal_sorter::operator()(const exported_function& func1, const exported_function& func2) const
+{
+	return func1.get_ordinal() < func2.get_ordinal();
+}
+
+//Export directory rebuilder
+//info - export information
+//exports - list of exported functions
+//exports_section - section where export directory will be placed (must be attached to PE image)
+//offset_from_section_start - offset from exports_section raw data start
+//save_to_pe_header - if true, new export directory information will be saved to PE image headers
+//auto_strip_last_section - if true and exports are placed in the last section, it will be automatically stripped
+//number_of_functions and number_of_names parameters don't matter in "info" when rebuilding, they're calculated independently
+//characteristics, major_version, minor_version, timestamp and name of "info" are used; ordinal_base is used only if "exports" is empty
+//Returns new export directory information
+//exports is copied intentionally to be sorted by ordinal values later
+//Name ordinals in exported function don't matter, they will be recalculated
+const image_directory rebuild_exports(pe_base& pe, const export_info& info, exported_functions_list exports, section& exports_section, uint32_t offset_from_section_start, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	//Check that exports_section is attached to this PE image
+	if(!pe.section_attached(exports_section))
+		throw pe_exception("Exports section must be attached to PE file", pe_exception::section_is_not_attached);
+
+	//Needed space for strings
+	uint32_t needed_size_for_strings = static_cast<uint32_t>(info.get_name().length() + 1);
+	uint32_t number_of_names = 0; //Number of named functions
+	uint32_t max_ordinal = 0; //Maximum ordinal number
+	uint32_t ordinal_base = static_cast<uint32_t>(-1); //Minimum ordinal value
+	
+	if(exports.empty())
+		ordinal_base = info.get_ordinal_base();
+
+	uint32_t needed_size_for_function_names = 0; //Needed space for function name strings
+	uint32_t needed_size_for_function_forwards = 0; //Needed space for function forwards names
+	
+	//List all exported functions
+	//Calculate needed size for function list
+	{
+		//Also check that there're no duplicate names and ordinals
+		std::set<std::string> used_function_names;
+		std::set<uint16_t> used_function_ordinals;
+
+		for(exported_functions_list::const_iterator it = exports.begin(); it != exports.end(); ++it)
+		{
+			const exported_function& func = (*it);
+			//Calculate maximum and minimum ordinal numbers
+			max_ordinal = std::max<uint32_t>(max_ordinal, func.get_ordinal());
+			ordinal_base = std::min<uint32_t>(ordinal_base, func.get_ordinal());
+
+			//Check if ordinal is unique
+			if(!used_function_ordinals.insert(func.get_ordinal()).second)
+				throw pe_exception("Duplicate exported function ordinal", pe_exception::duplicate_exported_function_ordinal);
+			
+			if(func.has_name())
+			{
+				//If function is named
+				++number_of_names;
+				needed_size_for_function_names += static_cast<uint32_t>(func.get_name().length() + 1);
+				
+				//Check if its name is unique
+				if(!used_function_names.insert(func.get_name()).second)
+					throw pe_exception("Duplicate exported function name", pe_exception::duplicate_exported_function_name);
+			}
+
+			//If function is forwarded to another DLL
+			if(func.is_forwarded())
+				needed_size_for_function_forwards += static_cast<uint32_t>(func.get_forwarded_name().length() + 1);
+		}
+	}
+	
+	//Sort functions by ordinal value
+	std::sort(exports.begin(), exports.end(), ordinal_sorter());
+
+	//Calculate needed space for different things...
+	needed_size_for_strings += needed_size_for_function_names;
+	needed_size_for_strings += needed_size_for_function_forwards;
+	uint32_t needed_size_for_function_name_ordinals = number_of_names * sizeof(uint16_t);
+	uint32_t needed_size_for_function_name_rvas = number_of_names * sizeof(uint32_t);
+	uint32_t needed_size_for_function_addresses = (exports.empty() ? 0 : max_ordinal - ordinal_base + 1) * sizeof(uint32_t);
+	
+	//Export directory header will be placed first
+	uint32_t directory_pos = pe_utils::align_up(offset_from_section_start, sizeof(uint32_t));
+
+	uint32_t needed_size = sizeof(image_export_directory); //Calculate needed size for export tables and strings
+	//sizeof(IMAGE_EXPORT_DIRECTORY) = export directory header
+
+	//Total needed space...
+	needed_size += needed_size_for_function_name_ordinals; //For list of names ordinals
+	needed_size += needed_size_for_function_addresses; //For function RVAs
+	needed_size += needed_size_for_strings; //For all strings
+	needed_size += needed_size_for_function_name_rvas; //For function name strings RVAs
+
+	//Check if exports_section is last one. If it's not, check if there's enough space for exports data
+	if(&exports_section != &*(pe.get_image_sections().end() - 1) && 
+		(exports_section.empty() || pe_utils::align_up(exports_section.get_size_of_raw_data(), pe.get_file_alignment()) < needed_size + directory_pos))
+		throw pe_exception("Insufficient space for export directory", pe_exception::insufficient_space);
+
+	std::string& raw_data = exports_section.get_raw_data();
+
+	//This will be done only if exports_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + directory_pos)
+		raw_data.resize(needed_size + directory_pos); //Expand section raw data
+
+	//Function names start right after the directory header and the library name
+	uint32_t current_pos_of_function_names = static_cast<uint32_t>(info.get_name().length() + 1 + directory_pos + sizeof(image_export_directory));
+	//Function name ordinals follow the function names
+	uint32_t current_pos_of_function_name_ordinals = current_pos_of_function_names + needed_size_for_function_names;
+	//Forwarded function names follow the function name ordinals
+	uint32_t current_pos_of_function_forwards = current_pos_of_function_name_ordinals + needed_size_for_function_name_ordinals;
+	//Function addresses follow the forwarded function names
+	uint32_t current_pos_of_function_addresses = current_pos_of_function_forwards + needed_size_for_function_forwards;
+	//Function name RVAs follow the function addresses
+	uint32_t current_pos_of_function_names_rvas = current_pos_of_function_addresses + needed_size_for_function_addresses;
+
+	{
+		//Create export directory and fill it
+		image_export_directory dir = {0};
+		dir.Characteristics = info.get_characteristics();
+		dir.MajorVersion = info.get_major_version();
+		dir.MinorVersion = info.get_minor_version();
+		dir.TimeDateStamp = info.get_timestamp();
+		dir.NumberOfFunctions = exports.empty() ? 0 : max_ordinal - ordinal_base + 1;
+		dir.NumberOfNames = number_of_names;
+		dir.Base = ordinal_base;
+		dir.AddressOfFunctions = pe.rva_from_section_offset(exports_section, current_pos_of_function_addresses);
+		dir.AddressOfNameOrdinals = pe.rva_from_section_offset(exports_section, current_pos_of_function_name_ordinals);
+		dir.AddressOfNames = pe.rva_from_section_offset(exports_section, current_pos_of_function_names_rvas);
+		dir.Name = pe.rva_from_section_offset(exports_section, directory_pos + sizeof(image_export_directory));
+
+		//Save it
+		memcpy(&raw_data[directory_pos], &dir, sizeof(dir));
+	}
+
+	//Save library name
+	memcpy(&raw_data[directory_pos + sizeof(image_export_directory)], info.get_name().c_str(), info.get_name().length() + 1);
+
+	//A map to sort function names alphabetically
+	typedef std::map<std::string, uint16_t> funclist; //function name; function name ordinal
+	funclist funcs;
+
+	uint32_t last_ordinal = ordinal_base;
+	//Enumerate all exported functions
+	for(exported_functions_list::const_iterator it = exports.begin(); it != exports.end(); ++it)
+	{
+		const exported_function& func = (*it);
+
+		//If we're skipping some ordinals...
+		if(func.get_ordinal() > last_ordinal)
+		{
+			//Fill this function RVAs data with zeros
+			uint32_t len = sizeof(uint32_t) * (func.get_ordinal() - last_ordinal - 1);
+			if(len)
+			{
+				memset(&raw_data[current_pos_of_function_addresses], 0, len);
+				current_pos_of_function_addresses += len;
+			}
+			
+			//Save last encountered ordinal
+			last_ordinal = func.get_ordinal();
+		}
+		
+		//If function is named, save its name and name ordinal in alphabetically sorted order
+		if(func.has_name())
+			funcs.insert(std::make_pair(func.get_name(), static_cast<uint16_t>(func.get_ordinal() - ordinal_base))); //Calculate name ordinal
+
+		//If function is forwarded to another DLL
+		if(func.is_forwarded())
+		{
+			//Write its forwarded name and its RVA
+			uint32_t function_rva = pe.rva_from_section_offset(exports_section, current_pos_of_function_forwards);
+			memcpy(&raw_data[current_pos_of_function_addresses], &function_rva, sizeof(function_rva));
+			current_pos_of_function_addresses += sizeof(function_rva);
+
+			memcpy(&raw_data[current_pos_of_function_forwards], func.get_forwarded_name().c_str(), func.get_forwarded_name().length() + 1);
+			current_pos_of_function_forwards += static_cast<uint32_t>(func.get_forwarded_name().length() + 1);
+		}
+		else
+		{
+			//Write actual function RVA
+			uint32_t function_rva = func.get_rva();
+			memcpy(&raw_data[current_pos_of_function_addresses], &function_rva, sizeof(function_rva));
+			current_pos_of_function_addresses += sizeof(function_rva);
+		}
+	}
+	
+	//Enumerate sorted function names
+	for(funclist::const_iterator it = funcs.begin(); it != funcs.end(); ++it)
+	{
+		//Save function name RVA
+		uint32_t function_name_rva = pe.rva_from_section_offset(exports_section, current_pos_of_function_names);
+		memcpy(&raw_data[current_pos_of_function_names_rvas], &function_name_rva, sizeof(function_name_rva));
+		current_pos_of_function_names_rvas += sizeof(function_name_rva);
+
+		//Save function name
+		memcpy(&raw_data[current_pos_of_function_names], (*it).first.c_str(), (*it).first.length() + 1);
+		current_pos_of_function_names += static_cast<uint32_t>((*it).first.length() + 1);
+
+		//Save function name ordinal
+		uint16_t name_ordinal = (*it).second;
+		memcpy(&raw_data[current_pos_of_function_name_ordinals], &name_ordinal, sizeof(name_ordinal));
+		current_pos_of_function_name_ordinals += sizeof(name_ordinal);
+	}
+	
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(exports_section, auto_strip_last_section);
+	
+	image_directory ret(pe.rva_from_section_offset(exports_section, directory_pos), needed_size);
+
+	//If auto-rewrite of PE headers is required
+	if(save_to_pe_header)
+	{
+		pe.set_directory_rva(image_directory_entry_export, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_export, ret.get_size());
+	}
+
+	return ret;
+}
+}
diff --git a/tools/pe_bliss/pe_exports.h b/tools/pe_bliss/pe_exports.h
new file mode 100644
index 0000000000..127cf86ed6
--- /dev/null
+++ b/tools/pe_bliss/pe_exports.h
@@ -0,0 +1,184 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include <string>
+#include "pe_structures.h"
+#include "pe_base.h"
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Class representing exported function
+class exported_function
+{
+public:
+	//Default constructor
+	exported_function();
+
+	//Returns ordinal of function (actually, ordinal = hint + ordinal base)
+	uint16_t get_ordinal() const;
+
+	//Returns RVA of function
+	uint32_t get_rva() const;
+
+	//Returns true if function has name and name ordinal
+	bool has_name() const;
+	//Returns name of function
+	const std::string& get_name() const;
+	//Returns name ordinal of function
+	uint16_t get_name_ordinal() const;
+
+	//Returns true if function is forwarded to other library
+	bool is_forwarded() const;
+	//Returns the name of forwarded function
+	const std::string& get_forwarded_name() const;
+
+public: //Setters do not change everything inside image, they are used by PE class
+	//You can also use them to rebuild export directory
+
+	//Sets ordinal of function
+	void set_ordinal(uint16_t ordinal);
+
+	//Sets RVA of function
+	void set_rva(uint32_t rva);
+
+	//Sets name of function (or clears it, if empty name is passed)
+	void set_name(const std::string& name);
+	//Sets name ordinal
+	void set_name_ordinal(uint16_t name_ordinal);
+
+	//Sets forwarded function name (or clears it, if empty name is passed)
+	void set_forwarded_name(const std::string& name);
+
+private:
+	uint16_t ordinal_; //Function ordinal
+	uint32_t rva_; //Function RVA
+	std::string name_; //Function name
+	bool has_name_; //true == function has name
+	uint16_t name_ordinal_; //Function name ordinal
+	bool forward_; //true == function is forwarded
+	std::string forward_name_; //Name of forwarded function
+};
+
+//Class representing export information
+class export_info
+{
+public:
+	//Default constructor
+	export_info();
+
+	//Returns characteristics
+	uint32_t get_characteristics() const;
+	//Returns timestamp
+	uint32_t get_timestamp() const;
+	//Returns major version
+	uint16_t get_major_version() const;
+	//Returns minor version
+	uint16_t get_minor_version() const;
+	//Returns DLL name
+	const std::string& get_name() const;
+	//Returns ordinal base
+	uint32_t get_ordinal_base() const;
+	//Returns number of functions
+	uint32_t get_number_of_functions() const;
+	//Returns number of function names
+	uint32_t get_number_of_names() const;
+	//Returns RVA of function address table
+	uint32_t get_rva_of_functions() const;
+	//Returns RVA of function name address table
+	uint32_t get_rva_of_names() const;
+	//Returns RVA of name ordinals table
+	uint32_t get_rva_of_name_ordinals() const;
+
+public: //Setters do not change everything inside image, they are used by PE class
+	//You can also use them to rebuild export directory using rebuild_exports
+
+	//Sets characteristics
+	void set_characteristics(uint32_t characteristics);
+	//Sets timestamp
+	void set_timestamp(uint32_t timestamp);
+	//Sets major version
+	void set_major_version(uint16_t major_version);
+	//Sets minor version
+	void set_minor_version(uint16_t minor_version);
+	//Sets DLL name
+	void set_name(const std::string& name);
+	//Sets ordinal base
+	void set_ordinal_base(uint32_t ordinal_base);
+	//Sets number of functions
+	void set_number_of_functions(uint32_t number_of_functions);
+	//Sets number of function names
+	void set_number_of_names(uint32_t number_of_names);
+	//Sets RVA of function address table
+	void set_rva_of_functions(uint32_t rva_of_functions);
+	//Sets RVA of function name address table
+	void set_rva_of_names(uint32_t rva_of_names);
+	//Sets RVA of name ordinals table
+	void set_rva_of_name_ordinals(uint32_t rva_of_name_ordinals);
+
+private:
+	uint32_t characteristics_;
+	uint32_t timestamp_;
+	uint16_t major_version_;
+	uint16_t minor_version_;
+	std::string name_;
+	uint32_t ordinal_base_;
+	uint32_t number_of_functions_;
+	uint32_t number_of_names_;
+	uint32_t address_of_functions_;
+	uint32_t address_of_names_;
+	uint32_t address_of_name_ordinals_;
+};
+
+//Exported functions list typedef
+typedef std::vector<exported_function> exported_functions_list;
+
+//Returns array of exported functions
+const exported_functions_list get_exported_functions(const pe_base& pe);
+//Returns array of exported functions and information about export
+const exported_functions_list get_exported_functions(const pe_base& pe, export_info& info);
+	
+//Helper export functions
+//Returns pair: <ordinal base for supplied functions; maximum ordinal value for supplied functions>
+const std::pair<uint16_t, uint16_t> get_export_ordinal_limits(const exported_functions_list& exports);
+
+//Checks if exported function name already exists
+bool exported_name_exists(const std::string& function_name, const exported_functions_list& exports);
+
+//Checks if exported function ordinal already exists
+bool exported_ordinal_exists(uint16_t ordinal, const exported_functions_list& exports);
+
+//Export directory rebuilder
+//info - export information
+//exports - list of exported functions
+//exports_section - section where export directory will be placed (must be attached to PE image)
+//offset_from_section_start - offset from exports_section raw data start
+//save_to_pe_header - if true, new export directory information will be saved to PE image headers
+//auto_strip_last_section - if true and exports are placed in the last section, it will be automatically stripped
+//number_of_functions and number_of_names parameters don't matter in "info" when rebuilding, they're calculated independently
+//characteristics, major_version, minor_version, timestamp and name of "info" are used; ordinal_base is used only if "exports" is empty
+//Returns new export directory information
+//exports is copied intentionally to be sorted by ordinal values later
+//Name ordinals in exported function don't matter, they will be recalculated
+const image_directory rebuild_exports(pe_base& pe, const export_info& info, exported_functions_list exports, section& exports_section, uint32_t offset_from_section_start = 0, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+}
diff --git a/tools/pe_bliss/pe_factory.cpp b/tools/pe_bliss/pe_factory.cpp
new file mode 100644
index 0000000000..f6d8a3e1ed
--- /dev/null
+++ b/tools/pe_bliss/pe_factory.cpp
@@ -0,0 +1,43 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_factory.h"
+#include "pe_properties_generic.h"
+
+namespace pe_bliss
+{
+//Builds pe_base from a stream, picking PE32 or PE32+ properties by the detected image type
+pe_base pe_factory::create_pe(std::istream& file, bool read_debug_raw_data)
+{
+	const bool is_pe32 = pe_base::get_pe_type(file) == pe_type_32;
+	return is_pe32 ? pe_base(file, pe_properties_32(), read_debug_raw_data) : pe_base(file, pe_properties_64(), read_debug_raw_data);
+}
+
+//Opens file_path as a binary input stream and builds pe_base from it
+//Throws pe_exception (stream_is_bad) if the stream could not be opened
+pe_base pe_factory::create_pe(const char* file_path, bool read_debug_raw_data)
+{
+	std::ifstream input(file_path, std::ios::in | std::ios::binary);
+	if(input.fail())
+		throw pe_exception("Error in open file.", pe_exception::stream_is_bad);
+	return create_pe(input, read_debug_raw_data);
+}
+}
diff --git a/tools/pe_bliss/pe_factory.h b/tools/pe_bliss/pe_factory.h
new file mode 100644
index 0000000000..60b42d9b71
--- /dev/null
+++ b/tools/pe_bliss/pe_factory.h
@@ -0,0 +1,39 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <memory>
+#include <istream>
+#include <fstream>
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+class pe_factory
+{
+public:
+	//Creates pe_base class instance from PE or PE+ istream
+	//The file_path overload opens the file as a binary stream itself and throws pe_exception if it cannot be opened
+	//If read_debug_raw_data, raw debug data will be read (used to get image debug info)
+	static pe_base create_pe(std::istream& file, bool read_debug_raw_data = true);
+	static pe_base create_pe(const char* file_path, bool read_debug_raw_data = true);
+};
+}
diff --git a/tools/pe_bliss/pe_imports.cpp b/tools/pe_bliss/pe_imports.cpp
new file mode 100644
index 0000000000..0a6c01d6c0
--- /dev/null
+++ b/tools/pe_bliss/pe_imports.cpp
@@ -0,0 +1,777 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_imports.h"
+#include "pe_properties_generic.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//IMPORTS
+//Constructor. Other settings start as: offset 0, build original IAT on, save IAT RVAs on, fill missing/rewrite off, auto strip on
+//If set_to_pe_headers = true, IMAGE_DIRECTORY_ENTRY_IMPORT entry will be reset
+//to new value after import rebuilding
+//If auto_zero_directory_entry_iat = true, IMAGE_DIRECTORY_ENTRY_IAT will be set to zero
+//IMAGE_DIRECTORY_ENTRY_IAT is used by loader to temporarily make section, where IMAGE_DIRECTORY_ENTRY_IAT RVA points, writeable
+//to be able to modify IAT thunks
+import_rebuilder_settings::import_rebuilder_settings(bool set_to_pe_headers, bool auto_zero_directory_entry_iat)
+	:offset_from_section_start_(0),
+	build_original_iat_(true),
+	save_iat_and_original_iat_rvas_(true),
+	fill_missing_original_iats_(false),
+	set_to_pe_headers_(set_to_pe_headers),
+	zero_directory_entry_iat_(auto_zero_directory_entry_iat),
+	rewrite_iat_and_original_iat_contents_(false),
+	auto_strip_last_section_(true)
+{}
+
+//Returns offset from section start where import directory data will be placed (0 unless changed by the setter)
+uint32_t import_rebuilder_settings::get_offset_from_section_start() const
+{
+	return offset_from_section_start_;
+}
+
+//Returns true if Original import address table (IAT) will be rebuilt (enabled by the constructor)
+bool import_rebuilder_settings::build_original_iat() const
+{
+	return build_original_iat_;
+}
+
+//Returns true if existing IAT and original IAT RVAs of each library are kept instead of building new tables,
+//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+bool import_rebuilder_settings::save_iat_and_original_iat_rvas() const
+{
+	return save_iat_and_original_iat_rvas_;
+}
+
+//Returns true if contents of the kept IAT and original IAT will be rewritten in place (disabled by the constructor)
+//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+//and save_iat_and_original_iat_rvas is true
+bool import_rebuilder_settings::rewrite_iat_and_original_iat_contents() const
+{
+	return rewrite_iat_and_original_iat_contents_;
+}
+
+//Returns true if original missing IATs will be rebuilt (disabled by the constructor)
+//(only if IATs are saved)
+bool import_rebuilder_settings::fill_missing_original_iats() const
+{
+	return fill_missing_original_iats_;
+}
+
+//Returns true if PE headers (import directory RVA and size) should be updated automatically after rebuilding of imports
+bool import_rebuilder_settings::auto_set_to_pe_headers() const
+{
+	return set_to_pe_headers_;
+}
+
+//Returns true if IMAGE_DIRECTORY_ENTRY_IAT RVA and size must be zeroed, works only if auto_set_to_pe_headers = true
+bool import_rebuilder_settings::zero_directory_entry_iat() const
+{
+	return zero_directory_entry_iat_;	
+}
+
+//Returns true if the last section should be stripped automatically, if imports are inside it (enabled by the constructor)
+bool import_rebuilder_settings::auto_strip_last_section_enabled() const
+{
+	return auto_strip_last_section_;
+}
+
+//Sets offset from section start where import directory data will be placed (the rebuilder writes name strings first at this offset)
+void import_rebuilder_settings::set_offset_from_section_start(uint32_t offset)
+{
+	offset_from_section_start_ = offset;
+}
+
+//Enables or disables rebuilding of Original import address table (IAT)
+void import_rebuilder_settings::build_original_iat(bool enable)
+{
+	build_original_iat_ = enable;
+}
+
+//Sets if Original import address and import address tables will not be rebuilt,
+//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+//enable_rewrite_iat_and_original_iat_contents is honoured only when enable is true;
+//disabling saving always switches content rewriting off as well
+void import_rebuilder_settings::save_iat_and_original_iat_rvas(bool enable, bool enable_rewrite_iat_and_original_iat_contents)
+{
+	const bool rewrite_contents = enable && enable_rewrite_iat_and_original_iat_contents;
+	save_iat_and_original_iat_rvas_ = enable;
+	rewrite_iat_and_original_iat_contents_ = rewrite_contents;
+}
+
+//Enables or disables rebuilding of original IATs for libraries that have none
+//(only if IATs are saved)
+void import_rebuilder_settings::fill_missing_original_iats(bool enable)
+{
+	fill_missing_original_iats_ = enable;
+}
+
+//Sets if PE headers (import directory RVA and size) should be updated automatically after rebuilding of imports
+void import_rebuilder_settings::auto_set_to_pe_headers(bool enable)
+{
+	set_to_pe_headers_ = enable;
+}
+
+//Sets if IMAGE_DIRECTORY_ENTRY_IAT RVA and size must be zeroed, works only if auto_set_to_pe_headers = true
+void import_rebuilder_settings::zero_directory_entry_iat(bool enable)
+{
+	zero_directory_entry_iat_ = enable;
+}
+
+//Sets if the last section should be stripped automatically, if imports are inside it (the constructor enables it)
+void import_rebuilder_settings::enable_auto_strip_last_section(bool enable)
+{
+	auto_strip_last_section_ = enable;
+}
+
+//Default constructor: hint, ordinal and IAT VA start at zero
+imported_function::imported_function()
+	:hint_(0), ordinal_(0), iat_va_(0)
+{}
+
+//Returns name of function (reference to the stored string)
+const std::string& imported_function::get_name() const
+{
+	return name_;
+}
+
+//Returns true if imported function has name (and hint), i.e. the stored name is not empty
+bool imported_function::has_name() const
+{
+	return !name_.empty();
+}
+
+//Returns hint (0 until set)
+uint16_t imported_function::get_hint() const
+{
+	return hint_;
+}
+
+//Returns ordinal of function (0 until set)
+uint16_t imported_function::get_ordinal() const
+{
+	return ordinal_;
+}
+
+//Returns IAT entry VA (usable if image has both IAT and original IAT and is bound); 0 until set
+uint64_t imported_function::get_iat_va() const
+{
+	return iat_va_;
+}
+
+//Sets name of function (a non-empty name makes has_name() return true)
+void imported_function::set_name(const std::string& name)
+{
+	name_ = name;
+}
+
+//Sets hint (written before the name when imports are rebuilt)
+void imported_function::set_hint(uint16_t hint)
+{
+	hint_ = hint;
+}
+
+//Sets ordinal (used by the rebuilder only when the function has no name)
+void imported_function::set_ordinal(uint16_t ordinal)
+{
+	ordinal_ = ordinal;
+}
+
+//Sets IAT entry VA (usable if image has both IAT and original IAT and is bound)
+//NOTE: stored as 64-bit; the rebuilder casts it to the PE class base size when writing
+void imported_function::set_iat_va(uint64_t va)
+{
+	iat_va_ = va;
+}
+
+//Default constructor: IAT RVA, original IAT RVA and timestamp start at zero
+import_library::import_library()
+	:rva_to_iat_(0), rva_to_original_iat_(0), timestamp_(0)
+{}
+
+//Returns name of library (reference to the stored string)
+const std::string& import_library::get_name() const
+{
+	return name_;
+}
+
+//Returns RVA to Import Address Table (IAT); 0 until set
+uint32_t import_library::get_rva_to_iat() const
+{
+	return rva_to_iat_;
+}
+
+//Returns RVA to Original Import Address Table (Original IAT); 0 until set
+uint32_t import_library::get_rva_to_original_iat() const
+{
+	return rva_to_original_iat_;
+}
+
+//Returns timestamp (0 until set)
+uint32_t import_library::get_timestamp() const
+{
+	return timestamp_;
+}
+
+//Sets name of library (the string is copied)
+void import_library::set_name(const std::string& name)
+{
+	name_ = name;
+}
+
+//Sets RVA to Import Address Table (IAT); the rebuilder keeps a saved IAT only when this is not zero
+void import_library::set_rva_to_iat(uint32_t rva_to_iat)
+{
+	rva_to_iat_ = rva_to_iat;
+}
+
+//Sets RVA to Original Import Address Table (Original IAT); zero means the library has no original IAT
+void import_library::set_rva_to_original_iat(uint32_t rva_to_original_iat)
+{
+	rva_to_original_iat_ = rva_to_original_iat;
+}
+
+//Sets timestamp (copied into the import descriptor TimeDateStamp when imports are rebuilt)
+void import_library::set_timestamp(uint32_t timestamp)
+{
+	timestamp_ = timestamp;
+}
+
+//Returns imported functions list (const reference to the list filled by add_import)
+const import_library::imported_list& import_library::get_imported_functions() const
+{
+	return imports_;
+}
+
+//Adds imported function (a copy of func is appended to the end of the list)
+void import_library::add_import(const imported_function& func)
+{
+	imports_.push_back(func);
+}
+
+//Clears imported functions list (library name, RVAs and timestamp are left untouched)
+void import_library::clear_imports()
+{
+	imports_.clear();
+}
+
+//Reads imports of the image, dispatching to the PE32 or PE32+ template implementation by image type
+const imported_functions_list get_imported_functions(const pe_base& pe)
+{
+	const bool is_pe32 = pe.get_pe_type() == pe_type_32;
+	return is_pe32 ? get_imported_functions_base<pe_types_class_32>(pe) : get_imported_functions_base<pe_types_class_64>(pe);
+}
+
+//Rebuilds imports of the image, dispatching to the PE32 or PE32+ template implementation by image type
+const image_directory rebuild_imports(pe_base& pe, const imported_functions_list& imports, section& import_section, const import_rebuilder_settings& import_settings)
+{
+	const bool is_pe32 = pe.get_pe_type() == pe_type_32;
+	return is_pe32 ? rebuild_imports_base<pe_types_class_32>(pe, imports, import_section, import_settings) : rebuild_imports_base<pe_types_class_64>(pe, imports, import_section, import_settings);
+}
+
+//Returns imported functions list with related libraries info; throws pe_exception on malformed import data
+template<typename PEClassType>
+const imported_functions_list get_imported_functions_base(const pe_base& pe)
+{
+	imported_functions_list ret;
+
+	//If image has no imports, return empty array
+	if(!pe.has_imports())
+		return ret;
+
+	unsigned long current_descriptor_pos = pe.get_directory_rva(image_directory_entry_import);
+	//Get first IMAGE_IMPORT_DESCRIPTOR
+	image_import_descriptor import_descriptor = pe.section_data_from_rva<image_import_descriptor>(current_descriptor_pos, section_data_virtual, true);
+
+	//Iterate them until we reach zero-element
+	//We don't need to check correctness of this, because exception will be thrown
+	//inside of loop if we go outside of section
+	while(import_descriptor.Name)
+	{
+		//Get imported library information
+		import_library lib;
+
+		unsigned long max_name_length;
+		//Get byte count that we have for library name
+		if((max_name_length = pe.section_data_length_from_rva(import_descriptor.Name, import_descriptor.Name, section_data_virtual, true)) < 2)
+			throw pe_exception("Incorrect import directory", pe_exception::incorrect_import_directory);
+
+		//Get DLL name pointer
+		const char* dll_name = pe.section_data_from_rva(import_descriptor.Name, section_data_virtual, true);
+
+		//Check for null-termination
+		if(!pe_utils::is_null_terminated(dll_name, max_name_length))
+			throw pe_exception("Incorrect import directory", pe_exception::incorrect_import_directory);
+
+		//Set library name
+		lib.set_name(dll_name);
+		//Set library timestamp
+		lib.set_timestamp(import_descriptor.TimeDateStamp);
+		//Set library RVA to IAT and original IAT
+		lib.set_rva_to_iat(import_descriptor.FirstThunk);
+		lib.set_rva_to_original_iat(import_descriptor.OriginalFirstThunk);
+
+		//Get RVA to IAT (it must be filled by loader when loading PE)
+		uint32_t current_thunk_rva = import_descriptor.FirstThunk;
+		typename PEClassType::BaseSize import_address_table = pe.section_data_from_rva<typename PEClassType::BaseSize>(current_thunk_rva, section_data_virtual, true);
+
+		//Get RVA to original IAT (lookup table), which must handle imported functions names
+		//Some linkers leave this pointer zero-filled
+		//Such image is valid, but it is not possible to restore imported functions names
+		//after image was loaded, because IAT becomes the only one table
+		//containing both function names and function RVAs after loading
+		uint32_t current_original_thunk_rva = import_descriptor.OriginalFirstThunk;
+		typename PEClassType::BaseSize import_lookup_table = current_original_thunk_rva == 0 ? import_address_table : pe.section_data_from_rva<typename PEClassType::BaseSize>(current_original_thunk_rva, section_data_virtual, true);
+		if(current_original_thunk_rva == 0)
+			current_original_thunk_rva = current_thunk_rva;
+
+		//List all imported functions for current DLL
+		if(import_lookup_table != 0 && import_address_table != 0)
+		{
+			while(true)
+			{
+				//Imported function description
+				imported_function func;
+
+				//Get VA from IAT
+				typename PEClassType::BaseSize address = pe.section_data_from_rva<typename PEClassType::BaseSize>(current_thunk_rva, section_data_virtual, true);
+				//Move pointer
+				current_thunk_rva += sizeof(typename PEClassType::BaseSize);
+
+				//Jump to next DLL if we finished with this one
+				if(!address)
+					break;
+
+				func.set_iat_va(address);
+
+				//Get VA from original IAT
+				typename PEClassType::BaseSize lookup = pe.section_data_from_rva<typename PEClassType::BaseSize>(current_original_thunk_rva, section_data_virtual, true);
+				//Move pointer
+				current_original_thunk_rva += sizeof(typename PEClassType::BaseSize);
+
+				//Check if function is imported by ordinal
+				if((lookup & PEClassType::ImportSnapFlag) != 0)
+				{
+					//Set function ordinal
+					func.set_ordinal(static_cast<uint16_t>(lookup & 0xffff));
+				}
+				else
+				{
+					//Get byte count that we have for function name
+					if(lookup > static_cast<uint32_t>(-1) - sizeof(uint16_t))
+						throw pe_exception("Incorrect import directory", pe_exception::incorrect_import_directory);
+
+					//Get maximum available length of function name
+					if((max_name_length = pe.section_data_length_from_rva(static_cast<uint32_t>(lookup + sizeof(uint16_t)), static_cast<uint32_t>(lookup + sizeof(uint16_t)), section_data_virtual, true)) < 2)
+						throw pe_exception("Incorrect import directory", pe_exception::incorrect_import_directory);
+
+					//Get imported function name
+					const char* func_name = pe.section_data_from_rva(static_cast<uint32_t>(lookup + sizeof(uint16_t)), section_data_virtual, true);
+
+					//Check for null-termination
+					if(!pe_utils::is_null_terminated(func_name, max_name_length))
+						throw pe_exception("Incorrect import directory", pe_exception::incorrect_import_directory);
+
+					//Hint is the uint16_t stored just before the name (per PE format: index into the exporter's name pointer table)
+					uint16_t hint = pe.section_data_from_rva<uint16_t>(static_cast<uint32_t>(lookup), section_data_virtual, true);
+
+					//Save hint and name
+					func.set_name(func_name);
+					func.set_hint(hint);
+				}
+
+				//Add function to list
+				lib.add_import(func);
+			}
+		}
+
+		//Check possible overflow
+		if(!pe_utils::is_sum_safe(current_descriptor_pos, sizeof(image_import_descriptor)))
+			throw pe_exception("Incorrect import directory", pe_exception::incorrect_import_directory);
+
+		//Go to next library
+		current_descriptor_pos += sizeof(image_import_descriptor);
+		import_descriptor = pe.section_data_from_rva<image_import_descriptor>(current_descriptor_pos, section_data_virtual, true);
+
+		//Save import information
+		ret.push_back(lib);
+	}
+
+	//Return resulting list
+	return ret;
+}
+
+
+//Simple import directory rebuilder (get the current list of imports with get_imported_functions())
+//Layout written into import_section, starting at the configured offset from section start:
+//library/function name strings, import descriptors (plus a null one), IAT, then original IAT
+//To keep PE file working, rebuild its imports with save_iat_and_original_iat_rvas = true (default)
+//Don't add new imported functions to existing imported library entries, because this can cause
+//rewriting of some used memory (or other IAT/orig.IAT fields) by system loader
+//The safest way is just adding import libraries with functions to the end of imported_functions_list array
+//Throws pe_exception if the section is not attached or space is insufficient; returns new import directory info
+template<typename PEClassType>
+const image_directory rebuild_imports_base(pe_base& pe, const imported_functions_list& imports, section& import_section, const import_rebuilder_settings& import_settings)
+{
+	//Check that import_section is attached to this PE image
+	if(!pe.section_attached(import_section))
+		throw pe_exception("Import section must be attached to PE file", pe_exception::section_is_not_attached);
+
+	uint32_t needed_size = 0; //Calculate needed size for import structures and strings
+	uint32_t needed_size_for_strings = 0; //Calculate needed size for import strings (library and function names and hints)
+	uint32_t size_of_iat = 0; //Size of IAT structures
+
+	needed_size += static_cast<uint32_t>((1 /* ending null descriptor */ + imports.size()) * sizeof(image_import_descriptor));
+	//Enumerate imported functions
+	for(imported_functions_list::const_iterator it = imports.begin(); it != imports.end(); ++it)
+	{
+		needed_size_for_strings += static_cast<uint32_t>((*it).get_name().length() + 1 /* nullbyte */);
+
+		const import_library::imported_list& funcs = (*it).get_imported_functions();
+
+		//IMAGE_THUNK_DATA
+		size_of_iat += static_cast<uint32_t>(sizeof(typename PEClassType::BaseSize) * (1 /*ending null */ + funcs.size()));
+
+		//Enumerate all imported functions in library
+		for(import_library::imported_list::const_iterator f = funcs.begin(); f != funcs.end(); ++f)
+		{
+			if((*f).has_name())
+				needed_size_for_strings += static_cast<uint32_t>((*f).get_name().length() + 1 /* nullbyte */ + sizeof(uint16_t) /* hint */);
+		}
+	}
+
+	if(import_settings.build_original_iat() || import_settings.fill_missing_original_iats())
+		needed_size += size_of_iat * 2; //We'll have two similar-sized IATs if we're building original IAT
+	else
+		needed_size += size_of_iat;
+
+	needed_size += sizeof(typename PEClassType::BaseSize); //Maximum align for IAT and original IAT
+	
+	//Total needed size for import structures and strings
+	needed_size += needed_size_for_strings;
+
+	//Check if import_section is last one. If it's not, check if there's enough place for import data
+	if(&import_section != &*(pe.get_image_sections().end() - 1) && 
+		(import_section.empty() || pe_utils::align_up(import_section.get_size_of_raw_data(), pe.get_file_alignment()) < needed_size + import_settings.get_offset_from_section_start()))
+		throw pe_exception("Insufficient space for import directory", pe_exception::insufficient_space);
+
+	std::string& raw_data = import_section.get_raw_data();
+
+	//This will be done only if import_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + import_settings.get_offset_from_section_start())
+		raw_data.resize(needed_size + import_settings.get_offset_from_section_start()); //Expand section raw data
+	
+	uint32_t current_string_pointer = import_settings.get_offset_from_section_start();/* we will paste structures after strings */
+	
+	//Position for IAT
+	uint32_t current_pos_for_iat = pe_utils::align_up(static_cast<uint32_t>(needed_size_for_strings + import_settings.get_offset_from_section_start() + (1 + imports.size()) * sizeof(image_import_descriptor)), sizeof(typename PEClassType::BaseSize));
+	//Position for original IAT
+	uint32_t current_pos_for_original_iat = current_pos_for_iat + size_of_iat;
+	//Position for import descriptors
+	uint32_t current_pos_for_descriptors = needed_size_for_strings + import_settings.get_offset_from_section_start();
+
+	//Build imports
+	for(imported_functions_list::const_iterator it = imports.begin(); it != imports.end(); ++it)
+	{
+		//Create import descriptor
+		image_import_descriptor descr;
+		memset(&descr, 0, sizeof(descr));
+		descr.TimeDateStamp = (*it).get_timestamp(); //Restore timestamp
+		descr.Name = pe.rva_from_section_offset(import_section, current_string_pointer); //Library name RVA
+
+		//If we should save IAT for current import descriptor
+		bool save_iats_for_this_descriptor = import_settings.save_iat_and_original_iat_rvas() && (*it).get_rva_to_iat() != 0;
+		//If we should write original IAT
+		bool write_original_iat = (!save_iats_for_this_descriptor && import_settings.build_original_iat()) || import_settings.fill_missing_original_iats();
+
+		//If we should rewrite saved original IAT for current import descriptor (without changing its position)
+		bool rewrite_saved_original_iat = save_iats_for_this_descriptor && import_settings.rewrite_iat_and_original_iat_contents() && import_settings.build_original_iat();
+		//If we should rewrite saved IAT for current import descriptor (without changing its position)
+		bool rewrite_saved_iat = save_iats_for_this_descriptor && import_settings.rewrite_iat_and_original_iat_contents() && (*it).get_rva_to_iat() != 0;
+
+		//Helper values if we're rewriting existing IAT or orig.IAT
+		uint32_t original_first_thunk = 0;
+		uint32_t first_thunk = 0;
+
+		if(save_iats_for_this_descriptor)
+		{
+			//If there's no original IAT and we're asked to rebuild missing original IATs
+			if(!(*it).get_rva_to_original_iat() && import_settings.fill_missing_original_iats())
+				descr.OriginalFirstThunk = import_settings.build_original_iat() ? pe.rva_from_section_offset(import_section, current_pos_for_original_iat) : 0;
+			else
+				descr.OriginalFirstThunk = import_settings.build_original_iat() ? (*it).get_rva_to_original_iat() : 0;
+			
+			descr.FirstThunk = (*it).get_rva_to_iat();
+
+			original_first_thunk = descr.OriginalFirstThunk;
+			first_thunk = descr.FirstThunk;
+
+			if(rewrite_saved_original_iat)
+			{
+				if((*it).get_rva_to_original_iat())
+					write_original_iat = true;
+				else
+					rewrite_saved_original_iat = false;
+			}
+
+			if(rewrite_saved_iat)
+				save_iats_for_this_descriptor = false;
+		}
+		else
+		{
+			//We are creating new IAT and original IAT (if needed)
+			descr.OriginalFirstThunk = import_settings.build_original_iat() ? pe.rva_from_section_offset(import_section, current_pos_for_original_iat) : 0;
+			descr.FirstThunk = pe.rva_from_section_offset(import_section, current_pos_for_iat);
+		}
+		
+		//Save import descriptor
+		memcpy(&raw_data[current_pos_for_descriptors], &descr, sizeof(descr));
+		current_pos_for_descriptors += sizeof(descr);
+
+		//Save library name
+		memcpy(&raw_data[current_string_pointer], (*it).get_name().c_str(), (*it).get_name().length() + 1 /* nullbyte */);
+		current_string_pointer += static_cast<uint32_t>((*it).get_name().length() + 1 /* nullbyte */);
+		
+		//List all imported functions
+		const import_library::imported_list& funcs = (*it).get_imported_functions();
+		for(import_library::imported_list::const_iterator f = funcs.begin(); f != funcs.end(); ++f)
+		{
+			if((*f).has_name()) //If function is imported by name
+			{
+				//Get RVA of IMAGE_IMPORT_BY_NAME
+				typename PEClassType::BaseSize rva_of_named_import = pe.rva_from_section_offset(import_section, current_string_pointer);
+
+				if(!save_iats_for_this_descriptor)
+				{
+					if(write_original_iat)
+					{
+						//We're creating original IATs - so we can write to IAT saved VA (because IMAGE_IMPORT_BY_NAME will be read
+						//by PE loader from original IAT)
+						typename PEClassType::BaseSize iat_value = static_cast<typename PEClassType::BaseSize>((*f).get_iat_va());
+
+						if(rewrite_saved_iat)
+						{
+							if(pe.section_data_length_from_rva(first_thunk, first_thunk, section_data_raw, true) <= sizeof(iat_value))
+								throw pe_exception("Insufficient space inside initial IAT", pe_exception::insufficient_space);
+
+							memcpy(pe.section_data_from_rva(first_thunk, true), &iat_value, sizeof(iat_value));
+
+							first_thunk += sizeof(iat_value);
+						}
+						else
+						{
+							memcpy(&raw_data[current_pos_for_iat], &iat_value, sizeof(iat_value));
+							current_pos_for_iat += sizeof(rva_of_named_import);
+						}
+					}
+					else
+					{
+						//Else - write to IAT RVA of IMAGE_IMPORT_BY_NAME
+						if(rewrite_saved_iat)
+						{
+							if(pe.section_data_length_from_rva(first_thunk, first_thunk, section_data_raw, true) <= sizeof(rva_of_named_import))
+								throw pe_exception("Insufficient space inside initial IAT", pe_exception::insufficient_space);
+
+							memcpy(pe.section_data_from_rva(first_thunk, true), &rva_of_named_import, sizeof(rva_of_named_import));
+
+							first_thunk += sizeof(rva_of_named_import);
+						}
+						else
+						{
+							memcpy(&raw_data[current_pos_for_iat], &rva_of_named_import, sizeof(rva_of_named_import));
+							current_pos_for_iat += sizeof(rva_of_named_import);
+						}
+					}
+				}
+
+				if(write_original_iat)
+				{
+					if(rewrite_saved_original_iat)
+					{
+						if(pe.section_data_length_from_rva(original_first_thunk, original_first_thunk, section_data_raw, true) <= sizeof(rva_of_named_import))
+							throw pe_exception("Insufficient space inside initial original IAT", pe_exception::insufficient_space);
+
+						memcpy(pe.section_data_from_rva(original_first_thunk, true), &rva_of_named_import, sizeof(rva_of_named_import));
+
+						original_first_thunk += sizeof(rva_of_named_import);
+					}
+					else
+					{
+						//We're creating original IATs
+						memcpy(&raw_data[current_pos_for_original_iat], &rva_of_named_import, sizeof(rva_of_named_import));
+						current_pos_for_original_iat += sizeof(rva_of_named_import);
+					}
+				}
+
+				//Write IMAGE_IMPORT_BY_NAME (WORD hint + string function name)
+				uint16_t hint = (*f).get_hint();
+				memcpy(&raw_data[current_string_pointer], &hint, sizeof(hint));
+				memcpy(&raw_data[current_string_pointer + sizeof(uint16_t)], (*f).get_name().c_str(), (*f).get_name().length() + 1 /* nullbyte */);
+				current_string_pointer += static_cast<uint32_t>((*f).get_name().length() + 1 /* nullbyte */ + sizeof(uint16_t) /* hint */);
+			}
+			else //Function is imported by ordinal
+			{
+				uint16_t ordinal = (*f).get_ordinal();
+				typename PEClassType::BaseSize thunk_value = ordinal;
+				thunk_value |= PEClassType::ImportSnapFlag; //Imported by ordinal
+
+				if(!save_iats_for_this_descriptor)
+				{
+					if(write_original_iat)
+					{
+						//We're creating original IATs - so we can write to IAT saved VA (because ordinal will be read
+						//by PE loader from original IAT)
+						typename PEClassType::BaseSize iat_value = static_cast<typename PEClassType::BaseSize>((*f).get_iat_va());
+						if(rewrite_saved_iat)
+						{
+							if(pe.section_data_length_from_rva(first_thunk, first_thunk, section_data_raw, true) <= sizeof(iat_value))
+								throw pe_exception("Insufficient space inside initial IAT", pe_exception::insufficient_space);
+
+							memcpy(pe.section_data_from_rva(first_thunk, true), &iat_value, sizeof(iat_value));
+
+							first_thunk += sizeof(iat_value);
+						}
+						else
+						{
+							memcpy(&raw_data[current_pos_for_iat], &iat_value, sizeof(iat_value));
+							current_pos_for_iat += sizeof(thunk_value);
+						}
+					}
+					else
+					{
+						//Else - write ordinal to IAT
+						if(rewrite_saved_iat)
+						{
+							if(pe.section_data_length_from_rva(first_thunk, first_thunk, section_data_raw, true) <= sizeof(thunk_value))
+								throw pe_exception("Insufficient space inside initial IAT", pe_exception::insufficient_space);
+
+							memcpy(pe.section_data_from_rva(first_thunk, true), &thunk_value, sizeof(thunk_value));
+
+							first_thunk += sizeof(thunk_value);
+						}
+						else
+						{
+							memcpy(&raw_data[current_pos_for_iat], &thunk_value, sizeof(thunk_value));
+							current_pos_for_iat += sizeof(thunk_value); //Advance, otherwise the next thunk overwrites this slot
+						}
+					}
+				}
+
+				//We're writing ordinal to original IAT slot
+				if(write_original_iat)
+				{
+					if(rewrite_saved_original_iat)
+					{
+						if(pe.section_data_length_from_rva(original_first_thunk, original_first_thunk, section_data_raw, true) <= sizeof(thunk_value))
+							throw pe_exception("Insufficient space inside initial original IAT", pe_exception::insufficient_space);
+
+						memcpy(pe.section_data_from_rva(original_first_thunk, true), &thunk_value, sizeof(thunk_value));
+
+						original_first_thunk += sizeof(thunk_value);
+					}
+					else
+					{
+						memcpy(&raw_data[current_pos_for_original_iat], &thunk_value, sizeof(thunk_value));
+						current_pos_for_original_iat += sizeof(thunk_value);
+					}
+				}
+			}
+		}
+
+		if(!save_iats_for_this_descriptor)
+		{
+			//Ending null thunks
+			typename PEClassType::BaseSize thunk_value = 0;
+
+			if(rewrite_saved_iat)
+			{
+				if(pe.section_data_length_from_rva(first_thunk, first_thunk, section_data_raw, true) <= sizeof(thunk_value))
+					throw pe_exception("Insufficient space inside initial IAT", pe_exception::insufficient_space);
+
+				memcpy(pe.section_data_from_rva(first_thunk, true), &thunk_value, sizeof(thunk_value));
+
+				first_thunk += sizeof(thunk_value);
+			}
+			else
+			{
+				memcpy(&raw_data[current_pos_for_iat], &thunk_value, sizeof(thunk_value));
+				current_pos_for_iat += sizeof(thunk_value);
+			}
+		}
+
+		if(write_original_iat)
+		{
+			//Ending null thunks
+			typename PEClassType::BaseSize thunk_value = 0;
+
+			if(rewrite_saved_original_iat)
+			{
+				if(pe.section_data_length_from_rva(original_first_thunk, original_first_thunk, section_data_raw, true) <= sizeof(thunk_value))
+					throw pe_exception("Insufficient space inside initial original IAT", pe_exception::insufficient_space);
+
+				memcpy(pe.section_data_from_rva(original_first_thunk, true), &thunk_value, sizeof(thunk_value));
+
+				original_first_thunk += sizeof(thunk_value);
+			}
+			else
+			{
+				memcpy(&raw_data[current_pos_for_original_iat], &thunk_value, sizeof(thunk_value));
+				current_pos_for_original_iat += sizeof(thunk_value);
+			}
+		}
+	}
+
+	{
+		//Null ending descriptor
+		image_import_descriptor descr;
+		memset(&descr, 0, sizeof(descr));
+		memcpy(&raw_data[current_pos_for_descriptors], &descr, sizeof(descr));
+	}
+
+	//Strip data a little, if we saved some place
+	//We're allocating more space than needed, if present original IAT and IAT are saved
+	raw_data.resize(current_pos_for_original_iat);
+
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(import_section, import_settings.auto_strip_last_section_enabled());
+
+	//Return information about rebuilt import directory
+	image_directory ret(pe.rva_from_section_offset(import_section, import_settings.get_offset_from_section_start() + needed_size_for_strings), needed_size - needed_size_for_strings);
+
+	//If auto-rewrite of PE headers is required
+	if(import_settings.auto_set_to_pe_headers())
+	{
+		pe.set_directory_rva(image_directory_entry_import, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_import, ret.get_size());
+
+		//If we are requested to zero IMAGE_DIRECTORY_ENTRY_IAT also
+		if(import_settings.zero_directory_entry_iat())
+		{
+			pe.set_directory_rva(image_directory_entry_iat, 0);
+			pe.set_directory_size(image_directory_entry_iat, 0);
+		}
+	}
+
+	return ret;
+}
+}
diff --git a/tools/pe_bliss/pe_imports.h b/tools/pe_bliss/pe_imports.h
new file mode 100644
index 0000000000..681b5b59bd
--- /dev/null
+++ b/tools/pe_bliss/pe_imports.h
@@ -0,0 +1,208 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include <string>
+#include "pe_structures.h"
+#include "pe_directory.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+//Class representing a single imported function
+class imported_function
+{
+public:
+	//Default constructor
+	imported_function();
+
+	//Returns true if imported function has name (and hint)
+	bool has_name() const;
+	//Returns name of function
+	const std::string& get_name() const;
+	//Returns hint
+	uint16_t get_hint() const;
+	//Returns ordinal of function
+	uint16_t get_ordinal() const;
+
+	//Returns IAT entry VA (usable if image has both IAT and original IAT and is bound)
+	uint64_t get_iat_va() const;
+
+public: //Setters do not change everything inside image, they are used by PE class
+	//You can also use them to rebuild image imports
+	//Sets name of function
+	void set_name(const std::string& name);
+	//Sets hint
+	void set_hint(uint16_t hint);
+	//Sets ordinal
+	void set_ordinal(uint16_t ordinal);
+
+	//Sets IAT entry VA (usable if image has both IAT and original IAT and is bound); NOTE(review): parameter is named "rva" but is documented as a VA
+	void set_iat_va(uint64_t rva);
+
+private:
+	std::string name_; //Function name
+	uint16_t hint_; //Hint
+	uint16_t ordinal_; //Ordinal
+	uint64_t iat_va_; //IAT entry VA
+};
+
+//Class representing one imported library together with the list of functions imported from it
+class import_library
+{
+public:
+	typedef std::vector<imported_function> imported_list;
+
+public:
+	//Default constructor
+	import_library();
+
+	//Returns name of library
+	const std::string& get_name() const;
+	//Returns RVA to Import Address Table (IAT)
+	uint32_t get_rva_to_iat() const;
+	//Returns RVA to Original Import Address Table (Original IAT)
+	uint32_t get_rva_to_original_iat() const;
+	//Returns timestamp
+	uint32_t get_timestamp() const;
+
+	//Returns imported functions list (read-only)
+	const imported_list& get_imported_functions() const;
+
+public: //Setters do not change everything inside image, they are used by PE class
+	//You can also use them to rebuild image imports
+	//Sets name of library
+	void set_name(const std::string& name);
+	//Sets RVA to Import Address Table (IAT)
+	void set_rva_to_iat(uint32_t rva_to_iat);
+	//Sets RVA to Original Import Address Table (Original IAT)
+	void set_rva_to_original_iat(uint32_t rva_to_original_iat);
+	//Sets timestamp
+	void set_timestamp(uint32_t timestamp);
+
+	//Adds imported function
+	void add_import(const imported_function& func);
+	//Clears imported functions list
+	void clear_imports();
+
+private:
+	std::string name_; //Library name
+	uint32_t rva_to_iat_; //RVA to IAT
+	uint32_t rva_to_original_iat_; //RVA to original IAT
+	uint32_t timestamp_; //DLL TimeStamp
+
+	imported_list imports_; //Imported functions list
+};
+
+//Simple import directory rebuilder
+//Class representing import rebuilder advanced settings (passed to rebuild_imports)
+class import_rebuilder_settings
+{
+public:
+	//Default constructor
+	//(both parameters have default values, so it can be called without arguments)
+	//If set_to_pe_headers = true, IMAGE_DIRECTORY_ENTRY_IMPORT entry will be reset
+	//to new value after import rebuilding
+	//If auto_zero_directory_entry_iat = true, IMAGE_DIRECTORY_ENTRY_IAT will be set to zero
+	//IMAGE_DIRECTORY_ENTRY_IAT is used by loader to temporarily make section, where IMAGE_DIRECTORY_ENTRY_IAT RVA points, writable
+	//to be able to modify IAT thunks
+	explicit import_rebuilder_settings(bool set_to_pe_headers = true, bool auto_zero_directory_entry_iat = false);
+
+	//Returns offset from section start where import directory data will be placed
+	uint32_t get_offset_from_section_start() const;
+	//Returns true if Original import address table (IAT) will be rebuilt
+	bool build_original_iat() const;
+
+	//Returns true if Original import address and import address tables will not be rebuilt,
+	//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+	bool save_iat_and_original_iat_rvas() const;
+	//Returns true if Original import address and import address tables contents will be rewritten
+	//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+	//and save_iat_and_original_iat_rvas is true
+	bool rewrite_iat_and_original_iat_contents() const;
+
+	//Returns true if original missing IATs will be rebuilt
+	//(only if IATs are saved)
+	bool fill_missing_original_iats() const;
+	//Returns true if PE headers should be updated automatically after rebuilding of imports
+	bool auto_set_to_pe_headers() const;
+	//Returns true if IMAGE_DIRECTORY_ENTRY_IAT must be zeroed, works only if auto_set_to_pe_headers = true
+	bool zero_directory_entry_iat() const;
+
+	//Returns true if the last section should be stripped automatically, if imports are inside it
+	bool auto_strip_last_section_enabled() const;
+
+public: //Setters
+	//Sets offset from section start where import directory data will be placed
+	void set_offset_from_section_start(uint32_t offset);
+	//Sets if Original import address table (IAT) will be rebuilt
+	void build_original_iat(bool enable);
+	//Sets if Original import address and import address tables will not be rebuilt,
+	//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+	//enable_rewrite_iat_and_original_iat_contents sets if Original import address and import address tables contents will be rewritten
+	//works only if import descriptor IAT (and orig.IAT, if present) RVAs are not zero
+	//and save_iat_and_original_iat_rvas is true
+	void save_iat_and_original_iat_rvas(bool enable, bool enable_rewrite_iat_and_original_iat_contents = false);
+	//Sets if original missing IATs will be rebuilt
+	//(only if IATs are saved)
+	void fill_missing_original_iats(bool enable);
+	//Sets if PE headers should be updated automatically after rebuilding of imports
+	void auto_set_to_pe_headers(bool enable);
+	//Sets if IMAGE_DIRECTORY_ENTRY_IAT must be zeroed, works only if auto_set_to_pe_headers = true
+	void zero_directory_entry_iat(bool enable);
+
+	//Sets if the last section should be stripped automatically, if imports are inside it, default true
+	void enable_auto_strip_last_section(bool enable);
+
+private:
+	uint32_t offset_from_section_start_;
+	bool build_original_iat_;
+	bool save_iat_and_original_iat_rvas_;
+	bool fill_missing_original_iats_;
+	bool set_to_pe_headers_;
+	bool zero_directory_entry_iat_;
+	bool rewrite_iat_and_original_iat_contents_;
+	bool auto_strip_last_section_;
+};
+
+typedef std::vector<import_library> imported_functions_list;
+
+
+//Returns the list of imported libraries of the image, each with its imported functions
+const imported_functions_list get_imported_functions(const pe_base& pe);
+
+template<typename PEClassType>
+const imported_functions_list get_imported_functions_base(const pe_base& pe);
+
+
+//You can get all image imports with the get_imported_functions() function
+//You can use the returned value to, for example, add a new imported library with some functions
+//to the end of the list of imported libraries
+//To keep PE file working, rebuild its imports with save_iat_and_original_iat_rvas = true (default)
+//Don't add new imported functions to existing imported library entries, because this can cause
+//rewriting of some used memory (or other IAT/orig.IAT fields) by system loader
+//The safest way is to append new import libraries (with their functions) to the end of the imported_functions_list array
+const image_directory rebuild_imports(pe_base& pe, const imported_functions_list& imports, section& import_section, const import_rebuilder_settings& import_settings = import_rebuilder_settings());
+
+template<typename PEClassType>
+const image_directory rebuild_imports_base(pe_base& pe, const imported_functions_list& imports, section& import_section, const import_rebuilder_settings& import_settings = import_rebuilder_settings());
+}
diff --git a/tools/pe_bliss/pe_load_config.cpp b/tools/pe_bliss/pe_load_config.cpp
new file mode 100644
index 0000000000..c05895fecd
--- /dev/null
+++ b/tools/pe_bliss/pe_load_config.cpp
@@ -0,0 +1,557 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include <string.h>
+#include "pe_load_config.h"
+#include "pe_properties_generic.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//IMAGE CONFIG
+//Default constructor: sets every scalar field to zero
+image_config_info::image_config_info()
+	:time_stamp_(0),
+	major_version_(0), minor_version_(0),
+	global_flags_clear_(0), global_flags_set_(0),
+	critical_section_default_timeout_(0),
+	decommit_free_block_threshold_(0), decommit_total_free_threshold_(0),
+	lock_prefix_table_va_(0),
+	max_allocation_size_(0),
+	virtual_memory_threshold_(0),
+	process_affinity_mask_(0),
+	process_heap_flags_(0),
+	service_pack_version_(0),
+	edit_list_va_(0),
+	security_cookie_va_(0),
+	se_handler_table_va_(0),
+	se_handler_count_(0)
+{}
+
+//Constructor from PE load config structure: copies the structure fields only, SE handler and lock prefix lists are not filled here
+template<typename ConfigStructure>
+image_config_info::image_config_info(const ConfigStructure& info)
+	:time_stamp_(info.TimeDateStamp),
+	major_version_(info.MajorVersion), minor_version_(info.MinorVersion),
+	global_flags_clear_(info.GlobalFlagsClear), global_flags_set_(info.GlobalFlagsSet),
+	critical_section_default_timeout_(info.CriticalSectionDefaultTimeout),
+	decommit_free_block_threshold_(info.DeCommitFreeBlockThreshold), decommit_total_free_threshold_(info.DeCommitTotalFreeThreshold),
+	lock_prefix_table_va_(info.LockPrefixTable),
+	max_allocation_size_(info.MaximumAllocationSize),
+	virtual_memory_threshold_(info.VirtualMemoryThreshold),
+	process_affinity_mask_(info.ProcessAffinityMask),
+	process_heap_flags_(info.ProcessHeapFlags),
+	service_pack_version_(info.CSDVersion),
+	edit_list_va_(info.EditList),
+	security_cookie_va_(info.SecurityCookie),
+	se_handler_table_va_(info.SEHandlerTable),
+	se_handler_count_(info.SEHandlerCount)
+{}
+
+//Explicitly instantiate template constructor for the 32-bit and 64-bit load config structures
+template image_config_info::image_config_info(const image_load_config_directory32& info);
+template image_config_info::image_config_info(const image_load_config_directory64& info);
+
+//Returns the date and time stamp value (TimeDateStamp field)
+uint32_t image_config_info::get_time_stamp() const
+{
+	return time_stamp_;
+}
+
+//Returns major version number (MajorVersion field)
+uint16_t image_config_info::get_major_version() const
+{
+	return major_version_;
+}
+
+//Returns minor version number (MinorVersion field)
+uint16_t image_config_info::get_minor_version() const
+{
+	return minor_version_;
+}
+
+//Returns clear global flags (GlobalFlagsClear field)
+uint32_t image_config_info::get_global_flags_clear() const
+{
+	return global_flags_clear_;
+}
+
+//Returns set global flags (GlobalFlagsSet field)
+uint32_t image_config_info::get_global_flags_set() const
+{
+	return global_flags_set_;
+}
+
+//Returns critical section default timeout (CriticalSectionDefaultTimeout field)
+uint32_t image_config_info::get_critical_section_default_timeout() const
+{
+	return critical_section_default_timeout_;
+}
+
+//Returns the size of the minimum block that
+//must be freed before it is freed (de-committed), in bytes (DeCommitFreeBlockThreshold field)
+uint64_t image_config_info::get_decommit_free_block_threshold() const
+{
+	return decommit_free_block_threshold_;
+}
+
+//Returns the size of the minimum total memory
+//that must be freed in the process heap before it is freed (de-committed), in bytes (DeCommitTotalFreeThreshold field)
+uint64_t image_config_info::get_decommit_total_free_threshold() const
+{
+	return decommit_total_free_threshold_;
+}
+
+//Returns VA of a list of addresses where the LOCK prefix is used (LockPrefixTable field)
+uint64_t image_config_info::get_lock_prefix_table_va() const
+{
+	return lock_prefix_table_va_;
+}
+
+//Returns the maximum allocation size, in bytes (MaximumAllocationSize field)
+uint64_t image_config_info::get_max_allocation_size() const
+{
+	return max_allocation_size_;
+}
+
+//Returns the maximum block size that can be allocated from heap segments, in bytes (VirtualMemoryThreshold field)
+uint64_t image_config_info::get_virtual_memory_threshold() const
+{
+	return virtual_memory_threshold_;
+}
+
+//Returns process affinity mask (ProcessAffinityMask field)
+uint64_t image_config_info::get_process_affinity_mask() const
+{
+	return process_affinity_mask_;
+}
+
+//Returns process heap flags (ProcessHeapFlags field)
+uint32_t image_config_info::get_process_heap_flags() const
+{
+	return process_heap_flags_;
+}
+
+//Returns service pack version (CSDVersion field)
+uint16_t image_config_info::get_service_pack_version() const
+{
+	return service_pack_version_;
+}
+
+//Returns VA of edit list (reserved by system; EditList field)
+uint64_t image_config_info::get_edit_list_va() const
+{
+	return edit_list_va_;
+}
+
+//Returns VA of the cookie that is used by Visual C++ or GS implementation (SecurityCookie field)
+uint64_t image_config_info::get_security_cookie_va() const
+{
+	return security_cookie_va_;
+}
+
+//Returns VA of the sorted table of RVAs of each valid, unique handler in the image (SEHandlerTable field)
+uint64_t image_config_info::get_se_handler_table_va() const
+{
+	return se_handler_table_va_;
+}
+
+//Returns the count of unique handlers in the table (SEHandlerCount field as passed to the constructor; add_se_handler_rva() does not change it)
+uint64_t image_config_info::get_se_handler_count() const
+{
+	return se_handler_count_;
+}
+
+//Returns SE Handler RVA list (read-only)
+const image_config_info::se_handler_list& image_config_info::get_se_handler_rvas() const
+{
+	return se_handlers_;
+}
+
+//Returns Lock Prefix RVA list (read-only)
+const image_config_info::lock_prefix_rva_list& image_config_info::get_lock_prefix_rvas() const
+{
+	return lock_prefixes_;
+}
+
+//Appends SE Handler RVA to the end of the list (no sorting or duplicate check is done here)
+void image_config_info::add_se_handler_rva(uint32_t rva)
+{
+	se_handlers_.push_back(rva);
+}
+
+//Removes all entries from SE Handler list
+void image_config_info::clear_se_handler_list()
+{
+	se_handlers_.clear();
+}
+
+//Appends Lock Prefix RVA to the end of the list
+void image_config_info::add_lock_prefix_rva(uint32_t rva)
+{
+	lock_prefixes_.push_back(rva);
+}
+
+//Removes all entries from Lock Prefix list
+void image_config_info::clear_lock_prefix_list()
+{
+	lock_prefixes_.clear();
+}
+
+//Sets the date and time stamp value (TimeDateStamp field)
+void image_config_info::set_time_stamp(uint32_t time_stamp)
+{
+	time_stamp_ = time_stamp;
+}
+
+//Sets major version number (MajorVersion field)
+void image_config_info::set_major_version(uint16_t major_version)
+{
+	major_version_ = major_version;
+}
+
+//Sets minor version number (MinorVersion field)
+void image_config_info::set_minor_version(uint16_t minor_version)
+{
+	minor_version_ = minor_version;
+}
+
+//Sets clear global flags (GlobalFlagsClear field)
+void image_config_info::set_global_flags_clear(uint32_t global_flags_clear)
+{
+	global_flags_clear_ = global_flags_clear;
+}
+
+//Sets set global flags (GlobalFlagsSet field)
+void image_config_info::set_global_flags_set(uint32_t global_flags_set)
+{
+	global_flags_set_ = global_flags_set;
+}
+
+//Sets critical section default timeout (CriticalSectionDefaultTimeout field)
+void image_config_info::set_critical_section_default_timeout(uint32_t critical_section_default_timeout)
+{
+	critical_section_default_timeout_ = critical_section_default_timeout;
+}
+
+//Sets the size of the minimum block that
+//must be freed before it is freed (de-committed), in bytes (DeCommitFreeBlockThreshold field)
+void image_config_info::set_decommit_free_block_threshold(uint64_t decommit_free_block_threshold)
+{
+	decommit_free_block_threshold_ = decommit_free_block_threshold;
+}
+
+//Sets the size of the minimum total memory
+//that must be freed in the process heap before it is freed (de-committed), in bytes (DeCommitTotalFreeThreshold field)
+void image_config_info::set_decommit_total_free_threshold(uint64_t decommit_total_free_threshold)
+{
+	decommit_total_free_threshold_ = decommit_total_free_threshold;
+}
+
+//Sets VA of a list of addresses where the LOCK prefix is used (LockPrefixTable field)
+//If the list is rewritten on rebuild (write_lock_prefixes = true), this VA is ignored and re-assigned automatically
+void image_config_info::set_lock_prefix_table_va(uint64_t lock_prefix_table_va)
+{
+	lock_prefix_table_va_ = lock_prefix_table_va;
+}
+
+//Sets the maximum allocation size, in bytes (MaximumAllocationSize field)
+void image_config_info::set_max_allocation_size(uint64_t max_allocation_size)
+{
+	max_allocation_size_ = max_allocation_size;
+}
+
+//Sets the maximum block size that can be allocated from heap segments, in bytes (VirtualMemoryThreshold field)
+void image_config_info::set_virtual_memory_threshold(uint64_t virtual_memory_threshold)
+{
+	virtual_memory_threshold_ = virtual_memory_threshold;
+}
+
+//Sets process affinity mask (ProcessAffinityMask field)
+void image_config_info::set_process_affinity_mask(uint64_t process_affinity_mask)
+{
+	process_affinity_mask_ = process_affinity_mask;
+}
+
+//Sets process heap flags (ProcessHeapFlags field)
+void image_config_info::set_process_heap_flags(uint32_t process_heap_flags)
+{
+	process_heap_flags_ = process_heap_flags;
+}
+
+//Sets service pack version (CSDVersion field)
+void image_config_info::set_service_pack_version(uint16_t service_pack_version)
+{
+	service_pack_version_ = service_pack_version;
+}
+
+//Sets VA of edit list (reserved by system; EditList field)
+void image_config_info::set_edit_list_va(uint64_t edit_list_va)
+{
+	edit_list_va_ = edit_list_va;
+}
+
+//Sets VA of the cookie that is used by Visual C++ or GS implementation (SecurityCookie field)
+void image_config_info::set_security_cookie_va(uint64_t security_cookie_va)
+{
+	security_cookie_va_ = security_cookie_va;
+}
+
+//Sets VA of the sorted table of RVAs of each valid, unique handler in the image (SEHandlerTable field)
+//If the list is rewritten on rebuild (write_se_handlers = true), this VA is ignored and re-assigned automatically
+void image_config_info::set_se_handler_table_va(uint64_t se_handler_table_va)
+{
+	se_handler_table_va_ = se_handler_table_va;
+}
+
+//Returns modifiable SE Handler RVA list (non-const overload)
+image_config_info::se_handler_list& image_config_info::get_se_handler_rvas()
+{
+	return se_handlers_;
+}
+
+//Returns modifiable Lock Prefix RVA list (non-const overload)
+image_config_info::lock_prefix_rva_list& image_config_info::get_lock_prefix_rvas()
+{
+	return lock_prefixes_;
+}
+
+//Reads image config info, choosing the 32-bit or 64-bit reader by PE type
+//Throws an exception if image does not have config info
+const image_config_info get_image_config(const pe_base& pe)
+{
+	if(pe.get_pe_type() == pe_type_32)
+		return get_image_config_base<pe_types_class_32>(pe);
+	return get_image_config_base<pe_types_class_64>(pe);
+}
+
+//Image config rebuilder: forwards all arguments to the 32-bit or 64-bit implementation chosen by PE type
+const image_directory rebuild_image_config(pe_base& pe, const image_config_info& info, section& image_config_section, uint32_t offset_from_section_start, bool write_se_handlers, bool write_lock_prefixes, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	if(pe.get_pe_type() == pe_type_32)
+		return rebuild_image_config_base<pe_types_class_32>(pe, info, image_config_section, offset_from_section_start, write_se_handlers, write_lock_prefixes, save_to_pe_header, auto_strip_last_section);
+	return rebuild_image_config_base<pe_types_class_64>(pe, info, image_config_section, offset_from_section_start, write_se_handlers, write_lock_prefixes, save_to_pe_header, auto_strip_last_section);
+}
+
+
+//Returns image config info read from the load config directory (including SE handler and lock prefix RVA lists)
+//If image does not have config info or the directory is incorrect, throws pe_exception
+template<typename PEClassType>
+const image_config_info get_image_config_base(const pe_base& pe)
+{
+	//Check if image has config directory
+	if(!pe.has_config())
+		throw pe_exception("Image does not have load config directory", pe_exception::directory_does_not_exist);
+
+	//Get load config structure
+	typename PEClassType::ConfigStruct config_info = pe.section_data_from_rva<typename PEClassType::ConfigStruct>(pe.get_directory_rva(image_directory_entry_load_config), section_data_virtual);
+
+	//Check size of config directory (any size other than this structure's size is rejected)
+	if(config_info.Size != sizeof(config_info))
+		throw pe_exception("Incorrect (or old) load config directory", pe_exception::incorrect_config_directory);
+
+	//Fill return structure
+	image_config_info ret(config_info);
+
+	//Check possible overflow of the SE handler table byte size and of its end address
+	if(config_info.SEHandlerCount >= pe_utils::max_dword / sizeof(uint32_t)
+		|| config_info.SEHandlerTable >= static_cast<typename PEClassType::BaseSize>(-1) - config_info.SEHandlerCount * sizeof(uint32_t))
+		throw pe_exception("Incorrect load config directory", pe_exception::incorrect_config_directory);
+
+	//Read sorted SE handler RVA list (if any)
+	for(typename PEClassType::BaseSize i = 0; i != config_info.SEHandlerCount; ++i)
+		ret.add_se_handler_rva(pe.section_data_from_va<uint32_t>(static_cast<typename PEClassType::BaseSize>(config_info.SEHandlerTable + i * sizeof(uint32_t))));
+
+	if(config_info.LockPrefixTable)
+	{
+		//Read Lock Prefix VA list (if any) up to the terminating null VA, storing each entry as RVA
+		unsigned long current = 0;
+		while(true)
+		{
+			typename PEClassType::BaseSize lock_prefix_va = pe.section_data_from_va<typename PEClassType::BaseSize>(static_cast<typename PEClassType::BaseSize>(config_info.LockPrefixTable + current * sizeof(typename PEClassType::BaseSize)));
+			if(!lock_prefix_va)
+				break;
+
+			ret.add_lock_prefix_rva(pe.va_to_rva(lock_prefix_va));
+
+			++current;
+		}
+	}
+
+	return ret;
+}
+
+//Image config directory rebuilder
+//auto_strip_last_section - if true and image config data is placed in the last section, it will be automatically stripped
+//If write_se_handlers = true, SE Handlers list will be written just after image config directory structure
+//If write_lock_prefixes = true, Lock Prefixes address list will be written after image config directory structure (and after SE Handlers list, if it is written too)
+template<typename PEClassType>
+const image_directory rebuild_image_config_base(pe_base& pe, const image_config_info& info, section& image_config_section, uint32_t offset_from_section_start, bool write_se_handlers, bool write_lock_prefixes, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	//Check that image_config_section is attached to this PE image
+	if(!pe.section_attached(image_config_section))
+		throw pe_exception("Image Config section must be attached to PE file", pe_exception::section_is_not_attached);
+	
+	uint32_t alignment = pe_utils::align_up(offset_from_section_start, sizeof(typename PEClassType::BaseSize)) - offset_from_section_start;
+
+	uint32_t needed_size = sizeof(typename PEClassType::ConfigStruct); //Calculate needed size for Image Config table
+	
+	uint32_t image_config_data_pos = offset_from_section_start + alignment;
+
+	uint32_t current_pos_of_se_handlers = 0;
+	uint32_t current_pos_of_lock_prefixes = 0;
+	
+	if(write_se_handlers)
+	{
+		current_pos_of_se_handlers = needed_size + image_config_data_pos;
+		needed_size += static_cast<uint32_t>(info.get_se_handler_rvas().size()) * sizeof(uint32_t); //RVAs of SE Handlers
+	}
+	
+	if(write_lock_prefixes)
+	{
+		current_pos_of_lock_prefixes = needed_size + image_config_data_pos;
+		needed_size += static_cast<uint32_t>((info.get_lock_prefix_rvas().size() + 1) * sizeof(typename PEClassType::BaseSize)); //VAs of Lock Prefixes (and ending null element)
+	}
+
+	//Check if image_config_section is last one. If it's not, check if there's enough place for Image Config data
+	if(&image_config_section != &*(pe.get_image_sections().end() - 1) && 
+		(image_config_section.empty() || pe_utils::align_up(image_config_section.get_size_of_raw_data(), pe.get_file_alignment()) < needed_size + image_config_data_pos))
+		throw pe_exception("Insufficient space for Image Config directory", pe_exception::insufficient_space);
+
+	std::string& raw_data = image_config_section.get_raw_data();
+
+	//This will be done only if image_config_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + image_config_data_pos)
+		raw_data.resize(needed_size + image_config_data_pos); //Expand section raw data
+
+	//Create and fill Image Config structure
+	typename PEClassType::ConfigStruct image_config_section_struct = {0};
+	image_config_section_struct.Size = sizeof(image_config_section_struct);
+	image_config_section_struct.TimeDateStamp = info.get_time_stamp();
+	image_config_section_struct.MajorVersion = info.get_major_version();
+	image_config_section_struct.MinorVersion = info.get_minor_version();
+	image_config_section_struct.GlobalFlagsClear = info.get_global_flags_clear();
+	image_config_section_struct.GlobalFlagsSet = info.get_global_flags_set();
+	image_config_section_struct.CriticalSectionDefaultTimeout = info.get_critical_section_default_timeout();
+	image_config_section_struct.DeCommitFreeBlockThreshold = static_cast<typename PEClassType::BaseSize>(info.get_decommit_free_block_threshold());
+	image_config_section_struct.DeCommitTotalFreeThreshold = static_cast<typename PEClassType::BaseSize>(info.get_decommit_total_free_threshold());
+	image_config_section_struct.MaximumAllocationSize = static_cast<typename PEClassType::BaseSize>(info.get_max_allocation_size());
+	image_config_section_struct.VirtualMemoryThreshold = static_cast<typename PEClassType::BaseSize>(info.get_virtual_memory_threshold());
+	image_config_section_struct.ProcessHeapFlags = info.get_process_heap_flags();
+	image_config_section_struct.ProcessAffinityMask = static_cast<typename PEClassType::BaseSize>(info.get_process_affinity_mask());
+	image_config_section_struct.CSDVersion = info.get_service_pack_version();
+	image_config_section_struct.EditList = static_cast<typename PEClassType::BaseSize>(info.get_edit_list_va());
+	image_config_section_struct.SecurityCookie = static_cast<typename PEClassType::BaseSize>(info.get_security_cookie_va());
+	image_config_section_struct.SEHandlerCount = static_cast<typename PEClassType::BaseSize>(info.get_se_handler_rvas().size());
+	
+
+	if(write_se_handlers)
+	{
+		if(info.get_se_handler_rvas().empty())
+		{
+			write_se_handlers = false; //Nothing to write, skip the table output below
+			image_config_section_struct.SEHandlerTable = 0;
+		}
+		else
+		{
+			typename PEClassType::BaseSize va;
+			pe.rva_to_va(pe.rva_from_section_offset(image_config_section, current_pos_of_se_handlers), va);
+			image_config_section_struct.SEHandlerTable = va;
+		}
+	}
+	else
+	{
+		image_config_section_struct.SEHandlerTable = static_cast<typename PEClassType::BaseSize>(info.get_se_handler_table_va());
+	}
+
+	if(write_lock_prefixes)
+	{
+		if(info.get_lock_prefix_rvas().empty())
+		{
+			write_lock_prefixes = false; //Nothing to write, skip the list output below
+			image_config_section_struct.LockPrefixTable = 0;
+		}
+		else
+		{
+			typename PEClassType::BaseSize va;
+			pe.rva_to_va(pe.rva_from_section_offset(image_config_section, current_pos_of_lock_prefixes), va);
+			image_config_section_struct.LockPrefixTable = va;
+		}
+	}
+	else
+	{
+		image_config_section_struct.LockPrefixTable = static_cast<typename PEClassType::BaseSize>(info.get_lock_prefix_table_va());
+	}
+
+	//Write image config section
+	memcpy(&raw_data[image_config_data_pos], &image_config_section_struct, sizeof(image_config_section_struct));
+
+	if(write_se_handlers)
+	{
+		//Sort a copy of SE Handlers list (info itself is const and stays unchanged)
+		image_config_info::se_handler_list sorted_list = info.get_se_handler_rvas();
+		std::sort(sorted_list.begin(), sorted_list.end());
+
+		//Write SE Handlers table
+		for(image_config_info::se_handler_list::const_iterator it = sorted_list.begin(); it != sorted_list.end(); ++it)
+		{
+			uint32_t se_handler_rva = *it;
+			memcpy(&raw_data[current_pos_of_se_handlers], &se_handler_rva, sizeof(se_handler_rva));
+			current_pos_of_se_handlers += sizeof(se_handler_rva);
+		}
+	}
+
+	if(write_lock_prefixes)
+	{
+		//Write Lock Prefixes VA list (stored RVAs are converted back to VAs)
+		for(image_config_info::lock_prefix_rva_list::const_iterator it = info.get_lock_prefix_rvas().begin(); it != info.get_lock_prefix_rvas().end(); ++it)
+		{
+			typename PEClassType::BaseSize lock_prefix_va;
+			pe.rva_to_va(*it, lock_prefix_va);
+			memcpy(&raw_data[current_pos_of_lock_prefixes], &lock_prefix_va, sizeof(lock_prefix_va));
+			current_pos_of_lock_prefixes += sizeof(lock_prefix_va);
+		}
+
+		{
+			//Ending null VA
+			typename PEClassType::BaseSize lock_prefix_va = 0;
+			memcpy(&raw_data[current_pos_of_lock_prefixes], &lock_prefix_va, sizeof(lock_prefix_va));
+		}
+	}
+
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(image_config_section, auto_strip_last_section);
+
+	//Returned directory covers the config structure only, not the SE handler / lock prefix tables written after it
+	image_directory ret(pe.rva_from_section_offset(image_config_section, image_config_data_pos), sizeof(typename PEClassType::ConfigStruct));
+
+	//If auto-rewrite of PE headers is required
+	if(save_to_pe_header)
+	{
+		pe.set_directory_rva(image_directory_entry_load_config, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_load_config, ret.get_size());
+	}
+
+	return ret;
+}
+
+}
diff --git a/tools/pe_bliss/pe_load_config.h b/tools/pe_bliss/pe_load_config.h
new file mode 100644
index 0000000000..cb24072de7
--- /dev/null
+++ b/tools/pe_bliss/pe_load_config.h
@@ -0,0 +1,184 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include "pe_structures.h"
+#include "pe_base.h"
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Class representing image configuration (load config directory) information
+class image_config_info
+{
+public:
+	typedef std::vector<uint32_t> se_handler_list; //List of SE Handler RVAs
+	typedef std::vector<uint32_t> lock_prefix_rva_list; //List of Lock Prefix RVAs
+
+public:
+	//Default constructor
+	image_config_info();
+	//Constructor from a PE config structure (no checks are performed)
+	template<typename ConfigStructure>
+	explicit image_config_info(const ConfigStructure& info);
+
+	//Returns the date and time stamp value
+	uint32_t get_time_stamp() const;
+	//Returns major version number
+	uint16_t get_major_version() const;
+	//Returns minor version number
+	uint16_t get_minor_version() const;
+	//Returns clear global flags
+	uint32_t get_global_flags_clear() const;
+	//Returns set global flags
+	uint32_t get_global_flags_set() const;
+	//Returns critical section default timeout
+	uint32_t get_critical_section_default_timeout() const;
+	//Returns the size of the minimum block that
+	//must be freed before it is freed (de-committed), in bytes
+	uint64_t get_decommit_free_block_threshold() const;
+	//Returns the size of the minimum total memory
+	//that must be freed in the process heap before it is freed (de-committed), in bytes
+	uint64_t get_decommit_total_free_threshold() const;
+	//Returns VA of a list of addresses where the LOCK prefix is used
+	uint64_t get_lock_prefix_table_va() const;
+	//Returns the maximum allocation size, in bytes
+	uint64_t get_max_allocation_size() const;
+	//Returns the maximum block size that can be allocated from heap segments, in bytes
+	uint64_t get_virtual_memory_threshold() const;
+	//Returns process affinity mask
+	uint64_t get_process_affinity_mask() const;
+	//Returns process heap flags
+	uint32_t get_process_heap_flags() const;
+	//Returns service pack version (CSDVersion)
+	uint16_t get_service_pack_version() const;
+	//Returns VA of edit list (reserved by system)
+	uint64_t get_edit_list_va() const;
+	//Returns a pointer to a cookie that is used by Visual C++ or GS implementation
+	uint64_t get_security_cookie_va() const;
+	//Returns VA of the sorted table of RVAs of each valid, unique handler in the image
+	uint64_t get_se_handler_table_va() const;
+	//Returns the count of unique handlers in the table
+	uint64_t get_se_handler_count() const;
+
+	//Returns SE Handler RVA list (read-only)
+	const se_handler_list& get_se_handler_rvas() const;
+		
+	//Returns Lock Prefix RVA list (read-only)
+	const lock_prefix_rva_list& get_lock_prefix_rvas() const;
+
+public: //These functions do not change anything inside the image, they are used by PE class
+	//You can also use these functions to rebuild image config directory
+
+	//Adds SE Handler RVA to list
+	void add_se_handler_rva(uint32_t rva);
+	//Clears SE Handler list
+	void clear_se_handler_list();
+		
+	//Adds Lock Prefix RVA to list
+	void add_lock_prefix_rva(uint32_t rva);
+	//Clears Lock Prefix list
+	void clear_lock_prefix_list();
+		
+	//Sets the date and time stamp value
+	void set_time_stamp(uint32_t time_stamp);
+	//Sets major version number
+	void set_major_version(uint16_t major_version);
+	//Sets minor version number
+	void set_minor_version(uint16_t minor_version);
+	//Sets clear global flags
+	void set_global_flags_clear(uint32_t global_flags_clear);
+	//Sets set global flags
+	void set_global_flags_set(uint32_t global_flags_set);
+	//Sets critical section default timeout
+	void set_critical_section_default_timeout(uint32_t critical_section_default_timeout);
+	//Sets the size of the minimum block that
+	//must be freed before it is freed (de-committed), in bytes
+	void set_decommit_free_block_threshold(uint64_t decommit_free_block_threshold);
+	//Sets the size of the minimum total memory
+	//that must be freed in the process heap before it is freed (de-committed), in bytes
+	void set_decommit_total_free_threshold(uint64_t decommit_total_free_threshold);
+	//Sets VA of a list of addresses where the LOCK prefix is used
+	//If you rebuild this list, VA will be re-assigned automatically
+	void set_lock_prefix_table_va(uint64_t lock_prefix_table_va);
+	//Sets the maximum allocation size, in bytes
+	void set_max_allocation_size(uint64_t max_allocation_size);
+	//Sets the maximum block size that can be allocated from heap segments, in bytes
+	void set_virtual_memory_threshold(uint64_t virtual_memory_threshold);
+	//Sets process affinity mask
+	void set_process_affinity_mask(uint64_t process_affinity_mask);
+	//Sets process heap flags
+	void set_process_heap_flags(uint32_t process_heap_flags);
+	//Sets service pack version (CSDVersion)
+	void set_service_pack_version(uint16_t service_pack_version);
+	//Sets VA of edit list (reserved by system)
+	void set_edit_list_va(uint64_t edit_list_va);
+	//Sets a pointer to a cookie that is used by Visual C++ or GS implementation
+	void set_security_cookie_va(uint64_t security_cookie_va);
+	//Sets VA of the sorted table of RVAs of each valid, unique handler in the image
+	//If you rebuild this list, VA will be re-assigned automatically
+	void set_se_handler_table_va(uint64_t se_handler_table_va);
+
+	//Returns SE Handler RVA list (non-const overload, the list can be modified)
+	se_handler_list& get_se_handler_rvas();
+
+	//Returns Lock Prefix RVA list (non-const overload, the list can be modified)
+	lock_prefix_rva_list& get_lock_prefix_rvas();
+
+private:
+	uint32_t time_stamp_;
+	uint16_t major_version_, minor_version_;
+	uint32_t global_flags_clear_, global_flags_set_;
+	uint32_t critical_section_default_timeout_;
+	uint64_t decommit_free_block_threshold_, decommit_total_free_threshold_;
+	uint64_t lock_prefix_table_va_;
+	uint64_t max_allocation_size_;
+	uint64_t virtual_memory_threshold_;
+	uint64_t process_affinity_mask_;
+	uint32_t process_heap_flags_;
+	uint16_t service_pack_version_;
+	uint64_t edit_list_va_;
+	uint64_t security_cookie_va_;
+	uint64_t se_handler_table_va_;
+	uint64_t se_handler_count_;
+
+	se_handler_list se_handlers_;
+	lock_prefix_rva_list lock_prefixes_;
+};
+
+//Returns image config info
+//If image does not have config info, throws an exception
+const image_config_info get_image_config(const pe_base& pe);
+
+template<typename PEClassType>
+const image_config_info get_image_config_base(const pe_base& pe);
+
+
+//Image config directory rebuilder
+//auto_strip_last_section - if true and the image config directory is placed in the last section, it will be automatically stripped
+//If write_se_handlers = true, SE Handlers list will be written just after image config directory structure
+//If write_lock_prefixes = true, Lock Prefixes address list will be written just after image config directory structure
+const image_directory rebuild_image_config(pe_base& pe, const image_config_info& info, section& image_config_section, uint32_t offset_from_section_start = 0, bool write_se_handlers = true, bool write_lock_prefixes = true, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+
+template<typename PEClassType>
+const image_directory rebuild_image_config_base(pe_base& pe, const image_config_info& info, section& image_config_section, uint32_t offset_from_section_start = 0, bool write_se_handlers = true, bool write_lock_prefixes = true, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+}
diff --git a/tools/pe_bliss/pe_properties.cpp b/tools/pe_bliss/pe_properties.cpp
new file mode 100644
index 0000000000..8d1c2eac43
--- /dev/null
+++ b/tools/pe_bliss/pe_properties.cpp
@@ -0,0 +1,41 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_properties.h"
+
+namespace pe_bliss
+{
+//Destructor (empty body)
+pe_properties::~pe_properties()
+{}
+
+//Clears the given bits in the PE characteristics field
+void pe_properties::clear_characteristics_flags(uint16_t flags)
+{
+	set_characteristics(static_cast<uint16_t>(get_characteristics() & ~flags));
+}
+
+//Raises the given bits in the PE characteristics field
+void pe_properties::set_characteristics_flags(uint16_t flags)
+{
+	set_characteristics(static_cast<uint16_t>(get_characteristics() | flags));
+}
+}
diff --git a/tools/pe_bliss/pe_properties.h b/tools/pe_bliss/pe_properties.h
new file mode 100644
index 0000000000..1db163e8b1
--- /dev/null
+++ b/tools/pe_bliss/pe_properties.h
@@ -0,0 +1,236 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <memory>
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+class pe_properties
+{
+public: //Duplication and creation
+	virtual std::auto_ptr<pe_properties> duplicate() const = 0;
+	
+	//Properly fills PE structures
+	virtual void create_pe(uint32_t section_alignment, uint16_t subsystem) = 0;
+
+public:
+	//Destructor
+	virtual ~pe_properties();
+
+
+public: //DIRECTORIES
+	//Returns true if directory exists
+	virtual bool directory_exists(uint32_t id) const = 0;
+
+	//Removes directory
+	virtual void remove_directory(uint32_t id) = 0;
+
+	//Returns directory RVA
+	virtual uint32_t get_directory_rva(uint32_t id) const = 0;
+	//Returns directory size
+	virtual uint32_t get_directory_size(uint32_t id) const = 0;
+
+	//Sets directory RVA (just a value of PE header, no moving occurs)
+	virtual void set_directory_rva(uint32_t id, uint32_t rva) = 0;
+	//Sets directory size (just a value of PE header, no moving occurs)
+	virtual void set_directory_size(uint32_t id, uint32_t size) = 0;
+	
+	//Strips only zero DATA_DIRECTORY entries to count = min_count
+	//Returns resulting number of data directories
+	//strip_iat_directory - if true, even a non-empty IAT directory will be stripped
+	virtual uint32_t strip_data_directories(uint32_t min_count = 1, bool strip_iat_directory = true) = 0;
+
+
+public: //IMAGE
+	//Returns PE type of this image
+	virtual pe_type get_pe_type() const = 0;
+
+
+public: //PE HEADER
+	//Returns image base for PE32 and PE64 respectively
+	virtual uint32_t get_image_base_32() const = 0;
+	virtual uint64_t get_image_base_64() const = 0;
+
+	//Sets new image base from a 32-bit value (PE32)
+	virtual void set_image_base(uint32_t base) = 0;
+	//Sets new image base from a 64-bit value (PE32/PE+)
+	virtual void set_image_base_64(uint64_t base) = 0;
+
+	//Returns image entry point
+	virtual uint32_t get_ep() const = 0;
+	//Sets image entry point
+	virtual void set_ep(uint32_t new_ep) = 0;
+
+	//Returns file alignment
+	virtual uint32_t get_file_alignment() const = 0;
+	//Returns section alignment
+	virtual uint32_t get_section_alignment() const = 0;
+
+	//Sets heap size commit for PE32 and PE64 respectively
+	virtual void set_heap_size_commit(uint32_t size) = 0;
+	virtual void set_heap_size_commit(uint64_t size) = 0;
+	//Sets heap size reserve for PE32 and PE64 respectively
+	virtual void set_heap_size_reserve(uint32_t size) = 0;
+	virtual void set_heap_size_reserve(uint64_t size) = 0;
+	//Sets stack size commit for PE32 and PE64 respectively
+	virtual void set_stack_size_commit(uint32_t size) = 0;
+	virtual void set_stack_size_commit(uint64_t size) = 0;
+	//Sets stack size reserve for PE32 and PE64 respectively
+	virtual void set_stack_size_reserve(uint32_t size) = 0;
+	virtual void set_stack_size_reserve(uint64_t size) = 0;
+	
+	//Returns heap size commit for PE32 and PE64 respectively
+	virtual uint32_t get_heap_size_commit_32() const = 0;
+	virtual uint64_t get_heap_size_commit_64() const = 0;
+	//Returns heap size reserve for PE32 and PE64 respectively
+	virtual uint32_t get_heap_size_reserve_32() const = 0;
+	virtual uint64_t get_heap_size_reserve_64() const = 0;
+	//Returns stack size commit for PE32 and PE64 respectively
+	virtual uint32_t get_stack_size_commit_32() const = 0;
+	virtual uint64_t get_stack_size_commit_64() const = 0;
+	//Returns stack size reserve for PE32 and PE64 respectively
+	virtual uint32_t get_stack_size_reserve_32() const = 0;
+	virtual uint64_t get_stack_size_reserve_64() const = 0;
+
+	//Returns virtual size of image
+	virtual uint32_t get_size_of_image() const = 0;
+
+	//Returns number of RVA and sizes (number of DATA_DIRECTORY entries)
+	virtual uint32_t get_number_of_rvas_and_sizes() const = 0;
+	//Sets number of RVA and sizes (number of DATA_DIRECTORY entries)
+	virtual void set_number_of_rvas_and_sizes(uint32_t number) = 0;
+
+	//Returns PE characteristics
+	virtual uint16_t get_characteristics() const = 0;
+	//Sets PE characteristics
+	virtual void set_characteristics(uint16_t ch) = 0;
+	
+	//Clears PE characteristics flag (non-virtual helper)
+	void clear_characteristics_flags(uint16_t flags);
+	//Sets PE characteristics flag (non-virtual helper)
+	void set_characteristics_flags(uint16_t flags);
+
+	//Returns size of headers
+	virtual uint32_t get_size_of_headers() const = 0;
+
+	//Returns subsystem
+	virtual uint16_t get_subsystem() const = 0;
+
+	//Sets subsystem
+	virtual void set_subsystem(uint16_t subsystem) = 0;
+
+	//Returns size of optional header
+	virtual uint16_t get_size_of_optional_header() const = 0;
+
+	//Returns PE signature
+	virtual uint32_t get_pe_signature() const = 0;
+
+	//Returns PE magic value
+	virtual uint32_t get_magic() const = 0;
+
+	//Returns checksum of PE file from header
+	virtual uint32_t get_checksum() const = 0;
+	
+	//Sets checksum of PE file
+	virtual void set_checksum(uint32_t checksum) = 0;
+	
+	//Returns timestamp of PE file from header
+	virtual uint32_t get_time_date_stamp() const = 0;
+	
+	//Sets timestamp of PE file
+	virtual void set_time_date_stamp(uint32_t timestamp) = 0;
+	
+	//Returns Machine field value of PE file from header
+	virtual uint16_t get_machine() const = 0;
+
+	//Sets Machine field value of PE file
+	virtual void set_machine(uint16_t machine) = 0;
+
+	//Returns DLL Characteristics
+	virtual uint16_t get_dll_characteristics() const = 0;
+	
+	//Sets DLL Characteristics
+	virtual void set_dll_characteristics(uint16_t characteristics) = 0;
+	
+	//Sets required operating system version
+	virtual void set_os_version(uint16_t major, uint16_t minor) = 0;
+
+	//Returns required operating system version (minor word)
+	virtual uint16_t get_minor_os_version() const = 0;
+
+	//Returns required operating system version (major word)
+	virtual uint16_t get_major_os_version() const = 0;
+
+	//Sets required subsystem version
+	virtual void set_subsystem_version(uint16_t major, uint16_t minor) = 0;
+
+	//Returns required subsystem version (minor word)
+	virtual uint16_t get_minor_subsystem_version() const = 0;
+
+	//Returns required subsystem version (major word)
+	virtual uint16_t get_major_subsystem_version() const = 0;
+
+public: //ADDRESS CONVERSIONS
+	//Virtual Address (VA) to Relative Virtual Address (RVA) conversions
+	//for PE32 and PE64 respectively
+	//bound_check checks integer overflow
+	virtual uint32_t va_to_rva(uint32_t va, bool bound_check = true) const = 0;
+	virtual uint32_t va_to_rva(uint64_t va, bool bound_check = true) const = 0;
+	
+	//Relative Virtual Address (RVA) to Virtual Address (VA) conversions
+	//for PE32 and PE64 respectively
+	virtual uint32_t rva_to_va_32(uint32_t rva) const = 0;
+	virtual uint64_t rva_to_va_64(uint32_t rva) const = 0;
+
+
+public: //SECTIONS
+	//Returns number of sections
+	virtual uint16_t get_number_of_sections() const = 0;
+	
+public:
+	//Sets number of sections
+	virtual void set_number_of_sections(uint16_t number) = 0;
+	//Sets virtual size of image
+	virtual void set_size_of_image(uint32_t size) = 0;
+	//Sets size of headers
+	virtual void set_size_of_headers(uint32_t size) = 0;
+	//Sets size of optional header
+	virtual void set_size_of_optional_header(uint16_t size) = 0;
+	//Returns nt headers data pointer
+	virtual char* get_nt_headers_ptr() = 0;
+	//Returns nt headers data pointer (const overload)
+	virtual const char* get_nt_headers_ptr() const = 0;
+	//Returns size of NT header
+	virtual uint32_t get_sizeof_nt_header() const = 0;
+	//Returns size of optional headers
+	virtual uint32_t get_sizeof_opt_headers() const = 0;
+	//Sets file alignment (no checks)
+	virtual void set_file_alignment_unchecked(uint32_t alignment) = 0;
+	//Sets base of code
+	virtual void set_base_of_code(uint32_t base) = 0;
+	//Returns base of code
+	virtual uint32_t get_base_of_code() const = 0;
+	//Returns needed PE magic for PE or PE+ (from template parameters)
+	virtual uint32_t get_needed_magic() const = 0;
+};
+}
diff --git a/tools/pe_bliss/pe_properties_generic.cpp b/tools/pe_bliss/pe_properties_generic.cpp
new file mode 100644
index 0000000000..bcf6f2047d
--- /dev/null
+++ b/tools/pe_bliss/pe_properties_generic.cpp
@@ -0,0 +1,645 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_properties_generic.h"
+#include "pe_exception.h"
+#include "utils.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+	
+//Returns a heap-allocated copy of this object (copy-constructed from *this) wrapped in std::auto_ptr
+template<typename PEClassType>
+std::auto_ptr<pe_properties> pe_properties_generic<PEClassType>::duplicate() const
+{
+	return std::auto_ptr<pe_properties>(new pe_properties_generic<PEClassType>(*this));
+}
+
+//Zeroes the NT headers and fills them with default values; only section alignment and subsystem come from the caller
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::create_pe(uint32_t section_alignment, uint16_t subsystem)
+{
+	memset(&nt_headers_, 0, sizeof(nt_headers_)); //Every field not assigned below stays zero
+	nt_headers_.Signature = 0x4550; //"PE"
+	nt_headers_.FileHeader.Machine = 0x14C; //i386 - NOTE(review): assigned for every PEClassType, confirm this is intended for PE+
+	nt_headers_.FileHeader.SizeOfOptionalHeader = sizeof(nt_headers_.OptionalHeader);
+	nt_headers_.OptionalHeader.Magic = PEClassType::Id;
+	nt_headers_.OptionalHeader.ImageBase = 0x400000;
+	nt_headers_.OptionalHeader.SectionAlignment = section_alignment;
+	nt_headers_.OptionalHeader.FileAlignment = 0x200; //512 bytes
+	nt_headers_.OptionalHeader.SizeOfHeaders = 1024;
+	nt_headers_.OptionalHeader.Subsystem = subsystem;
+	nt_headers_.OptionalHeader.SizeOfHeapReserve = 0x100000;
+	nt_headers_.OptionalHeader.SizeOfHeapCommit = 0x1000;
+	nt_headers_.OptionalHeader.SizeOfStackReserve = 0x100000;
+	nt_headers_.OptionalHeader.SizeOfStackCommit = 0x1000;
+	nt_headers_.OptionalHeader.NumberOfRvaAndSizes = 0x10; //16 data directory entries
+}
+
+//Destructor (empty body)
+template<typename PEClassType>
+pe_properties_generic<PEClassType>::~pe_properties_generic()
+{}
+
+//Returns true if directory exists: id is below NumberOfRvaAndSizes and the entry has a non-zero VirtualAddress
+template<typename PEClassType>
+bool pe_properties_generic<PEClassType>::directory_exists(uint32_t id) const
+{
+	return id < nt_headers_.OptionalHeader.NumberOfRvaAndSizes && //not "count - 1 >= id": that wraps around when count is 0
+		nt_headers_.OptionalHeader.DataDirectory[id].VirtualAddress;
+}
+
+//Removes directory: zeroes its RVA and size, then adjusts the header flags tied to that directory
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::remove_directory(uint32_t id)
+{
+	//Nothing to do for an absent directory
+	if(!directory_exists(id))
+		return;
+
+	nt_headers_.OptionalHeader.DataDirectory[id].VirtualAddress = 0;
+	nt_headers_.OptionalHeader.DataDirectory[id].Size = 0;
+
+	if(id == image_directory_entry_basereloc)
+	{
+		//Raise the relocs-stripped flag and drop the dynamic-base DLL characteristic
+		set_characteristics_flags(image_file_relocs_stripped);
+		set_dll_characteristics(get_dll_characteristics() & ~image_dllcharacteristics_dynamic_base);
+	}
+	else if(id == image_directory_entry_export)
+		clear_characteristics_flags(image_file_dll);
+}
+
+//Returns directory RVA; throws pe_exception when id is not below NumberOfRvaAndSizes
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_directory_rva(uint32_t id) const
+{
+	//Reject ids outside the declared directory count
+	if(id >= nt_headers_.OptionalHeader.NumberOfRvaAndSizes)
+		throw pe_exception("Specified directory does not exist", pe_exception::directory_does_not_exist);
+
+	return nt_headers_.OptionalHeader.DataDirectory[id].VirtualAddress;
+}
+
+//Sets directory RVA (header value only); throws pe_exception when id is not below NumberOfRvaAndSizes
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_directory_rva(uint32_t id, uint32_t va)
+{
+	//Reject ids outside the declared directory count
+	if(id >= nt_headers_.OptionalHeader.NumberOfRvaAndSizes)
+		throw pe_exception("Specified directory does not exist", pe_exception::directory_does_not_exist);
+
+	nt_headers_.OptionalHeader.DataDirectory[id].VirtualAddress = va;
+}
+
+//Sets directory size (header value only); throws pe_exception when id is not below NumberOfRvaAndSizes
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_directory_size(uint32_t id, uint32_t size)
+{
+	if(id >= nt_headers_.OptionalHeader.NumberOfRvaAndSizes)
+		throw pe_exception("Specified directory does not exist", pe_exception::directory_does_not_exist);
+
+	nt_headers_.OptionalHeader.DataDirectory[id].Size = size;
+}
+
+//Returns directory size; throws pe_exception when id is not below NumberOfRvaAndSizes
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_directory_size(uint32_t id) const
+{
+	//Reject ids outside the declared directory count
+	if(id >= nt_headers_.OptionalHeader.NumberOfRvaAndSizes)
+		throw pe_exception("Specified directory does not exist", pe_exception::directory_does_not_exist);
+
+	return nt_headers_.OptionalHeader.DataDirectory[id].Size;
+}
+
+//Strips trailing zero DATA_DIRECTORY entries and stores the new count in NumberOfRvaAndSizes
+//Returns resulting number of data directories
+//strip_iat_directory - if true, even a non-empty IAT directory will be stripped; min_count limits how far stripping goes
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::strip_data_directories(uint32_t min_count, bool strip_iat_directory)
+{
+	int i = nt_headers_.OptionalHeader.NumberOfRvaAndSizes - 1; //Index of the last declared directory
+
+	//Enumerate all data directories from the end
+	for(; i >= 0; i--)
+	{
+		//Stop at the first directory that exists, unless it is the IAT and IAT stripping is enabled
+		if(nt_headers_.OptionalHeader.DataDirectory[i].VirtualAddress && (static_cast<uint32_t>(i) != image_directory_entry_iat || !strip_iat_directory))
+			break;
+
+		if(i <= static_cast<int>(min_count) - 2) //NOTE(review): stopping at index min_count - 2 appears to leave min_count - 1 entries - confirm
+			break;
+	}
+
+	if(i == image_numberof_directory_entries - 1) //Loop stopped on the last possible entry: header count is left untouched
+		return image_numberof_directory_entries;
+
+	//Store and return the new number of data directories
+	return nt_headers_.OptionalHeader.NumberOfRvaAndSizes = i + 1;
+}
+
+//Returns image base for PE32 (the ImageBase field cast to 32 bits)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_image_base_32() const
+{
+	return static_cast<uint32_t>(nt_headers_.OptionalHeader.ImageBase);
+}
+
+//Returns image base for PE32/PE64 (the ImageBase field cast to 64 bits)
+template<typename PEClassType>
+uint64_t pe_properties_generic<PEClassType>::get_image_base_64() const
+{
+	return static_cast<uint64_t>(nt_headers_.OptionalHeader.ImageBase);
+}
+
+//Sets new image base from a 32-bit value
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_image_base(uint32_t base)
+{
+	nt_headers_.OptionalHeader.ImageBase = base;
+}
+
+//Sets new image base from a 64-bit value (cast to PEClassType::BaseSize before it is stored)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_image_base_64(uint64_t base)
+{
+	nt_headers_.OptionalHeader.ImageBase = static_cast<typename PEClassType::BaseSize>(base);
+}
+
+//Returns image entry point (the AddressOfEntryPoint header field)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_ep() const
+{
+	return nt_headers_.OptionalHeader.AddressOfEntryPoint;
+}
+
+//Sets image entry point (the AddressOfEntryPoint header field, stored without checks)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_ep(uint32_t new_ep)
+{
+	nt_headers_.OptionalHeader.AddressOfEntryPoint = new_ep;
+}
+
+//Returns file alignment (the FileAlignment header field)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_file_alignment() const
+{
+	return nt_headers_.OptionalHeader.FileAlignment;
+}
+
+//Returns section alignment (the SectionAlignment header field)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_section_alignment() const
+{
+	return nt_headers_.OptionalHeader.SectionAlignment;
+}
+
+//Sets heap size commit from a 32-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_heap_size_commit(uint32_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfHeapCommit = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets heap size commit from a 64-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_heap_size_commit(uint64_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfHeapCommit = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets heap size reserve from a 32-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_heap_size_reserve(uint32_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfHeapReserve = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets heap size reserve from a 64-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_heap_size_reserve(uint64_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfHeapReserve = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets stack size commit from a 32-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_stack_size_commit(uint32_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfStackCommit = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets stack size commit from a 64-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_stack_size_commit(uint64_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfStackCommit = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets stack size reserve from a 32-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_stack_size_reserve(uint32_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfStackReserve = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Sets stack size reserve from a 64-bit value (cast to PEClassType::BaseSize)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_stack_size_reserve(uint64_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfStackReserve = static_cast<typename PEClassType::BaseSize>(size);
+}
+
+//Returns heap size commit for PE32 (the SizeOfHeapCommit field cast to 32 bits)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_heap_size_commit_32() const
+{
+	return static_cast<uint32_t>(nt_headers_.OptionalHeader.SizeOfHeapCommit);
+}
+
+//Returns heap size commit for PE32/PE64 (the SizeOfHeapCommit field cast to 64 bits)
+template<typename PEClassType>
+uint64_t pe_properties_generic<PEClassType>::get_heap_size_commit_64() const
+{
+	return static_cast<uint64_t>(nt_headers_.OptionalHeader.SizeOfHeapCommit);
+}
+
+//Returns heap size reserve for PE32 (the SizeOfHeapReserve field cast to 32 bits)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_heap_size_reserve_32() const
+{
+	return static_cast<uint32_t>(nt_headers_.OptionalHeader.SizeOfHeapReserve);
+}
+
+//Returns heap size reserve for PE32/PE64 (the SizeOfHeapReserve field cast to 64 bits)
+template<typename PEClassType>
+uint64_t pe_properties_generic<PEClassType>::get_heap_size_reserve_64() const
+{
+	return static_cast<uint64_t>(nt_headers_.OptionalHeader.SizeOfHeapReserve);
+}
+
+//Returns stack size commit (SizeOfStackCommit) cast to 32 bits; intended for PE32, a wider stored value would be truncated
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_stack_size_commit_32() const
+{
+	return static_cast<uint32_t>(nt_headers_.OptionalHeader.SizeOfStackCommit);
+}
+
+//Returns stack size commit (SizeOfStackCommit) as a 64-bit value; usable for both PE32 and PE64
+template<typename PEClassType>
+uint64_t pe_properties_generic<PEClassType>::get_stack_size_commit_64() const
+{
+	return static_cast<uint64_t>(nt_headers_.OptionalHeader.SizeOfStackCommit);
+}
+
+//Returns stack size reserve (SizeOfStackReserve) cast to 32 bits; intended for PE32, a wider stored value would be truncated
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_stack_size_reserve_32() const
+{
+	return static_cast<uint32_t>(nt_headers_.OptionalHeader.SizeOfStackReserve);
+}
+
+//Returns stack size reserve (SizeOfStackReserve) as a 64-bit value; usable for both PE32 and PE64
+template<typename PEClassType>
+uint64_t pe_properties_generic<PEClassType>::get_stack_size_reserve_64() const
+{
+	return static_cast<uint64_t>(nt_headers_.OptionalHeader.SizeOfStackReserve);
+}
+
+//Returns virtual size of image (SizeOfImage field of the optional header)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_size_of_image() const
+{
+	return nt_headers_.OptionalHeader.SizeOfImage;
+}
+
+//Returns number of RVA and sizes (NumberOfRvaAndSizes: number of DATA_DIRECTORY entries) from the optional header
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_number_of_rvas_and_sizes() const
+{
+	return nt_headers_.OptionalHeader.NumberOfRvaAndSizes;
+}
+
+//Sets number of RVA and sizes (NumberOfRvaAndSizes: number of DATA_DIRECTORY entries); the value is stored as is, without checks
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_number_of_rvas_and_sizes(uint32_t number)
+{
+	nt_headers_.OptionalHeader.NumberOfRvaAndSizes = number;
+}
+
+//Returns PE characteristics (Characteristics field of the file header)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_characteristics() const
+{
+	return nt_headers_.FileHeader.Characteristics;
+}
+
+//Returns checksum of PE file as stored in the optional header (CheckSum field; nothing is recomputed here)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_checksum() const
+{
+	return nt_headers_.OptionalHeader.CheckSum;
+}
+
+//Sets checksum of PE file (CheckSum field of the optional header) to the given value
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_checksum(uint32_t checksum)
+{
+	nt_headers_.OptionalHeader.CheckSum = checksum;
+}
+
+//Returns DLL Characteristics (DllCharacteristics field of the optional header)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_dll_characteristics() const
+{
+	return nt_headers_.OptionalHeader.DllCharacteristics;
+}
+
+//Returns timestamp of PE file (TimeDateStamp field of the file header)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_time_date_stamp() const
+{
+	return nt_headers_.FileHeader.TimeDateStamp;
+}
+
+//Sets timestamp of PE file (TimeDateStamp field of the file header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_time_date_stamp(uint32_t timestamp)
+{
+	nt_headers_.FileHeader.TimeDateStamp = timestamp;
+}
+
+//Sets DLL Characteristics (DllCharacteristics field of the optional header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_dll_characteristics(uint16_t characteristics)
+{
+	nt_headers_.OptionalHeader.DllCharacteristics = characteristics;
+}
+
+//Returns Machine field value of PE file (read from the file header)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_machine() const
+{
+	return nt_headers_.FileHeader.Machine;
+}
+
+//Sets Machine field value of PE file (written to the file header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_machine(uint16_t machine)
+{
+	nt_headers_.FileHeader.Machine = machine;
+}
+
+//Sets PE characteristics (Characteristics field of the file header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_characteristics(uint16_t ch)
+{
+	nt_headers_.FileHeader.Characteristics = ch;
+}
+
+//Returns size of headers (SizeOfHeaders field of the optional header)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_size_of_headers() const
+{
+	return nt_headers_.OptionalHeader.SizeOfHeaders;
+}
+
+//Returns subsystem (Subsystem field of the optional header)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_subsystem() const
+{
+	return nt_headers_.OptionalHeader.Subsystem;
+}
+
+//Sets subsystem (Subsystem field of the optional header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_subsystem(uint16_t subsystem)
+{
+	nt_headers_.OptionalHeader.Subsystem = subsystem;
+}
+
+//Returns size of optional header as recorded in the file header (SizeOfOptionalHeader field)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_size_of_optional_header() const
+{
+	return nt_headers_.FileHeader.SizeOfOptionalHeader;
+}
+
+//Returns PE signature (Signature field of the NT headers)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_pe_signature() const
+{
+	return nt_headers_.Signature;
+}
+
+//Returns PE magic value (Magic field of the optional header) as stored in the headers
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_magic() const
+{
+	return nt_headers_.OptionalHeader.Magic;
+}
+
+//Stores the required operating system version (major and minor words) in the optional header
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_os_version(uint16_t major, uint16_t minor)
+{
+	nt_headers_.OptionalHeader.MajorOperatingSystemVersion = major;
+	nt_headers_.OptionalHeader.MinorOperatingSystemVersion = minor;
+}
+
+//Returns required operating system version (minor word, MinorOperatingSystemVersion field)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_minor_os_version() const
+{
+	return nt_headers_.OptionalHeader.MinorOperatingSystemVersion;
+}
+
+//Returns required operating system version (major word, MajorOperatingSystemVersion field)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_major_os_version() const
+{
+	return nt_headers_.OptionalHeader.MajorOperatingSystemVersion;
+}
+
+//Stores the required subsystem version (major and minor words) in the optional header
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_subsystem_version(uint16_t major, uint16_t minor)
+{
+	nt_headers_.OptionalHeader.MajorSubsystemVersion = major;
+	nt_headers_.OptionalHeader.MinorSubsystemVersion = minor;
+}
+
+//Returns required subsystem version (minor word, MinorSubsystemVersion field)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_minor_subsystem_version() const
+{
+	return nt_headers_.OptionalHeader.MinorSubsystemVersion;
+}
+
+//Returns required subsystem version (major word, MajorSubsystemVersion field)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_major_subsystem_version() const
+{
+	return nt_headers_.OptionalHeader.MajorSubsystemVersion;
+}
+
+//Converts a 32-bit Virtual Address (VA) to a Relative Virtual Address (RVA); with bound_check, throws if VA - ImageBase does not fit a DWORD
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::va_to_rva(uint32_t va, bool bound_check) const
+{
+	const uint64_t delta = static_cast<uint64_t>(va) - nt_headers_.OptionalHeader.ImageBase;
+	if(bound_check && delta > pe_utils::max_dword)
+		throw pe_exception("Incorrect address conversion", pe_exception::incorrect_address_conversion);
+	return static_cast<uint32_t>(delta);
+}
+
+//Converts a 64-bit Virtual Address (VA) to a Relative Virtual Address (RVA); with bound_check, throws if VA - ImageBase does not fit a DWORD
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::va_to_rva(uint64_t va, bool bound_check) const
+{
+	const uint64_t delta = va - nt_headers_.OptionalHeader.ImageBase;
+	if(bound_check && delta > pe_utils::max_dword)
+		throw pe_exception("Incorrect address conversion", pe_exception::incorrect_address_conversion);
+	return static_cast<uint32_t>(delta);
+}
+
+//Converts a Relative Virtual Address (RVA) to a 32-bit Virtual Address (VA); throws if RVA + (32-bit) ImageBase is not a safe sum
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::rva_to_va_32(uint32_t rva) const
+{
+	const uint32_t image_base = static_cast<uint32_t>(nt_headers_.OptionalHeader.ImageBase);
+	if(!pe_utils::is_sum_safe(rva, image_base))
+		throw pe_exception("Incorrect address conversion", pe_exception::incorrect_address_conversion);
+	return rva + image_base;
+}
+
+//Relative Virtual Address (RVA) to Virtual Address (VA) conversion for PE32/PE64 (64-bit sum of RVA and ImageBase, no overflow check)
+template<typename PEClassType>
+uint64_t pe_properties_generic<PEClassType>::rva_to_va_64(uint32_t rva) const
+{
+	return static_cast<uint64_t>(rva) + nt_headers_.OptionalHeader.ImageBase;
+}
+
+//Returns number of sections as recorded in the file header (NumberOfSections field)
+template<typename PEClassType>
+uint16_t pe_properties_generic<PEClassType>::get_number_of_sections() const
+{
+	return nt_headers_.FileHeader.NumberOfSections;
+}
+
+//Sets number of sections in the file header (NumberOfSections field; only the header value is changed)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_number_of_sections(uint16_t number)
+{
+	nt_headers_.FileHeader.NumberOfSections = number;
+}
+
+//Sets virtual size of image (SizeOfImage field of the optional header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_size_of_image(uint32_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfImage = size;
+}
+
+//Sets size of headers (SizeOfHeaders field of the optional header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_size_of_headers(uint32_t size)
+{
+	nt_headers_.OptionalHeader.SizeOfHeaders = size;
+}
+
+//Sets size of optional header in the file header (SizeOfOptionalHeader field)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_size_of_optional_header(uint16_t size)
+{
+	nt_headers_.FileHeader.SizeOfOptionalHeader = size;
+}
+
+//Returns writable pointer to the raw bytes of the stored NT headers structure
+template<typename PEClassType>
+char* pe_properties_generic<PEClassType>::get_nt_headers_ptr()
+{
+	return reinterpret_cast<char*>(&nt_headers_);
+}
+
+//Returns read-only pointer to the raw bytes of the stored NT headers structure
+template<typename PEClassType>
+const char* pe_properties_generic<PEClassType>::get_nt_headers_ptr() const
+{
+	return reinterpret_cast<const char*>(&nt_headers_);
+}
+
+//Returns size of NT header: sizeof() of the NtHeaders structure for this PE type (a compile-time constant)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_sizeof_nt_header() const
+{
+	return sizeof(typename PEClassType::NtHeaders);
+}
+
+//Returns size of optional headers: sizeof() of the OptHeaders structure for this PE type (a compile-time constant)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_sizeof_opt_headers() const
+{
+	return sizeof(typename PEClassType::OptHeaders);
+}
+
+//Sets file alignment (FileAlignment field of the optional header); the value is stored with no checks
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_file_alignment_unchecked(uint32_t alignment) 
+{
+	nt_headers_.OptionalHeader.FileAlignment = alignment;
+}
+
+//Sets base of code (BaseOfCode field of the optional header)
+template<typename PEClassType>
+void pe_properties_generic<PEClassType>::set_base_of_code(uint32_t base)
+{
+	nt_headers_.OptionalHeader.BaseOfCode = base;
+}
+
+//Returns base of code (BaseOfCode field of the optional header)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_base_of_code() const
+{
+	return nt_headers_.OptionalHeader.BaseOfCode;
+}
+
+//Returns needed PE magic for PE or PE+ (PEClassType::Id from the template parameter, not the value stored in the headers)
+template<typename PEClassType>
+uint32_t pe_properties_generic<PEClassType>::get_needed_magic() const
+{
+	return PEClassType::Id;
+}
+
+//Returns PE type of this image: pe_type_32 if PEClassType::Id equals the PE32 optional header magic, pe_type_64 otherwise
+template<typename PEClassType>
+pe_type pe_properties_generic<PEClassType>::get_pe_type() const
+{
+	return PEClassType::Id == image_nt_optional_hdr32_magic ? pe_type_32 : pe_type_64;
+}
+
+//Explicit instantiations of the two supported variants: PE32 (PE) and PE64 (PE+)
+template class pe_properties_generic<pe_types_class_32>;
+template class pe_properties_generic<pe_types_class_64>;
+}
diff --git a/tools/pe_bliss/pe_properties_generic.h b/tools/pe_bliss/pe_properties_generic.h
new file mode 100644
index 0000000000..4ff906803c
--- /dev/null
+++ b/tools/pe_bliss/pe_properties_generic.h
@@ -0,0 +1,277 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_properties.h"
+
+namespace pe_bliss
+{
+//Helper traits class bundling the PE- or PE+-specific types and constants (reduces code size and eases editing)
+template<
+	typename NtHeadersType,
+	typename OptHeadersType,
+	uint16_t IdVal,
+	typename BaseSizeType,
+	BaseSizeType ImportSnapFlagVal,
+	typename TLSStructType,
+	typename ConfigStructType>
+class pe_types
+{
+public:
+	typedef NtHeadersType NtHeaders; //NT HEADERS type
+	typedef OptHeadersType OptHeaders; //NT OPTIONAL HEADER type
+	typedef BaseSizeType BaseSize; //Base size of different values: DWORD or ULONGLONG
+	typedef TLSStructType TLSStruct; //TLS structure type
+	typedef ConfigStructType ConfigStruct; //Configuration structure type
+
+	static const uint16_t Id = IdVal; //Magic of PE or PE+
+	static const BaseSize ImportSnapFlag = ImportSnapFlagVal; //Import snap flag value
+};
+
+//Portable Executable derived class for PE and PE+
+//Describes PE/PE+ dependent things; PEClassType is a pe_types<> instantiation supplying the header types
+template<typename PEClassType>
+class pe_properties_generic : public pe_properties
+{
+public: //Duplication and creation
+	virtual std::auto_ptr<pe_properties> duplicate() const;
+
+	//Properly fills PE structures
+	virtual void create_pe(uint32_t section_alignment, uint16_t subsystem);
+
+public:
+	//Destructor
+	virtual ~pe_properties_generic();
+
+
+public: //DIRECTORIES
+	//Returns true if directory exists
+	virtual bool directory_exists(uint32_t id) const;
+
+	//Removes directory
+	virtual void remove_directory(uint32_t id);
+
+	//Returns directory RVA
+	virtual uint32_t get_directory_rva(uint32_t id) const;
+	//Returns directory size
+	virtual uint32_t get_directory_size(uint32_t id) const;
+
+	//Sets directory RVA (just a value of PE header, no moving occurs)
+	virtual void set_directory_rva(uint32_t id, uint32_t rva);
+	//Sets directory size (just a value of PE header, no moving occurs)
+	virtual void set_directory_size(uint32_t id, uint32_t size);
+	
+	//Strips only zero DATA_DIRECTORY entries to count = min_count
+	//Returns resulting number of data directories
+	//strip_iat_directory - if true, even not empty IAT directory will be stripped
+	virtual uint32_t strip_data_directories(uint32_t min_count = 1, bool strip_iat_directory = true);
+
+
+public: //IMAGE
+	//Returns PE type of this image
+	virtual pe_type get_pe_type() const;
+
+
+public: //PE HEADER
+	//Returns image base for PE32 and PE64 respectively
+	virtual uint32_t get_image_base_32() const;
+	virtual uint64_t get_image_base_64() const;
+
+	//Sets new image base for PE32
+	virtual void set_image_base(uint32_t base);
+	//Sets new image base for PE32/PE+
+	virtual void set_image_base_64(uint64_t base);
+
+	//Returns image entry point
+	virtual uint32_t get_ep() const;
+	//Sets image entry point
+	virtual void set_ep(uint32_t new_ep);
+
+	//Returns file alignment
+	virtual uint32_t get_file_alignment() const;
+	//Returns section alignment
+	virtual uint32_t get_section_alignment() const;
+
+	//Sets heap size commit for PE32 and PE64 respectively
+	virtual void set_heap_size_commit(uint32_t size);
+	virtual void set_heap_size_commit(uint64_t size);
+	//Sets heap size reserve for PE32 and PE64 respectively
+	virtual void set_heap_size_reserve(uint32_t size);
+	virtual void set_heap_size_reserve(uint64_t size);
+	//Sets stack size commit for PE32 and PE64 respectively
+	virtual void set_stack_size_commit(uint32_t size);
+	virtual void set_stack_size_commit(uint64_t size);
+	//Sets stack size reserve for PE32 and PE64 respectively
+	virtual void set_stack_size_reserve(uint32_t size);
+	virtual void set_stack_size_reserve(uint64_t size);
+	
+	//Returns heap size commit for PE32 and PE64 respectively
+	virtual uint32_t get_heap_size_commit_32() const;
+	virtual uint64_t get_heap_size_commit_64() const;
+	//Returns heap size reserve for PE32 and PE64 respectively
+	virtual uint32_t get_heap_size_reserve_32() const;
+	virtual uint64_t get_heap_size_reserve_64() const;
+	//Returns stack size commit for PE32 and PE64 respectively
+	virtual uint32_t get_stack_size_commit_32() const;
+	virtual uint64_t get_stack_size_commit_64() const;
+	//Returns stack size reserve for PE32 and PE64 respectively
+	virtual uint32_t get_stack_size_reserve_32() const;
+	virtual uint64_t get_stack_size_reserve_64() const;
+
+	//Returns virtual size of image
+	virtual uint32_t get_size_of_image() const;
+
+	//Returns number of RVA and sizes (number of DATA_DIRECTORY entries)
+	virtual uint32_t get_number_of_rvas_and_sizes() const;
+	//Sets number of RVA and sizes (number of DATA_DIRECTORY entries)
+	virtual void set_number_of_rvas_and_sizes(uint32_t number);
+
+	//Returns PE characteristics
+	virtual uint16_t get_characteristics() const;
+	//Sets PE characteristics
+	virtual void set_characteristics(uint16_t ch);
+	
+	//Returns size of headers
+	virtual uint32_t get_size_of_headers() const;
+
+	//Returns subsystem
+	virtual uint16_t get_subsystem() const;
+
+	//Sets subsystem
+	virtual void set_subsystem(uint16_t subsystem);
+
+	//Returns size of optional header
+	virtual uint16_t get_size_of_optional_header() const;
+
+	//Returns PE signature
+	virtual uint32_t get_pe_signature() const;
+
+	//Returns PE magic value
+	virtual uint32_t get_magic() const;
+
+	//Returns checksum of PE file from header
+	virtual uint32_t get_checksum() const;
+	
+	//Sets checksum of PE file
+	virtual void set_checksum(uint32_t checksum);
+	
+	//Returns timestamp of PE file from header
+	virtual uint32_t get_time_date_stamp() const;
+	
+	//Sets timestamp of PE file
+	virtual void set_time_date_stamp(uint32_t timestamp);
+	
+	//Returns Machine field value of PE file from header
+	virtual uint16_t get_machine() const;
+
+	//Sets Machine field value of PE file
+	virtual void set_machine(uint16_t machine);
+
+	//Returns DLL Characteristics
+	virtual uint16_t get_dll_characteristics() const;
+	
+	//Sets DLL Characteristics
+	virtual void set_dll_characteristics(uint16_t characteristics);
+	
+	//Sets required operating system version
+	virtual void set_os_version(uint16_t major, uint16_t minor);
+
+	//Returns required operating system version (minor word)
+	virtual uint16_t get_minor_os_version() const;
+
+	//Returns required operating system version (major word)
+	virtual uint16_t get_major_os_version() const;
+
+	//Sets required subsystem version
+	virtual void set_subsystem_version(uint16_t major, uint16_t minor);
+
+	//Returns required subsystem version (minor word)
+	virtual uint16_t get_minor_subsystem_version() const;
+
+	//Returns required subsystem version (major word)
+	virtual uint16_t get_major_subsystem_version() const;
+
+public: //ADDRESS CONVERSIONS
+	//Virtual Address (VA) to Relative Virtual Address (RVA) conversions
+	//for PE32 and PE64 respectively
+	//bound_check checks integer overflow
+	virtual uint32_t va_to_rva(uint32_t va, bool bound_check = true) const;
+	virtual uint32_t va_to_rva(uint64_t va, bool bound_check = true) const;
+	
+	//Relative Virtual Address (RVA) to Virtual Address (VA) conversions
+	//for PE32 and PE64 respectively
+	virtual uint32_t rva_to_va_32(uint32_t rva) const;
+	virtual uint64_t rva_to_va_64(uint32_t rva) const;
+
+
+public: //SECTIONS
+	//Returns number of sections
+	virtual uint16_t get_number_of_sections() const;
+
+protected:
+	typename PEClassType::NtHeaders nt_headers_; //NT headers (PE32 or PE64)
+	
+public:
+	//Sets number of sections
+	virtual void set_number_of_sections(uint16_t number);
+	//Sets virtual size of image
+	virtual void set_size_of_image(uint32_t size);
+	//Sets size of headers
+	virtual void set_size_of_headers(uint32_t size);
+	//Sets size of optional headers
+	virtual void set_size_of_optional_header(uint16_t size);
+	//Returns nt headers data pointer
+	virtual char* get_nt_headers_ptr();
+	//Returns nt headers data pointer
+	virtual const char* get_nt_headers_ptr() const;
+	//Returns size of NT header
+	virtual uint32_t get_sizeof_nt_header() const;
+	//Returns size of optional headers
+	virtual uint32_t get_sizeof_opt_headers() const;
+	//Sets file alignment (no checks)
+	virtual void set_file_alignment_unchecked(uint32_t alignment);
+	//Sets base of code
+	virtual void set_base_of_code(uint32_t base);
+	//Returns base of code
+	virtual uint32_t get_base_of_code() const;
+	//Returns needed PE magic for PE or PE+ (from template parameters)
+	virtual uint32_t get_needed_magic() const;
+};
+
+//Type bundles for PE32 (PE, uint32_t base size) and PE64 (PE+, uint64_t base size), followed by the matching property classes
+typedef pe_types<pe_win::image_nt_headers32,
+	pe_win::image_optional_header32,
+	pe_win::image_nt_optional_hdr32_magic,
+	uint32_t,
+	pe_win::image_ordinal_flag32,
+	pe_win::image_tls_directory32,
+	pe_win::image_load_config_directory32> pe_types_class_32;
+
+typedef pe_types<pe_win::image_nt_headers64,
+	pe_win::image_optional_header64,
+	pe_win::image_nt_optional_hdr64_magic,
+	uint64_t,
+	pe_win::image_ordinal_flag64,
+	pe_win::image_tls_directory64,
+	pe_win::image_load_config_directory64> pe_types_class_64;
+
+typedef pe_properties_generic<pe_types_class_32> pe_properties_32;
+typedef pe_properties_generic<pe_types_class_64> pe_properties_64;
+}
diff --git a/tools/pe_bliss/pe_rebuilder.cpp b/tools/pe_bliss/pe_rebuilder.cpp
new file mode 100644
index 0000000000..faf5803b8c
--- /dev/null
+++ b/tools/pe_bliss/pe_rebuilder.cpp
@@ -0,0 +1,214 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_rebuilder.h"
+#include "pe_base.h"
+#include "pe_structures.h"
+#include "pe_exception.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Rebuilds PE image headers; "dos_header" receives a copy of the image's DOS header (its e_lfanew is set here unless the header is stripped)
+//If strip_dos_header is true, DOS headers partially will be used for PE headers
+//If change_size_of_headers == true, SizeOfHeaders will be recalculated automatically (throws cannot_rebuild_image if the headers would run past the first section's virtual address)
+//If save_bound_import == true, existing bound import directory will be saved correctly (because some compilers and bind.exe put it to PE headers)
+void rebuild_pe(pe_base& pe, image_dos_header& dos_header, bool strip_dos_header, bool change_size_of_headers, bool save_bound_import)
+{
+	dos_header = pe.get_dos_header();
+
+	if(strip_dos_header)
+	{
+		//Strip stub overlay
+		pe.strip_stub_overlay();
+		//BaseOfCode NT Headers field now overlaps
+		//e_lfanew field, so we're actually setting
+		//e_lfanew with this call
+		pe.set_base_of_code(8 * sizeof(uint16_t));
+	}
+	else
+	{
+		//Set start of PE headers: right after the DOS header and the DWORD-aligned stub overlay
+		dos_header.e_lfanew = sizeof(image_dos_header)
+			+ pe_utils::align_up(static_cast<uint32_t>(pe.get_stub_overlay().size()), sizeof(uint32_t));
+	}
+
+	section_list& sections = pe.get_image_sections();
+
+	//Calculate pointer to section data: DOS part + NT headers + aligned stub - unused data directory slots + section headers
+	size_t ptr_to_section_data = (strip_dos_header ? 8 * sizeof(uint16_t) : sizeof(image_dos_header)) + pe.get_sizeof_nt_header()
+		+ pe_utils::align_up(pe.get_stub_overlay().size(), sizeof(uint32_t))
+		- sizeof(image_data_directory) * (image_numberof_directory_entries - pe.get_number_of_rvas_and_sizes())
+		+ sections.size() * sizeof(image_section_header);
+
+	if(save_bound_import && pe.has_bound_import())
+	{
+		//It will be aligned to DWORD, because we're aligning to DWORD everything above it
+		pe.set_directory_rva(image_directory_entry_bound_import, static_cast<uint32_t>(ptr_to_section_data));
+		ptr_to_section_data += pe.get_directory_size(image_directory_entry_bound_import);	
+	}
+	
+	ptr_to_section_data = pe_utils::align_up(ptr_to_section_data, pe.get_file_alignment());
+
+	//Set size of headers (only if requested; fails when the headers would run past the first section)
+	if(change_size_of_headers)
+	{
+		if(!pe.get_image_sections().empty())
+		{
+			if(static_cast<uint32_t>(ptr_to_section_data) > (*sections.begin()).get_virtual_address())
+				throw pe_exception("Headers of PE file are too long. Try to strip STUB or don't build bound import", pe_exception::cannot_rebuild_image);
+		}
+
+		pe.set_size_of_headers(static_cast<uint32_t>(ptr_to_section_data));
+	}
+
+	//Set number of sections in PE header
+	pe.update_number_of_sections();
+
+	pe.update_image_size();
+
+	pe.set_size_of_optional_header(static_cast<uint16_t>(pe.get_sizeof_opt_headers()
+		- sizeof(image_data_directory) * (image_numberof_directory_entries - pe.get_number_of_rvas_and_sizes())));
+
+	//Recalculate pointer to raw data according to section list
+	for(section_list::iterator it = sections.begin(); it != sections.end(); ++it)
+	{
+		//Save section headers PointerToRawData
+		(*it).set_pointer_to_raw_data(static_cast<uint32_t>(ptr_to_section_data));
+		ptr_to_section_data += (*it).get_aligned_raw_size(pe.get_file_alignment());
+	}
+}
+
+//Rebuild PE image and write it to "out" ostream (throws stream_is_bad if the stream is already bad on entry)
+//If strip_dos_header is true, DOS headers partially will be used for PE headers
+//If change_size_of_headers == true, SizeOfHeaders will be recalculated automatically
+//If save_bound_import == true, existing bound import directory will be saved correctly (because some compilers and bind.exe put it to PE headers)
+void rebuild_pe(pe_base& pe, std::ostream& out, bool strip_dos_header, bool change_size_of_headers, bool save_bound_import)
+{
+	if(out.bad())
+		throw pe_exception("Stream is bad", pe_exception::stream_is_bad);
+
+	if(save_bound_import && pe.has_bound_import())
+	{
+		if(pe.section_data_length_from_rva(pe.get_directory_rva(image_directory_entry_bound_import), pe.get_directory_rva(image_directory_entry_bound_import), section_data_raw, true)
+			< pe.get_directory_size(image_directory_entry_bound_import))
+			throw pe_exception("Incorrect bound import directory", pe_exception::incorrect_bound_import_directory);
+	}
+
+	//Disable stream exceptions and clear error flags; NOTE(review): write failures below are never checked in this function
+	out.exceptions(std::ios::goodbit);
+	out.clear();
+	
+	uint32_t original_bound_import_rva = pe.has_bound_import() ? pe.get_directory_rva(image_directory_entry_bound_import) : 0;
+	if(original_bound_import_rva && original_bound_import_rva > pe.get_size_of_headers())
+	{
+		//No need to do anything with bound import directory
+		//if it is placed inside of any section, not headers
+		original_bound_import_rva = 0;
+		save_bound_import = false;
+	}
+
+	{
+		image_dos_header dos_header;
+
+		//Rebuild PE image headers
+		rebuild_pe(pe, dos_header, strip_dos_header, change_size_of_headers, save_bound_import);
+
+		//Write DOS header (only its first 8 words when the DOS header is stripped)
+		out.write(reinterpret_cast<const char*>(&dos_header), strip_dos_header ? 8 * sizeof(uint16_t) : sizeof(image_dos_header));
+	}
+
+	//If we have stub overlay, write it too
+	{
+		const std::string& stub = pe.get_stub_overlay();
+		if(stub.size())
+		{
+			out.write(stub.data(), stub.size());
+			size_t aligned_size = pe_utils::align_up(stub.size(), sizeof(uint32_t));
+			//Align PE header, which is right after rich overlay
+			while(aligned_size > stub.size())
+			{
+				out.put('\0');
+				--aligned_size;
+			}
+		}
+	}
+	
+	//Write NT headers without the unused trailing data directory entries
+	out.write(static_cast<const pe_base&>(pe).get_nt_headers_ptr(), pe.get_sizeof_nt_header()
+		- sizeof(image_data_directory) * (image_numberof_directory_entries - pe.get_number_of_rvas_and_sizes()));
+
+	//Write section headers
+	const section_list& sections = pe.get_image_sections();
+	for(section_list::const_iterator it = sections.begin(); it != sections.end(); ++it)
+	{
+		if(it == sections.end() - 1) //If last section encountered
+		{
+			image_section_header header((*it).get_raw_header());
+			header.SizeOfRawData = static_cast<uint32_t>((*it).get_raw_data().length()); //Set non-aligned actual data length for it
+			out.write(reinterpret_cast<const char*>(&header), sizeof(image_section_header));
+		}
+		else
+		{
+			out.write(reinterpret_cast<const char*>(&(*it).get_raw_header()), sizeof(image_section_header));
+		}
+	}
+
+	//Write bound import data if requested
+	if(save_bound_import && pe.has_bound_import())
+	{
+		out.write(pe.section_data_from_rva(original_bound_import_rva, section_data_raw, true),
+			pe.get_directory_size(image_directory_entry_bound_import));
+	}
+
+	//Write section data finally
+	for(section_list::const_iterator it = sections.begin(); it != sections.end(); ++it)
+	{
+		const section& s = *it;
+
+		std::streamoff wpos = out.tellp();
+
+		//Fill unused overlay data between sections with null bytes
+		for(unsigned int i = 0; i < s.get_pointer_to_raw_data() - wpos; i++)
+			out.put(0);
+
+		//Write raw section data
+		out.write(s.get_raw_data().data(), s.get_raw_data().length());
+	}
+}
+
+//Rebuilds PE image and writes it to the file named by "out" (opened in binary mode and truncated); throws stream_is_bad if it cannot be opened
+//strip_dos_header: if true, DOS headers partially will be used for PE headers
+//change_size_of_headers: if true, SizeOfHeaders will be recalculated automatically
+//save_bound_import: if true, existing bound import directory will be saved correctly (because some compilers and bind.exe put it to PE headers)
+void rebuild_pe(pe_base& pe, const char* out, bool strip_dos_header, bool change_size_of_headers, bool save_bound_import)
+{
+	std::ofstream target(out, std::ios::out | std::ios::binary | std::ios::trunc);
+	if(target.fail())
+		throw pe_exception("Error in open file.", pe_exception::stream_is_bad);
+
+	//Delegate the actual rebuilding and writing to the ostream overload
+	rebuild_pe(pe, target, strip_dos_header, change_size_of_headers, save_bound_import);
+}
+
+
+}
diff --git a/tools/pe_bliss/pe_rebuilder.h b/tools/pe_bliss/pe_rebuilder.h
new file mode 100644
index 0000000000..319807e5c9
--- /dev/null
+++ b/tools/pe_bliss/pe_rebuilder.h
@@ -0,0 +1,40 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <ostream>
+#include <fstream>
+
+namespace pe_bliss
+{
+class pe_base;
+//Rebuilds PE image, writes resulting image to ostream "out". If strip_dos_header == true, DOS headers partially will be used for PE headers
+//If change_size_of_headers == true, SizeOfHeaders will be recalculated automatically
+//If save_bound_import == true, existing bound import directory will be saved correctly (because some compilers and bind.exe put it to PE headers)
+void rebuild_pe(pe_base& pe, std::ostream& out, bool strip_dos_header = false, bool change_size_of_headers = true, bool save_bound_import = true);
+
+//Rebuild PE image and write it to "out" file
+//If strip_dos_header is true, DOS headers partially will be used for PE headers
+//If change_size_of_headers == true, SizeOfHeaders will be recalculated automatically
+//If save_bound_import == true, existing bound import directory will be saved correctly (because some compilers and bind.exe put it to PE headers)
+void rebuild_pe(pe_base& pe, const char* out, bool strip_dos_header = false, bool change_size_of_headers = true, bool save_bound_import = true);
+
+}
diff --git a/tools/pe_bliss/pe_relocations.cpp b/tools/pe_bliss/pe_relocations.cpp
new file mode 100644
index 0000000000..d5357dd219
--- /dev/null
+++ b/tools/pe_bliss/pe_relocations.cpp
@@ -0,0 +1,320 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_relocations.h"
+#include "pe_properties_generic.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//RELOCATIONS
+//Default constructor: creates an entry with zero relative RVA and zero type
+relocation_entry::relocation_entry()
+	:rva_(0), type_(0)
+{}
+
+//Constructor from relocation item (WORD): the low 12 bits become the relative RVA, the high 4 bits become the type
+relocation_entry::relocation_entry(uint16_t relocation_value)
+	:rva_(relocation_value & ((1 << 12) - 1)), type_(relocation_value >> 12)
+{}
+
+//Constructor from relative rva and relocation type; both values are stored as passed, without masking
+relocation_entry::relocation_entry(uint16_t rrva, uint16_t type)
+	:rva_(rrva), type_(type)
+{}
+
+//Returns RVA of relocation (actually, relative RVA from relocation table RVA)
+uint16_t relocation_entry::get_rva() const
+{
+	return rva_;
+}
+
+//Returns type of relocation (the value kept in type_; the high 4 bits when the entry was built from a WORD item)
+uint16_t relocation_entry::get_type() const
+{
+	return type_;
+}
+
+//Sets RVA of relocation (actually, relative RVA from relocation table RVA); the value is stored without masking
+void relocation_entry::set_rva(uint16_t rva)
+{
+	rva_ = rva;
+}
+
+//Sets type of relocation; the value is stored without masking
+void relocation_entry::set_type(uint16_t type)
+{
+	type_ = type;
+}
+
+//Returns relocation item (rrva + type): type_ shifted left by 12 bits, OR-ed with rva_ and truncated to 16 bits
+uint16_t relocation_entry::get_item() const
+{
+	return rva_ | (type_ << 12);
+}
+
+//Sets relocation item (rrva + type): the low 12 bits of "item" go to the rrva, the high 4 bits go to the type
+void relocation_entry::set_item(uint16_t item)
+{
+	rva_ = item & ((1 << 12) - 1);
+	type_ = item >> 12;
+}
+
+//Returns relocation list (read-only reference to the entries held by this table)
+const relocation_table::relocation_list& relocation_table::get_relocations() const
+{
+	return relocations_;
+}
+
+//Adds relocation to table ("entry" is appended to the end of the relocation list)
+void relocation_table::add_relocation(const relocation_entry& entry)
+{
+	relocations_.push_back(entry);
+}
+
+//Default constructor: the block RVA is set to zero
+relocation_table::relocation_table()
+	:rva_(0)
+{}
+
+//Constructor from RVA of relocation table; only the RVA is stored, no relocations are added
+relocation_table::relocation_table(uint32_t rva)
+	:rva_(rva)
+{}
+
+//Returns RVA of block (the base that the relative RVAs of its entries are added to when rebasing)
+uint32_t relocation_table::get_rva() const
+{
+	return rva_;
+}
+
+//Sets RVA of block; only the value stored in this object is changed
+void relocation_table::set_rva(uint32_t rva)
+{
+	rva_ = rva;
+}
+
+//Returns changeable relocation list (non-const reference to the entries held by this table)
+relocation_table::relocation_list& relocation_table::get_relocations()
+{
+	return relocations_;
+}
+
+//Get relocation list of pe file, supports one-word sized relocations only; returns an empty list if the image has no relocations
+//If list_absolute_entries = true, IMAGE_REL_BASED_ABSOLUTE will be listed. Throws pe_exception (incorrect_relocation_directory) on a malformed directory
+const relocation_table_list get_relocations(const pe_base& pe, bool list_absolute_entries)
+{
+	relocation_table_list ret;
+
+	//If image does not have relocations
+	if(!pe.has_reloc())
+		return ret;
+
+	//Check the length in bytes of the section containing relocation directory: at least one block header must fit
+	if(pe.section_data_length_from_rva(pe.get_directory_rva(image_directory_entry_basereloc),
+		pe.get_directory_rva(image_directory_entry_basereloc), section_data_virtual, true)
+		< sizeof(image_base_relocation))
+		throw pe_exception("Incorrect relocation directory", pe_exception::incorrect_relocation_directory);
+
+	unsigned long current_pos = pe.get_directory_rva(image_directory_entry_basereloc);
+	//First IMAGE_BASE_RELOCATION table
+	image_base_relocation reloc_table = pe.section_data_from_rva<image_base_relocation>(current_pos, section_data_virtual, true);
+
+	if(reloc_table.SizeOfBlock % 2) //Entries are WORD-sized, so the size of the first block must be even
+		throw pe_exception("Incorrect relocation directory", pe_exception::incorrect_relocation_directory);
+
+	unsigned long reloc_size = pe.get_directory_size(image_directory_entry_basereloc);
+	unsigned long read_size = 0;
+
+	//reloc_table.VirtualAddress is not checked (not so important)
+	while(reloc_table.SizeOfBlock && read_size < reloc_size)
+	{
+		//Create relocation table
+		relocation_table table;
+		//Save RVA
+		table.set_rva(reloc_table.VirtualAddress);
+
+		if(!pe_utils::is_sum_safe(current_pos, reloc_table.SizeOfBlock))
+			throw pe_exception("Incorrect relocation directory", pe_exception::incorrect_relocation_directory);
+
+		//List all relocations (WORD items that follow the block header)
+		for(unsigned long i = sizeof(image_base_relocation); i < reloc_table.SizeOfBlock; i += sizeof(uint16_t))
+		{
+			relocation_entry entry(pe.section_data_from_rva<uint16_t>(current_pos + i, section_data_virtual, true));
+			if(list_absolute_entries || entry.get_type() != image_rel_based_absolute)
+				table.add_relocation(entry);
+		}
+
+		//Save table
+		ret.push_back(table);
+
+		//Go to next relocation block (this sum was already checked for overflow at the top of the iteration)
+		current_pos += reloc_table.SizeOfBlock;
+		read_size += reloc_table.SizeOfBlock;
+
+		//Fetch the next block header only while still inside the directory, so nothing past its declared size is read
+		if(read_size < reloc_size)
+			reloc_table = pe.section_data_from_rva<image_base_relocation>(current_pos, section_data_virtual, true);
+	}
+
+	return ret;
+}
+
+//Simple relocations rebuilder
+//To keep PE file working, don't remove any of existing relocations in
+//relocation_table_list returned by a call to get_relocations() function
+//auto_strip_last_section - if true and relocations are placed in the last section, it will be automatically stripped
+//offset_from_section_start - offset from the beginning of reloc_section, where relocations data will be situated (aligned up to sizeof(uint32_t))
+//If save_to_pe_header is true, PE header will be modified automatically (basereloc directory RVA/size set, relocs_stripped flag cleared, dynamic_base flag set)
+const image_directory rebuild_relocations(pe_base& pe, const relocation_table_list& relocs, section& reloc_section, uint32_t offset_from_section_start, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	//Check that reloc_section is attached to this PE image
+	if(!pe.section_attached(reloc_section))
+		throw pe_exception("Relocations section must be attached to PE file", pe_exception::section_is_not_attached);
+	
+	uint32_t current_reloc_data_pos = pe_utils::align_up(offset_from_section_start, sizeof(uint32_t));
+
+	uint32_t needed_size = current_reloc_data_pos - offset_from_section_start; //Calculate needed size for relocation tables
+	uint32_t size_delta = needed_size; //Alignment padding before the first table; subtracted again from the reported directory size
+
+	uint32_t start_reloc_pos = current_reloc_data_pos; //Section offset of the first table header
+
+	//Enumerate relocation tables to compute the total size before anything is written
+	for(relocation_table_list::const_iterator it = relocs.begin(); it != relocs.end(); ++it)
+	{
+		needed_size += static_cast<uint32_t>((*it).get_relocations().size() * sizeof(uint16_t) /* relocations */ + sizeof(image_base_relocation) /* table header */);
+		//End of each table will be DWORD-aligned
+		if((start_reloc_pos + needed_size - size_delta) % sizeof(uint32_t))
+			needed_size += sizeof(uint16_t); //Align it with IMAGE_REL_BASED_ABSOLUTE relocation
+	}
+
+	//Check if reloc_section is last one. If it's not, check if there's enough place for relocations data
+	if(&reloc_section != &*(pe.get_image_sections().end() - 1) && 
+		(reloc_section.empty() || pe_utils::align_up(reloc_section.get_size_of_raw_data(), pe.get_file_alignment()) < needed_size + current_reloc_data_pos))
+		throw pe_exception("Insufficient space for relocations directory", pe_exception::insufficient_space);
+
+	std::string& raw_data = reloc_section.get_raw_data();
+
+	//This will be done only if reloc_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + current_reloc_data_pos)
+		raw_data.resize(needed_size + current_reloc_data_pos); //Expand section raw data
+
+	//Enumerate relocation tables again, this time serializing them into raw_data
+	for(relocation_table_list::const_iterator it = relocs.begin(); it != relocs.end(); ++it)
+	{
+		//Create relocation table header
+		image_base_relocation reloc;
+		reloc.VirtualAddress = (*it).get_rva();
+		const relocation_table::relocation_list& reloc_list = (*it).get_relocations();
+		reloc.SizeOfBlock = static_cast<uint32_t>(sizeof(image_base_relocation) + sizeof(uint16_t) * reloc_list.size());
+		if((reloc_list.size() * sizeof(uint16_t)) % sizeof(uint32_t)) //If we must align end of relocation table
+			reloc.SizeOfBlock += sizeof(uint16_t);
+
+		memcpy(&raw_data[current_reloc_data_pos], &reloc, sizeof(reloc));
+		current_reloc_data_pos += sizeof(reloc);
+
+		//Enumerate relocations in table
+		for(relocation_table::relocation_list::const_iterator r = reloc_list.begin(); r != reloc_list.end(); ++r)
+		{
+			//Save relocations as packed WORD items
+			uint16_t reloc_value = (*r).get_item();
+			memcpy(&raw_data[current_reloc_data_pos], &reloc_value, sizeof(reloc_value));
+			current_reloc_data_pos += sizeof(reloc_value);
+		}
+
+		if(current_reloc_data_pos % sizeof(uint32_t)) //If end of table is not DWORD-aligned
+		{
+			memset(&raw_data[current_reloc_data_pos], 0, sizeof(uint16_t)); //Align it with IMAGE_REL_BASED_ABSOLUTE relocation
+			current_reloc_data_pos += sizeof(uint16_t);
+		}
+	}
+
+	image_directory ret(pe.rva_from_section_offset(reloc_section, start_reloc_pos), needed_size - size_delta);
+	
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(reloc_section, auto_strip_last_section);
+
+	//If auto-rewrite of PE headers is required
+	if(save_to_pe_header)
+	{
+		pe.set_directory_rva(image_directory_entry_basereloc, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_basereloc, ret.get_size());
+
+		pe.clear_characteristics_flags(image_file_relocs_stripped);
+		pe.set_dll_characteristics(pe.get_dll_characteristics() | image_dllcharacteristics_dynamic_base);
+	}
+
+	return ret;
+}
+
+//Recalculates image base with the help of relocation tables: uses the 32-bit instantiation of rebase_image_base for pe_type_32 images, the 64-bit one otherwise
+void rebase_image(pe_base& pe, const relocation_table_list& tables, uint64_t new_base)
+{
+	pe.get_pe_type() == pe_type_32
+		? rebase_image_base<pe_types_class_32>(pe, tables, new_base)
+		: rebase_image_base<pe_types_class_64>(pe, tables, new_base);
+}
+
+//RELOCATIONS
+//Recalculates image base with the help of relocation tables
+//Recalculates VAs of DWORDS/QWORDS in image according to relocations
+//Notice: if you move some critical structures like TLS, image relocations will not fix new
+//positions of TLS VAs. Instead, some bytes that now don't belong to TLS will be fixed.
+//It is recommended to rebase image in the very beginning and move all structures afterwards.
+template<typename PEClassType>
+void rebase_image_base(pe_base& pe, const relocation_table_list& tables, uint64_t new_base)
+{
+	//Get current image base value
+	typename PEClassType::BaseSize image_base;
+	pe.get_image_base(image_base);
+
+	//ImageBase difference (added to every relocated value below)
+	typename PEClassType::BaseSize base_rel = static_cast<typename PEClassType::BaseSize>(static_cast<int64_t>(new_base) - image_base);
+
+	//We need to fix addresses from relocation tables
+	//Enumerate relocation tables
+	for(relocation_table_list::const_iterator it = tables.begin(); it != tables.end(); ++it)
+	{
+		const relocation_table::relocation_list& relocs = (*it).get_relocations();
+
+		uint32_t base_rva = (*it).get_rva();
+
+		//Enumerate relocations
+		for(relocation_table::relocation_list::const_iterator rel = relocs.begin(); rel != relocs.end(); ++rel)
+		{
+			//Skip ABSOLUTE entries
+			if((*rel).get_type() == pe_win::image_rel_based_absolute)
+				continue;
+			
+			//Recalculate value by RVA and rewrite it; every non-ABSOLUTE entry is patched as a BaseSize-wide value, whatever its type
+			uint32_t current_rva = base_rva + (*rel).get_rva();
+			typename PEClassType::BaseSize value = pe.section_data_from_rva<typename PEClassType::BaseSize>(current_rva, section_data_raw, true);
+			value += base_rel;
+			memcpy(pe.section_data_from_rva(current_rva, true), &value, sizeof(value));
+		}
+	}
+
+	//Finally, save new image base
+	pe.set_image_base_64(new_base);
+}
+}
diff --git a/tools/pe_bliss/pe_relocations.h b/tools/pe_bliss/pe_relocations.h
new file mode 100644
index 0000000000..1bc8b2a405
--- /dev/null
+++ b/tools/pe_bliss/pe_relocations.h
@@ -0,0 +1,122 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include "pe_structures.h"
+#include "pe_base.h"
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Class representing relocation entry
+//RVA of relocation is not actually RVA, but
+//(real RVA) - (RVA of table)
+class relocation_entry
+{
+public:
+	//Default constructor
+	relocation_entry();
+	//Constructor from relocation item (WORD)
+	explicit relocation_entry(uint16_t relocation_value);
+	//Constructor from relative rva and relocation type
+	relocation_entry(uint16_t rrva, uint16_t type);
+
+	//Returns RVA of relocation (actually, relative RVA from relocation table RVA)
+	uint16_t get_rva() const;
+	//Returns type of relocation
+	uint16_t get_type() const;
+
+	//Returns relocation item (rrva + type) packed into one WORD
+	uint16_t get_item() const;
+
+public: //Setters do not change anything inside image, they are used by PE class
+	//You can also use them to rebuild relocations using rebuild_relocations()
+
+	//Sets RVA of relocation (actually, relative RVA from relocation table RVA)
+	void set_rva(uint16_t rva);
+	//Sets type of relocation
+	void set_type(uint16_t type);
+		
+	//Sets relocation item (rrva + type) from one packed WORD
+	void set_item(uint16_t item);
+
+private:
+	uint16_t rva_; //Relative RVA: (real RVA) - (RVA of table)
+	uint16_t type_; //Relocation type
+};
+
+//Class representing relocation table (one block RVA plus the list of its relocation entries)
+class relocation_table
+{
+public:
+	typedef std::vector<relocation_entry> relocation_list;
+
+public:
+	//Default constructor
+	relocation_table();
+	//Constructor from RVA of relocation table
+	explicit relocation_table(uint32_t rva);
+
+	//Returns relocation list (read-only)
+	const relocation_list& get_relocations() const;
+	//Returns RVA of block
+	uint32_t get_rva() const;
+
+public: //These functions do not change anything inside image, they are used by PE class
+	//You can also use them to rebuild relocations using rebuild_relocations()
+
+	//Adds relocation to table
+	void add_relocation(const relocation_entry& entry);
+	//Returns changeable relocation list
+	relocation_list& get_relocations();
+	//Sets RVA of block
+	void set_rva(uint32_t rva);
+
+private:
+	uint32_t rva_; //RVA of block
+	relocation_list relocations_; //Entries of this block
+};
+
+typedef std::vector<relocation_table> relocation_table_list;
+
+//Get relocation list of pe file, supports one-word sized relocations only
+//If list_absolute_entries = true, IMAGE_REL_BASED_ABSOLUTE will be listed
+const relocation_table_list get_relocations(const pe_base& pe, bool list_absolute_entries = false);
+
+//Simple relocations rebuilder
+//To keep PE file working, don't remove any of existing relocations in
+//relocation_table_list returned by a call to get_relocations() function
+//auto_strip_last_section - if true and relocations are placed in the last section, it will be automatically stripped
+//offset_from_section_start - offset from the beginning of reloc_section, where relocations data will be situated
+//If save_to_pe_header is true, PE header will be modified automatically
+const image_directory rebuild_relocations(pe_base& pe, const relocation_table_list& relocs, section& reloc_section, uint32_t offset_from_section_start = 0, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+
+//Recalculates image base with the help of relocation tables
+//Recalculates VAs of DWORDS/QWORDS in image according to relocations
+//Notice: if you move some critical structures like TLS, image relocations will not fix new
+//positions of TLS VAs. Instead, some bytes that now don't belong to TLS will be fixed.
+//It is recommended to rebase image in the very beginning and move all structures afterwards.
+void rebase_image(pe_base& pe, const relocation_table_list& tables, uint64_t new_base);
+
+template<typename PEClassType>
+void rebase_image_base(pe_base& pe, const relocation_table_list& tables, uint64_t new_base);
+}
diff --git a/tools/pe_bliss/pe_resource_manager.cpp b/tools/pe_bliss/pe_resource_manager.cpp
new file mode 100644
index 0000000000..0ee7840ff0
--- /dev/null
+++ b/tools/pe_bliss/pe_resource_manager.cpp
@@ -0,0 +1,286 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include <sstream>
+#include <iomanip>
+#include <math.h>
+#include <string.h>
+#include "pe_resource_manager.h"
+#include "resource_internal.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Constructor from root resource directory: it is passed to the pe_resource_viewer base and also used to initialize root_dir_edit_
+pe_resource_manager::pe_resource_manager(resource_directory& root_directory)
+	:pe_resource_viewer(root_directory), root_dir_edit_(root_directory)
+{}
+
+resource_directory& pe_resource_manager::get_root_directory()
+{
+	return root_dir_edit_; //Editable root directory, initialized from the constructor argument
+}
+
+//Removes all resources of given type or root name
+//If there's more than one directory entry of a given type, only the
+//first one will be deleted (that's an unusual situation)
+//Returns true if resource was deleted
+bool pe_resource_manager::remove_resource_type(resource_type type)
+{
+	resource_directory::entry_list& root_entries = root_dir_edit_.get_entry_list();
+
+	//Find the first root entry matching the requested type
+	resource_directory::entry_list::iterator found =
+		std::find_if(root_entries.begin(), root_entries.end(), resource_directory::id_entry_finder(type));
+	if(found == root_entries.end())
+		return false; //Nothing matched, nothing was removed
+
+	//Remove the whole root entry
+	root_entries.erase(found);
+	return true;
+}
+
+bool pe_resource_manager::remove_resource(const std::wstring& root_name)
+{
+	resource_directory::entry_list& root_entries = root_dir_edit_.get_entry_list();
+
+	//Find the first root entry matching the given root name
+	resource_directory::entry_list::iterator found =
+		std::find_if(root_entries.begin(), root_entries.end(), resource_directory::name_entry_finder(root_name));
+	if(found == root_entries.end())
+		return false; //Nothing matched, nothing was removed
+
+	//Remove the whole root entry
+	root_entries.erase(found);
+	return true;
+}
+
+//Helper to remove resource
+//Erases the entry matched by "finder" from the directory of the root entry matched by "root_finder"
+//Returns true if an entry was erased, false if either search found nothing
+bool pe_resource_manager::remove_resource(const resource_directory::entry_finder& root_finder, const resource_directory::entry_finder& finder)
+{
+	//Search for resource type
+	resource_directory::entry_list& type_list = root_dir_edit_.get_entry_list();
+	resource_directory::entry_list::iterator type_pos = std::find_if(type_list.begin(), type_list.end(), root_finder);
+	if(type_pos == type_list.end())
+		return false;
+
+	//Search for resource name/ID with "finder"
+	resource_directory::entry_list& name_list = (*type_pos).get_resource_directory().get_entry_list();
+	resource_directory::entry_list::iterator name_pos = std::find_if(name_list.begin(), name_list.end(), finder);
+	if(name_pos == name_list.end())
+		return false;
+
+	//Erase resource; the root entry is erased too once its directory is left empty
+	name_list.erase(name_pos);
+	if(name_list.empty())
+		type_list.erase(type_pos);
+
+	return true;
+}
+
+//Removes all resource languages by resource type/root name and name
+//Deletes only one entry of given type and name (forwards to the entry_finder-based helper)
+//Returns true if resource was deleted
+bool pe_resource_manager::remove_resource(resource_type type, const std::wstring& name)
+{
+	return remove_resource(resource_directory::entry_finder(type), resource_directory::entry_finder(name));
+}
+
+bool pe_resource_manager::remove_resource(const std::wstring& root_name, const std::wstring& name)
+{ //Root entry is selected by root_name, the entry below it by name; forwards to the entry_finder-based helper
+	return remove_resource(resource_directory::entry_finder(root_name), resource_directory::entry_finder(name));
+}
+
+//Removes all resource languages by resource type/root name and ID
+//Deletes only one entry of given type and ID (forwards to the entry_finder-based helper)
+//Returns true if resource was deleted
+bool pe_resource_manager::remove_resource(resource_type type, uint32_t id)
+{
+	return remove_resource(resource_directory::entry_finder(type), resource_directory::entry_finder(id));
+}
+
+bool pe_resource_manager::remove_resource(const std::wstring& root_name, uint32_t id)
+{ //Root entry is selected by root_name, the entry below it by id; forwards to the entry_finder-based helper
+	return remove_resource(resource_directory::entry_finder(root_name), resource_directory::entry_finder(id));
+}
+
+//Helper to remove resource
+//Erases the language entry with ID "language" found under the entries matched by "root_finder" and then "finder"
+//Returns true if an entry was erased, false if any of the three searches found nothing
+bool pe_resource_manager::remove_resource(const resource_directory::entry_finder& root_finder, const resource_directory::entry_finder& finder, uint32_t language)
+{
+	//Search for resource type
+	resource_directory::entry_list& type_list = root_dir_edit_.get_entry_list();
+	resource_directory::entry_list::iterator type_pos = std::find_if(type_list.begin(), type_list.end(), root_finder);
+	if(type_pos == type_list.end())
+		return false;
+
+	//Search for resource name/ID with "finder"
+	resource_directory::entry_list& name_list = (*type_pos).get_resource_directory().get_entry_list();
+	resource_directory::entry_list::iterator name_pos = std::find_if(name_list.begin(), name_list.end(), finder);
+	if(name_pos == name_list.end())
+		return false;
+
+	//Search for resource language
+	resource_directory::entry_list& lang_list = (*name_pos).get_resource_directory().get_entry_list();
+	resource_directory::entry_list::iterator lang_pos = std::find_if(lang_list.begin(), lang_list.end(), resource_directory::id_entry_finder(language));
+	if(lang_pos == lang_list.end())
+		return false;
+
+	//Erase resource, then erase every parent entry whose directory is left empty
+	lang_list.erase(lang_pos);
+	if(lang_list.empty())
+	{
+		name_list.erase(name_pos);
+		if(name_list.empty())
+			type_list.erase(type_pos);
+	}
+
+	return true;
+}
+
+//Removes resource language by resource type/root name and name
+//Deletes only one entry of given type, name and language (forwards to the entry_finder-based helper)
+//Returns true if resource was deleted
+bool pe_resource_manager::remove_resource(resource_type type, const std::wstring& name, uint32_t language)
+{
+	return remove_resource(resource_directory::entry_finder(type), resource_directory::entry_finder(name), language);
+}
+
+bool pe_resource_manager::remove_resource(const std::wstring& root_name, const std::wstring& name, uint32_t language)
+{ //Root entry is selected by root_name, the entry below it by name; forwards to the entry_finder-based helper
+	return remove_resource(resource_directory::entry_finder(root_name), resource_directory::entry_finder(name), language);
+}
+
+//Removes resource language by resource type/root name and ID
+//Deletes only one entry of given type, ID and language (forwards to the entry_finder-based helper)
+//Returns true if resource was deleted
+bool pe_resource_manager::remove_resource(resource_type type, uint32_t id, uint32_t language)
+{
+	return remove_resource(resource_directory::entry_finder(type), resource_directory::entry_finder(id), language);
+}
+
+bool pe_resource_manager::remove_resource(const std::wstring& root_name, uint32_t id, uint32_t language)
+{ //Root entry is selected by root_name, the entry below it by id; forwards to the entry_finder-based helper
+	return remove_resource(resource_directory::entry_finder(root_name), resource_directory::entry_finder(id), language);
+}
+
+//Helper to add/replace resource: builds a root entry with ID "type" and forwards to the generic helper with a matching finder
+void pe_resource_manager::add_resource(const std::string& data, resource_type type, resource_directory_entry& new_entry, const resource_directory::entry_finder& finder, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	resource_directory_entry new_type_entry;
+	new_type_entry.set_id(type);
+
+	add_resource(data, new_type_entry, resource_directory::entry_finder(type), new_entry, finder, language, codepage, timestamp);
+}
+
+//Helper to add/replace resource: builds a root entry named "root_name" and forwards to the generic helper with a matching finder
+void pe_resource_manager::add_resource(const std::string& data, const std::wstring& root_name, resource_directory_entry& new_entry, const resource_directory::entry_finder& finder, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	resource_directory_entry new_type_entry;
+	new_type_entry.set_name(root_name);
+	
+	add_resource(data, new_type_entry, resource_directory::entry_finder(root_name), new_entry, finder, language, codepage, timestamp);
+}
+
+//Helper to add/replace resource: walks root -> name/ID -> language, creating missing directories, then stores "data" under the "language" ID
+void pe_resource_manager::add_resource(const std::string& data, resource_directory_entry& new_root_entry, const resource_directory::entry_finder& root_finder, resource_directory_entry& new_entry, const resource_directory::entry_finder& finder, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	//Search for resource type
+	resource_directory::entry_list* entries = &root_dir_edit_.get_entry_list();
+	resource_directory::entry_list::iterator it = std::find_if(entries->begin(), entries->end(), root_finder);
+	if(it == entries->end())
+	{
+		//Add resource type directory, if it was not found (new_root_entry itself is modified before being appended)
+		resource_directory dir;
+		dir.set_timestamp(timestamp);
+		new_root_entry.add_resource_directory(dir);
+		entries->push_back(new_root_entry);
+		it = entries->end() - 1;
+	}
+
+	//Search for resource name/ID directory with "finder"
+	entries = &(*it).get_resource_directory().get_entry_list();
+	it = std::find_if(entries->begin(), entries->end(), finder);
+	if(it == entries->end())
+	{
+		//Add resource name/ID directory, if it was not found (new_entry itself is modified before being appended)
+		resource_directory dir;
+		dir.set_timestamp(timestamp);
+		new_entry.add_resource_directory(dir);
+		entries->push_back(new_entry);
+		it = entries->end() - 1;
+	}
+
+	//Search for data resource entry by language
+	entries = &(*it).get_resource_directory().get_entry_list();
+	it = std::find_if(entries->begin(), entries->end(), resource_directory::id_entry_finder(language));
+	if(it != entries->end())
+		entries->erase(it); //Erase it, if found, so the new data replaces it
+
+	//Add new data entry (appended at the end of the language list, with ID = language)
+	resource_directory_entry new_dir_data_entry;
+	resource_data_entry data_dir(data, codepage);
+	new_dir_data_entry.add_data_entry(data_dir);
+	new_dir_data_entry.set_id(language);
+	entries->push_back(new_dir_data_entry);
+}
+
+//Adds resource selected by resource type and name. If resource already exists, replaces it
+void pe_resource_manager::add_resource(const std::string& data, resource_type type, const std::wstring& name, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	resource_directory_entry new_entry;
+	new_entry.set_name(name);
+
+	add_resource(data, type, new_entry, resource_directory::entry_finder(name), language, codepage, timestamp);
+}
+
+//Adds resource selected by root name and name. If resource already exists, replaces it
+void pe_resource_manager::add_resource(const std::string& data, const std::wstring& root_name, const std::wstring& name, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	resource_directory_entry new_entry;
+	new_entry.set_name(name);
+
+	add_resource(data, root_name, new_entry, resource_directory::entry_finder(name), language, codepage, timestamp);
+}
+
+//Adds resource selected by resource type and ID. If resource already exists, replaces it
+void pe_resource_manager::add_resource(const std::string& data, resource_type type, uint32_t id, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	resource_directory_entry new_entry;
+	new_entry.set_id(id);
+
+	add_resource(data, type, new_entry, resource_directory::entry_finder(id), language, codepage, timestamp);
+}
+
+//Adds resource selected by root name and ID. If resource already exists, replaces it
+void pe_resource_manager::add_resource(const std::string& data, const std::wstring& root_name, uint32_t id, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	resource_directory_entry new_entry;
+	new_entry.set_id(id);
+
+	add_resource(data, root_name, new_entry, resource_directory::entry_finder(id), language, codepage, timestamp);
+}
+}
diff --git a/tools/pe_bliss/pe_resource_manager.h b/tools/pe_bliss/pe_resource_manager.h
new file mode 100644
index 0000000000..85d7f44a8a
--- /dev/null
+++ b/tools/pe_bliss/pe_resource_manager.h
@@ -0,0 +1,113 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <map>
+#include <sstream>
+#include <string>
+#include <memory>
+#include "pe_base.h"
+#include "pe_structures.h"
+#include "pe_resources.h"
+#include "message_table.h"
+#include "file_version_info.h"
+#include "pe_resource_viewer.h"
+#include "resource_data_info.h"
+
+namespace pe_bliss
+{
+//Resource editor: extends pe_resource_viewer with operations that remove and add/replace resources
+class pe_resource_manager : public pe_resource_viewer
+{
+public:
+	//Constructor from root resource directory (taken by non-const reference, see root_dir_edit_)
+	explicit pe_resource_manager(resource_directory& root_directory);
+	
+	resource_directory& get_root_directory(); //Non-const counterpart of pe_resource_viewer::get_root_directory()
+
+public: //Resource editing
+	//Removes all resources of given type or root name
+	//If there's more than one directory entry of a given type, only the
+	//first one will be deleted (that's an unusual situation)
+	//Returns true if resource was deleted
+	bool remove_resource_type(resource_type type);
+	bool remove_resource(const std::wstring& root_name);
+	
+	//Removes all resource languages by resource type/root name and name
+	//Deletes only one entry of given type and name
+	//Returns true if resource was deleted
+	bool remove_resource(resource_type type, const std::wstring& name);
+	bool remove_resource(const std::wstring& root_name, const std::wstring& name);
+	//Removes all resource languages by resource type/root name and ID
+	//Deletes only one entry of given type and ID
+	//Returns true if resource was deleted
+	bool remove_resource(resource_type type, uint32_t id);
+	bool remove_resource(const std::wstring& root_name, uint32_t id);
+
+	//Removes resource language by resource type/root name and name
+	//Deletes only one entry of given type, name and language
+	//Returns true if resource was deleted
+	bool remove_resource(resource_type type, const std::wstring& name, uint32_t language);
+	bool remove_resource(const std::wstring& root_name, const std::wstring& name, uint32_t language);
+	//Removes resource language by resource type/root name and ID
+	//Deletes only one entry of given type, ID and language
+	//Returns true if resource was deleted
+	bool remove_resource(resource_type type, uint32_t id, uint32_t language);
+	bool remove_resource(const std::wstring& root_name, uint32_t id, uint32_t language);
+	
+	//Adds resource (addressed by name). If resource already exists, replaces it
+	//timestamp will be used for directories that will be added
+	void add_resource(const std::string& data, resource_type type, const std::wstring& name, uint32_t language, uint32_t codepage = 0, uint32_t timestamp = 0);
+	void add_resource(const std::string& data, const std::wstring& root_name, const std::wstring& name, uint32_t language, uint32_t codepage = 0, uint32_t timestamp = 0);
+	//Adds resource (addressed by ID). If resource already exists, replaces it
+	//timestamp will be used for directories that will be added
+	void add_resource(const std::string& data, resource_type type, uint32_t id, uint32_t language, uint32_t codepage = 0, uint32_t timestamp = 0);
+	void add_resource(const std::string& data, const std::wstring& root_name, uint32_t id, uint32_t language, uint32_t codepage = 0, uint32_t timestamp = 0);
+
+public:
+	//Helpers to add/replace resource: the caller supplies the entry to use and the finder that locates an existing one
+	void add_resource(const std::string& data, resource_type type,
+		resource_directory_entry& new_entry,
+		const resource_directory::entry_finder& finder,
+		uint32_t language, uint32_t codepage, uint32_t timestamp);
+
+	void add_resource(const std::string& data, const std::wstring& root_name,
+		resource_directory_entry& new_entry,
+		const resource_directory::entry_finder& finder,
+		uint32_t language, uint32_t codepage, uint32_t timestamp);
+
+	void add_resource(const std::string& data, resource_directory_entry& new_root_entry,
+		const resource_directory::entry_finder& root_finder,
+		resource_directory_entry& new_entry,
+		const resource_directory::entry_finder& finder,
+		uint32_t language, uint32_t codepage, uint32_t timestamp);
+
+private:
+	//Root resource directory. We're not copying it, because it might be heavy
+	resource_directory& root_dir_edit_;
+
+	//Helper to remove resource (root-level finder plus name/ID-level finder)
+	bool remove_resource(const resource_directory::entry_finder& root_finder, const resource_directory::entry_finder& finder);
+
+	//Helper to remove resource (same finders, additionally restricted to one language)
+	bool remove_resource(const resource_directory::entry_finder& root_finder, const resource_directory::entry_finder& finder, uint32_t language);
+};
+}
diff --git a/tools/pe_bliss/pe_resource_viewer.cpp b/tools/pe_bliss/pe_resource_viewer.cpp
new file mode 100644
index 0000000000..712cc28d9b
--- /dev/null
+++ b/tools/pe_bliss/pe_resource_viewer.cpp
@@ -0,0 +1,382 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include <cmath>
+#include "pe_resource_viewer.h"
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Constructor from root resource_directory: binds the reference member root_dir_ to it, so root_directory has to outlive the viewer
+pe_resource_viewer::pe_resource_viewer(const resource_directory& root_directory)
+	:root_dir_(root_directory)
+{}
+
+const resource_directory& pe_resource_viewer::get_root_directory() const
+{
+	return root_dir_; //The directory reference handed to the constructor
+}
+
+//Finder helpers: unary predicates over resource_directory_entry
+bool pe_resource_viewer::has_name::operator()(const resource_directory_entry& entry) const
+{
+	return entry.is_named(); //True for entries identified by name
+}
+
+bool pe_resource_viewer::has_id::operator()(const resource_directory_entry& entry) const
+{
+	return !entry.is_named(); //Every entry that is not named is treated as an ID entry
+}
+
+//Lists resource types present in the root directory (IDs of its non-named entries)
+const pe_resource_viewer::resource_type_list pe_resource_viewer::list_resource_types() const
+{
+	//Named root entries are left out
+	const resource_directory::entry_list& root_entries = root_dir_.get_entry_list();
+	const resource_directory::entry_list::const_iterator last = root_entries.end();
+
+	resource_type_list types;
+	for(resource_directory::entry_list::const_iterator cur = root_entries.begin(); cur != last; ++cur)
+	{
+		if(cur->is_named())
+			continue;
+		types.push_back(cur->get_id());
+	}
+	return types;
+}
+
+//Returns true if id_entry_finder(type) matches some entry of the root directory
+bool pe_resource_viewer::resource_exists(resource_type type) const
+{
+	const resource_directory::entry_list& root_entries = root_dir_.get_entry_list();
+	return root_entries.end() != std::find_if(root_entries.begin(), root_entries.end(), resource_directory::id_entry_finder(type));
+}
+
+//Returns true if name_entry_finder(root_name) matches some entry of the root directory
+bool pe_resource_viewer::resource_exists(const std::wstring& root_name) const
+{
+	const resource_directory::entry_list& root_entries = root_dir_.get_entry_list();
+	return root_entries.end() != std::find_if(root_entries.begin(), root_entries.end(), resource_directory::name_entry_finder(root_name));
+}
+
+//Helper: collects, in list order, the names of all named entries
+const pe_resource_viewer::resource_name_list pe_resource_viewer::get_name_list(const resource_directory::entry_list& entries)
+{
+	resource_name_list names;
+	const resource_directory::entry_list::const_iterator last = entries.end();
+	for(resource_directory::entry_list::const_iterator cur = entries.begin(); cur != last; ++cur)
+	{
+		//Entries without a name (ID entries) contribute nothing
+		if(!cur->is_named())
+			continue;
+		names.push_back(cur->get_name());
+	}
+	return names;
+}
+
+//Helper: collects, in list order, the IDs of all non-named entries
+const pe_resource_viewer::resource_id_list pe_resource_viewer::get_id_list(const resource_directory::entry_list& entries)
+{
+	resource_id_list ids;
+	const resource_directory::entry_list::const_iterator last = entries.end();
+	for(resource_directory::entry_list::const_iterator cur = entries.begin(); cur != last; ++cur)
+	{
+		//Named entries contribute nothing
+		if(cur->is_named())
+			continue;
+		ids.push_back(cur->get_id());
+	}
+	return ids;
+}
+
+//Lists resource names by resource type: names of the named entries in that type's directory (get_name_list skips ID entries)
+const pe_resource_viewer::resource_name_list pe_resource_viewer::list_resource_names(resource_type type) const
+{
+	return get_name_list(root_dir_.entry_by_id(type).get_resource_directory().get_entry_list());
+}
+
+//Lists resource names by root name: names of the named entries in the directory of the root entry called root_name
+const pe_resource_viewer::resource_name_list pe_resource_viewer::list_resource_names(const std::wstring& root_name) const
+{
+	return get_name_list(root_dir_.entry_by_name(root_name).get_resource_directory().get_entry_list());
+}
+
+//Lists resource IDs by resource type: IDs of the non-named entries in that type's directory (get_id_list skips named entries)
+const pe_resource_viewer::resource_id_list pe_resource_viewer::list_resource_ids(resource_type type) const
+{
+	return get_id_list(root_dir_.entry_by_id(type).get_resource_directory().get_entry_list());
+}
+
+//Lists resource IDs by root name: IDs of the non-named entries in the directory of the root entry called root_name
+const pe_resource_viewer::resource_id_list pe_resource_viewer::list_resource_ids(const std::wstring& root_name) const
+{
+	return get_id_list(root_dir_.entry_by_name(root_name).get_resource_directory().get_entry_list());
+}
+
+//Returns resource count by type: number of entries (named and ID alike) in that type's name/ID directory
+unsigned long pe_resource_viewer::get_resource_count(resource_type type) const
+{
+	return static_cast<unsigned long>(
+		root_dir_ //Type directory
+		.entry_by_id(type)
+		.get_resource_directory() //Name/ID directory
+		.get_entry_list()
+		.size());
+}
+
+//Returns resource count by root name: number of entries (named and ID alike) in the name/ID directory of root_name
+unsigned long pe_resource_viewer::get_resource_count(const std::wstring& root_name) const
+{
+	return static_cast<unsigned long>(
+		root_dir_ //Type directory
+		.entry_by_name(root_name)
+		.get_resource_directory() //Name/ID directory
+		.get_entry_list()
+		.size());
+}
+
+//Counts the language entries (those identified by ID) of the resource with the given type and name
+unsigned long pe_resource_viewer::get_language_count(resource_type type, const std::wstring& name) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_id(type).get_resource_directory()
+		.entry_by_name(name).get_resource_directory()
+		.get_entry_list();
+
+	//Named entries of the language directory are not counted
+	return static_cast<unsigned long>(
+		std::count_if(language_entries.begin(), language_entries.end(), has_id()));
+}
+
+//Counts the language entries (those identified by ID) of the resource with the given root name and name
+unsigned long pe_resource_viewer::get_language_count(const std::wstring& root_name, const std::wstring& name) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_name(root_name).get_resource_directory()
+		.entry_by_name(name).get_resource_directory()
+		.get_entry_list();
+
+	//Named entries of the language directory are not counted
+	return static_cast<unsigned long>(
+		std::count_if(language_entries.begin(), language_entries.end(), has_id()));
+}
+
+//Counts the language entries (those identified by ID) of the resource with the given type and ID
+unsigned long pe_resource_viewer::get_language_count(resource_type type, uint32_t id) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_id(type).get_resource_directory()
+		.entry_by_id(id).get_resource_directory()
+		.get_entry_list();
+
+	//Named entries of the language directory are not counted
+	return static_cast<unsigned long>(
+		std::count_if(language_entries.begin(), language_entries.end(), has_id()));
+}
+
+//Counts the language entries (those identified by ID) of the resource with the given root name and ID
+unsigned long pe_resource_viewer::get_language_count(const std::wstring& root_name, uint32_t id) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_name(root_name).get_resource_directory()
+		.entry_by_id(id).get_resource_directory()
+		.get_entry_list();
+
+	//Named entries of the language directory are not counted
+	return static_cast<unsigned long>(
+		std::count_if(language_entries.begin(), language_entries.end(), has_id()));
+}
+
+//Lists the language IDs of the resource with the given type and name
+const pe_resource_viewer::resource_language_list pe_resource_viewer::list_resource_languages(resource_type type, const std::wstring& name) const
+{
+	//Languages are the IDs of the entries in the resource's language directory
+	return get_id_list(
+		root_dir_ //Type directory
+		.entry_by_id(type)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_name(name)
+		.get_resource_directory() //Language directory
+		.get_entry_list()
+	);
+}
+
+//Lists the language IDs of the resource with the given root name and name
+const pe_resource_viewer::resource_language_list pe_resource_viewer::list_resource_languages(const std::wstring& root_name, const std::wstring& name) const
+{
+	//Languages are the IDs of the entries in the resource's language directory
+	return get_id_list(
+		root_dir_ //Type directory
+		.entry_by_name(root_name)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_name(name)
+		.get_resource_directory() //Language directory
+		.get_entry_list()
+	);
+}
+
+//Lists the language IDs of the resource with the given type and ID
+const pe_resource_viewer::resource_language_list pe_resource_viewer::list_resource_languages(resource_type type, uint32_t id) const
+{
+	//Languages are the IDs of the entries in the resource's language directory
+	return get_id_list(
+		root_dir_ //Type directory
+		.entry_by_id(type)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_id(id)
+		.get_resource_directory() //Language directory
+		.get_entry_list()
+	);
+}
+
+//Lists the language IDs of the resource with the given root name and ID
+const pe_resource_viewer::resource_language_list pe_resource_viewer::list_resource_languages(const std::wstring& root_name, uint32_t id) const
+{
+	//Languages are the IDs of the entries in the resource's language directory
+	return get_id_list(
+		root_dir_ //Type directory
+		.entry_by_name(root_name)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_id(id)
+		.get_resource_directory() //Language directory
+		.get_entry_list()
+	);
+}
+
+//Returns raw resource data (wrapped in resource_data_info) for the resource found by type ID -> name -> language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_name(uint32_t language, resource_type type, const std::wstring& name) const
+{
+	return resource_data_info(root_dir_ //Type directory
+		.entry_by_id(type)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_name(name)
+		.get_resource_directory() //Language directory
+		.entry_by_id(language)
+		.get_data_entry()); //Data directory
+}
+
+//Returns raw resource data (wrapped in resource_data_info) for the resource found by root name -> name -> language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_name(uint32_t language, const std::wstring& root_name, const std::wstring& name) const
+{
+	return resource_data_info(root_dir_ //Type directory
+		.entry_by_name(root_name)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_name(name)
+		.get_resource_directory() //Language directory
+		.entry_by_id(language)
+		.get_data_entry()); //Data directory
+}
+
+//Returns raw resource data (wrapped in resource_data_info) for the resource found by type ID -> resource ID -> language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_id(uint32_t language, resource_type type, uint32_t id) const
+{
+	return resource_data_info(root_dir_ //Type directory
+		.entry_by_id(type)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_id(id)
+		.get_resource_directory() //Language directory
+		.entry_by_id(language)
+		.get_data_entry()); //Data directory
+}
+
+//Returns raw resource data (wrapped in resource_data_info) for the resource found by root name -> resource ID -> language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_id(uint32_t language, const std::wstring& root_name, uint32_t id) const
+{
+	return resource_data_info(root_dir_ //Type directory
+		.entry_by_name(root_name)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_id(id)
+		.get_resource_directory() //Language directory
+		.entry_by_id(language)
+		.get_data_entry()); //Data directory
+}
+
+//Returns raw resource data by type and name, picking the language entry by position (index) instead of language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_name(resource_type type, const std::wstring& name, uint32_t index) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_id(type).get_resource_directory()
+		.entry_by_name(name).get_resource_directory()
+		.get_entry_list();
+
+	//A valid index yields the data entry stored at that position
+	if(index < language_entries.size())
+		return resource_data_info(language_entries.at(index).get_data_entry());
+
+	throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+}
+
+//Returns raw resource data by root name and name, picking the language entry by position (index) instead of language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_name(const std::wstring& root_name, const std::wstring& name, uint32_t index) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_name(root_name).get_resource_directory()
+		.entry_by_name(name).get_resource_directory()
+		.get_entry_list();
+
+	//A valid index yields the data entry stored at that position
+	if(index < language_entries.size())
+		return resource_data_info(language_entries.at(index).get_data_entry());
+
+	throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+}
+
+//Returns raw resource data by type and ID, picking the language entry by position (index) instead of language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_id(resource_type type, uint32_t id, uint32_t index) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_id(type).get_resource_directory()
+		.entry_by_id(id).get_resource_directory()
+		.get_entry_list();
+
+	//A valid index yields the data entry stored at that position
+	if(index < language_entries.size())
+		return resource_data_info(language_entries.at(index).get_data_entry());
+
+	throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+}
+
+//Returns raw resource data by root name and ID, picking the language entry by position (index) instead of language ID
+const resource_data_info pe_resource_viewer::get_resource_data_by_id(const std::wstring& root_name, uint32_t id, uint32_t index) const
+{
+	//Type directory -> name/ID directory -> language directory
+	const resource_directory::entry_list& language_entries =
+		root_dir_.entry_by_name(root_name).get_resource_directory()
+		.entry_by_id(id).get_resource_directory()
+		.get_entry_list();
+
+	//A valid index yields the data entry stored at that position
+	if(index < language_entries.size())
+		return resource_data_info(language_entries.at(index).get_data_entry());
+
+	throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+}
+}
diff --git a/tools/pe_bliss/pe_resource_viewer.h b/tools/pe_bliss/pe_resource_viewer.h
new file mode 100644
index 0000000000..e585da6a87
--- /dev/null
+++ b/tools/pe_bliss/pe_resource_viewer.h
@@ -0,0 +1,153 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <map>
+#include <string>
+#include "pe_structures.h"
+#include "pe_resources.h"
+#include "message_table.h"
+#include "resource_data_info.h"
+
+namespace pe_bliss
+{
+	//PE resource viewer: read-only access to the resources of a PE file
+class pe_resource_viewer
+{
+public:
+	//Resource type enumeration (numeric IDs of the root-level type entries)
+	enum resource_type
+	{
+		resource_cursor = 1,
+		resource_bitmap = 2,
+		resource_icon = 3,
+		resource_menu = 4,
+		resource_dialog = 5,
+		resource_string = 6,
+		resource_fontdir = 7,
+		resource_font = 8,
+		resource_accelerator = 9,
+		resource_rcdata = 10,
+		resource_message_table = 11,
+		resource_cursor_group = 12,
+		resource_icon_group = 14,
+		resource_version = 16,
+		resource_dlginclude = 17,
+		resource_plugplay = 19,
+		resource_vxd = 20,
+		resource_anicursor = 21,
+		resource_aniicon = 22,
+		resource_html = 23,
+		resource_manifest = 24
+	};
+
+public:
+	//Some useful typedefs (types, IDs and languages all share the same underlying vector type)
+	typedef std::vector<uint32_t> resource_type_list;
+	typedef std::vector<uint32_t> resource_id_list;
+	typedef std::vector<std::wstring> resource_name_list;
+	typedef std::vector<uint32_t> resource_language_list;
+	
+public:
+	//Constructor from root resource_directory from PE file (stored by reference, see root_dir_)
+	explicit pe_resource_viewer(const resource_directory& root_directory);
+
+	const resource_directory& get_root_directory() const;
+
+	//Lists resource types existing in PE file (non-named only)
+	const resource_type_list list_resource_types() const;
+	//Returns true if resource type exists
+	bool resource_exists(resource_type type) const;
+	//Returns true if a root entry with the given name exists
+	bool resource_exists(const std::wstring& root_name) const;
+
+	//Lists resource names existing in PE file by resource type
+	const resource_name_list list_resource_names(resource_type type) const;
+	//Lists resource names existing in PE file by root name
+	const resource_name_list list_resource_names(const std::wstring& root_name) const;
+	//Lists resource IDs existing in PE file by resource type
+	const resource_id_list list_resource_ids(resource_type type) const;
+	//Lists resource IDs existing in PE file by root name
+	const resource_id_list list_resource_ids(const std::wstring& root_name) const;
+	//Returns resource count by type
+	unsigned long get_resource_count(resource_type type) const;
+	//Returns resource count by root name
+	unsigned long get_resource_count(const std::wstring& root_name) const;
+
+	//Returns language count of resource by resource type and name
+	unsigned long get_language_count(resource_type type, const std::wstring& name) const;
+	//Returns language count of resource by root name and name
+	unsigned long get_language_count(const std::wstring& root_name, const std::wstring& name) const;
+	//Returns language count of resource by resource type and ID
+	unsigned long get_language_count(resource_type type, uint32_t id) const;
+	//Returns language count of resource by root name and ID
+	unsigned long get_language_count(const std::wstring& root_name, uint32_t id) const;
+	//Lists resource languages by resource type and name
+	const resource_language_list list_resource_languages(resource_type type, const std::wstring& name) const;
+	//Lists resource languages by root name and name
+	const resource_language_list list_resource_languages(const std::wstring& root_name, const std::wstring& name) const;
+	//Lists resource languages by resource type and ID
+	const resource_language_list list_resource_languages(resource_type type, uint32_t id) const;
+	//Lists resource languages by root name and ID
+	const resource_language_list list_resource_languages(const std::wstring& root_name, uint32_t id) const;
+
+	//Returns raw resource data by type, name and language
+	const resource_data_info get_resource_data_by_name(uint32_t language, resource_type type, const std::wstring& name) const;
+	//Returns raw resource data by root name, name and language
+	const resource_data_info get_resource_data_by_name(uint32_t language, const std::wstring& root_name, const std::wstring& name) const;
+	//Returns raw resource data by type, ID and language
+	const resource_data_info get_resource_data_by_id(uint32_t language, resource_type type, uint32_t id) const;
+	//Returns raw resource data by root name, ID and language
+	const resource_data_info get_resource_data_by_id(uint32_t language, const std::wstring& root_name, uint32_t id) const;
+	//Returns raw resource data by type, name and index in language directory (instead of language)
+	const resource_data_info get_resource_data_by_name(resource_type type, const std::wstring& name, uint32_t index = 0) const;
+	//Returns raw resource data by root name, name and index in language directory (instead of language)
+	const resource_data_info get_resource_data_by_name(const std::wstring& root_name, const std::wstring& name, uint32_t index = 0) const;
+	//Returns raw resource data by type, ID and index in language directory (instead of language)
+	const resource_data_info get_resource_data_by_id(resource_type type, uint32_t id, uint32_t index = 0) const;
+	//Returns raw resource data by root name, ID and index in language directory (instead of language)
+	const resource_data_info get_resource_data_by_id(const std::wstring& root_name, uint32_t id, uint32_t index = 0) const;
+
+protected:
+	//Root resource directory. We're not copying it, because it might be heavy
+	const resource_directory& root_dir_;
+
+	//Helper function to get ID list from entry list
+	static const resource_id_list get_id_list(const resource_directory::entry_list& entries);
+	//Helper function to get name list from entry list
+	static const resource_name_list get_name_list(const resource_directory::entry_list& entries);
+
+protected:
+	//Helper structure - predicate that is true for a resource_directory_entry that is named
+	struct has_name
+	{
+	public:
+		bool operator()(const resource_directory_entry& entry) const;
+	};
+
+	//Helper structure - predicate that is true for a resource_directory_entry that is not named (has id)
+	struct has_id
+	{
+	public:
+		bool operator()(const resource_directory_entry& entry) const;
+	};
+};
+}
diff --git a/tools/pe_bliss/pe_resources.cpp b/tools/pe_bliss/pe_resources.cpp
new file mode 100644
index 0000000000..189aba1f76
--- /dev/null
+++ b/tools/pe_bliss/pe_resources.cpp
@@ -0,0 +1,726 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include <string.h>
+#include "pe_resources.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//RESOURCES
+//Default constructor: codepage is zero, data is empty
+resource_data_entry::resource_data_entry()
+	:codepage_(0), data_()
+{}
+
+//Constructor from resource data and its codepage (the data is copied)
+resource_data_entry::resource_data_entry(const std::string& data, uint32_t codepage)
+	:codepage_(codepage), data_(data.data(), data.length())
+{}
+
+//Returns the codepage stored for this resource data
+uint32_t resource_data_entry::get_codepage() const
+{
+	return this->codepage_;
+}
+
+//Returns the stored resource data (read-only reference, no copy is made)
+const std::string& resource_data_entry::get_data() const
+{
+	return this->data_;
+}
+
+//Replaces the codepage stored for this resource data
+void resource_data_entry::set_codepage(uint32_t codepage)
+{
+	this->codepage_ = codepage;
+}
+
+//Replaces the stored resource data with a copy of the given string
+void resource_data_entry::set_data(const std::string& data)
+{
+	this->data_ = data;
+}
+
+//Default constructor: the union'ed pointer starts as a null pointer
+resource_directory_entry::includes::includes()
+	:data_(NULL)
+{}
+
+//Default constructor: unnamed entry with ID 0 that owns neither data nor directory
+resource_directory_entry::resource_directory_entry()
+	:id_(0), name_(), ptr_(), includes_data_(false), named_(false)
+{}
+
+//Copy constructor: clones the data entry or directory owned by the other entry
+resource_directory_entry::resource_directory_entry(const resource_directory_entry& other)
+	:id_(other.id_), name_(other.name_), includes_data_(other.includes_data_), named_(other.named_)
+{
+	//A zero union'ed pointer means there is nothing to clone
+	if(!other.ptr_.data_)
+		return;
+
+	if(other.includes_data_)
+		ptr_.data_ = new resource_data_entry(*other.ptr_.data_);
+	else
+		ptr_.dir_ = new resource_directory(*other.ptr_.dir_);
+}
+
+//Copy assignment operator (safe for self-assignment); returns *this
+resource_directory_entry& resource_directory_entry::operator=(const resource_directory_entry& other)
+{
+	if(this == &other) //release() below would otherwise destroy the very data we are about to copy
+		return *this;
+	release();
+	id_ = other.id_;
+	name_ = other.name_;
+	includes_data_ = other.includes_data_;
+	named_ = other.named_;
+
+	//Deep-copy whatever the other union'ed pointer refers to (if it is not zero)
+	if(other.ptr_.data_)
+	{
+		if(other.includes_data())
+			ptr_.data_ = new resource_data_entry(*other.ptr_.data_);
+		else
+			ptr_.dir_ = new resource_directory(*other.ptr_.dir_);
+	}
+	return *this;
+}
+
+//Destroys included data (or directory) and zeroes the union'ed pointer
+void resource_directory_entry::release()
+{
+	//Nothing is owned when the union'ed pointer is zero
+	if(!ptr_.data_)
+		return;
+
+	if(includes_data_)
+		delete ptr_.data_;
+	else
+		delete ptr_.dir_;
+
+	ptr_.data_ = 0;
+}
+
+//Destructor: frees the owned data entry or directory, if any
+resource_directory_entry::~resource_directory_entry()
+{
+	this->release();
+}
+
+//Returns the stored entry ID (set_name() resets it to zero)
+uint32_t resource_directory_entry::get_id() const
+{
+	return this->id_;
+}
+
+//Returns the stored entry name (set_id() clears it)
+const std::wstring& resource_directory_entry::get_name() const
+{
+	return this->name_;
+}
+
+//Tells how the entry is identified:
+//true - by name, false - by ID
+bool resource_directory_entry::is_named() const
+{
+	return this->named_;
+}
+
+//Tells what the entry contains:
+//true - resource_data_entry, false - resource_directory
+bool resource_directory_entry::includes_data() const
+{
+	return this->includes_data_;
+}
+
+//Returns the contained resource_directory; throws pe_exception if the entry holds data or nothing at all
+const resource_directory& resource_directory_entry::get_resource_directory() const
+{
+	if(ptr_.dir_ && !includes_data_)
+		return *ptr_.dir_;
+
+	throw pe_exception("Resource directory entry does not contain resource directory", pe_exception::resource_directory_entry_error);
+}
+
+//Returns the contained resource_data_entry; throws pe_exception if the entry holds a directory or nothing at all
+const resource_data_entry& resource_directory_entry::get_data_entry() const
+{
+	if(ptr_.data_ && includes_data_)
+		return *ptr_.data_;
+
+	throw pe_exception("Resource directory entry does not contain resource data entry", pe_exception::resource_directory_entry_error);
+}
+
+//Returns resource_directory if entry includes it, otherwise throws an exception
+resource_directory& resource_directory_entry::get_resource_directory()
+{
+	//Reuse the const overload (same check, same exception) and drop constness afterwards;
+	//this is fine because *this is known to be non-const here
+	const resource_directory_entry& self = *this;
+	return const_cast<resource_directory&>(self.get_resource_directory());
+}
+
+//Returns resource_data_entry if entry includes it, otherwise throws an exception
+resource_data_entry& resource_directory_entry::get_data_entry()
+{
+	//Reuse the const overload (same check, same exception) and drop constness afterwards;
+	//this is fine because *this is known to be non-const here
+	const resource_directory_entry& self = *this;
+	return const_cast<resource_data_entry&>(self.get_data_entry());
+}
+
+//Sets entry name: the entry becomes a named one and its ID is reset to zero
+void resource_directory_entry::set_name(const std::wstring& name)
+{
+	this->name_ = name;
+	this->id_ = 0;
+	this->named_ = true;
+}
+
+//Sets entry ID: the entry becomes an unnamed one and its name is cleared
+void resource_directory_entry::set_id(uint32_t id)
+{
+	this->named_ = false;
+	this->id_ = id;
+	this->name_.clear();
+}
+
+//Stores a copy of the given resource_data_entry, replacing whatever the entry held before
+void resource_directory_entry::add_data_entry(const resource_data_entry& entry)
+{
+	this->release(); //NOTE: runs before the copy is made, so "entry" must not be owned by this object
+	this->ptr_.data_ = new resource_data_entry(entry);
+	this->includes_data_ = true;
+}
+
+//Stores a copy of the given resource_directory, replacing whatever the entry held before
+void resource_directory_entry::add_resource_directory(const resource_directory& dir)
+{
+	this->release(); //NOTE: runs before the copy is made, so "dir" must not be owned by this object
+	this->ptr_.dir_ = new resource_directory(dir);
+	this->includes_data_ = false;
+}
+
+//Default constructor: all header fields and counters are zero, the entry list is empty
+resource_directory::resource_directory()
+	:characteristics_(0), timestamp_(0),
+	major_version_(0),
+	minor_version_(0),
+	number_of_named_entries_(0), number_of_id_entries_(0), entries_()
+{}
+
+//Constructor from image_resource_directory: copies characteristics, timestamp and version fields
+resource_directory::resource_directory(const image_resource_directory& dir)
+	:characteristics_(dir.Characteristics), timestamp_(dir.TimeDateStamp),
+	major_version_(dir.MajorVersion),
+	minor_version_(dir.MinorVersion),
+	number_of_named_entries_(0), number_of_id_entries_(0), entries_() //Counters start at zero and grow as entries are added
+{}
+
+//Returns the stored characteristics value of the directory
+uint32_t resource_directory::get_characteristics() const
+{
+	return this->characteristics_;
+}
+
+//Returns the stored date and time stamp of the directory
+uint32_t resource_directory::get_timestamp() const
+{
+	return this->timestamp_;
+}
+
+//Returns the stored major version of the directory
+uint16_t resource_directory::get_major_version() const
+{
+	return this->major_version_;
+}
+
+//Returns the stored minor version of the directory
+uint16_t resource_directory::get_minor_version() const
+{
+	return this->minor_version_;
+}
+
+//Returns the stored counter of named entries
+uint32_t resource_directory::get_number_of_named_entries() const
+{
+	return this->number_of_named_entries_;
+}
+
+//Returns the stored counter of ID entries
+uint32_t resource_directory::get_number_of_id_entries() const
+{
+	return this->number_of_id_entries_;
+}
+
+//Returns the resource_directory_entry array (read-only reference)
+const resource_directory::entry_list& resource_directory::get_entry_list() const
+{
+	return this->entries_;
+}
+
+//Returns the resource_directory_entry array (modifiable reference; the entry counters are not updated through it)
+resource_directory::entry_list& resource_directory::get_entry_list()
+{
+	return this->entries_;
+}
+
+//Adds a copy of resource_directory_entry and increments the matching (named / ID) entry counter
+void resource_directory::add_resource_directory_entry(const resource_directory_entry& entry)
+{
+	//Query the flag before push_back: if "entry" refers to an element of entries_,
+	//a reallocation inside push_back would leave the reference dangling
+	const bool named = entry.is_named();
+	entries_.push_back(entry);
+	++(named ? number_of_named_entries_ : number_of_id_entries_);
+}
+
+//Removes all entries and resets both entry counters to zero
+void resource_directory::clear_resource_directory_entry_list()
+{
+	this->number_of_id_entries_ = 0;
+	this->number_of_named_entries_ = 0;
+	this->entries_.clear();
+}
+
+//Replaces the stored characteristics value of the directory
+void resource_directory::set_characteristics(uint32_t characteristics)
+{
+	this->characteristics_ = characteristics;
+}
+
+//Replaces the stored date and time stamp of the directory
+void resource_directory::set_timestamp(uint32_t timestamp)
+{
+	this->timestamp_ = timestamp;
+}
+
+//Overwrites the stored counter of named entries (the entry list itself is not touched)
+void resource_directory::set_number_of_named_entries(uint32_t number)
+{
+	this->number_of_named_entries_ = number;
+}
+
+//Overwrites the stored counter of ID entries (the entry list itself is not touched)
+void resource_directory::set_number_of_id_entries(uint32_t number)
+{
+	this->number_of_id_entries_ = number;
+}
+
+//Replaces the stored major version of the directory
+void resource_directory::set_major_version(uint16_t major_version)
+{
+	this->major_version_ = major_version;
+}
+
+//Sets minor version of directory (NOTE: despite the get_ prefix, this overload taking an argument is the setter)
+void resource_directory::get_minor_version(uint16_t minor_version)
+{
+	this->minor_version_ = minor_version;
+}
+
+//Processes resource directory at res_rva + offset_to_directory, recursing into nested directories; throws pe_exception on malformed data
+const resource_directory process_resource_directory(const pe_base& pe, uint32_t res_rva, uint32_t offset_to_directory, std::set<uint32_t>& processed)
+{
+	resource_directory ret;
+	
+	//Check for resource loops: each directory offset may be visited only once
+	if(!processed.insert(offset_to_directory).second)
+		throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+	if(!pe_utils::is_sum_safe(res_rva, offset_to_directory))
+		throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+	//Get IMAGE_RESOURCE_DIRECTORY header of the directory being processed
+	image_resource_directory directory = pe.section_data_from_rva<image_resource_directory>(res_rva + offset_to_directory, section_data_virtual, true);
+
+	ret = resource_directory(directory);
+
+	//Check DWORDs for possible overflows
+	if(!pe_utils::is_sum_safe(directory.NumberOfIdEntries, directory.NumberOfNamedEntries)
+		|| directory.NumberOfIdEntries + directory.NumberOfNamedEntries >= pe_utils::max_dword / sizeof(image_resource_directory_entry) + sizeof(image_resource_directory))
+		throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+	if(!pe_utils::is_sum_safe(offset_to_directory, sizeof(image_resource_directory) + (directory.NumberOfIdEntries + directory.NumberOfNamedEntries) * sizeof(image_resource_directory_entry))
+		|| !pe_utils::is_sum_safe(res_rva, offset_to_directory + sizeof(image_resource_directory) + (directory.NumberOfIdEntries + directory.NumberOfNamedEntries) * sizeof(image_resource_directory_entry)))
+		throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+	for(unsigned long i = 0; i != static_cast<unsigned long>(directory.NumberOfIdEntries) + directory.NumberOfNamedEntries; ++i)
+	{
+		//Read directory entries one by one (they follow the directory header)
+		image_resource_directory_entry dir_entry = pe.section_data_from_rva<image_resource_directory_entry>(
+			res_rva + sizeof(image_resource_directory) + i * sizeof(image_resource_directory_entry) + offset_to_directory, section_data_virtual, true);
+
+		//Create directory entry structure
+		resource_directory_entry entry;
+
+		//If directory is named
+		if(dir_entry.NameIsString)
+		{
+			if(!pe_utils::is_sum_safe(res_rva + sizeof(uint16_t) /* safe */, dir_entry.NameOffset))
+				throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+			//get directory name length (it is stored as a 16-bit value right before the name itself)
+			uint16_t directory_name_length = pe.section_data_from_rva<uint16_t>(res_rva + dir_entry.NameOffset, section_data_virtual, true);
+
+			//Check name length: directory_name_length counts 16-bit characters, so twice as many bytes must be available
+			if(pe.section_data_length_from_rva(res_rva + dir_entry.NameOffset + sizeof(uint16_t), res_rva + dir_entry.NameOffset + sizeof(uint16_t), section_data_virtual, true)
+				< directory_name_length * sizeof(uint16_t))
+				throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+#ifdef PE_BLISS_WINDOWS
+			//Set entry UNICODE name
+			entry.set_name(std::wstring(
+				reinterpret_cast<const wchar_t*>(pe.section_data_from_rva(res_rva + dir_entry.NameOffset + sizeof(uint16_t), section_data_virtual, true)),
+				directory_name_length));
+#else
+			//Set entry UNICODE name (converted from 16-bit characters by pe_utils::from_ucs2)
+			entry.set_name(pe_utils::from_ucs2(u16string(
+				reinterpret_cast<const unicode16_t*>(pe.section_data_from_rva(res_rva + dir_entry.NameOffset + sizeof(uint16_t), section_data_virtual, true)),
+				directory_name_length)));
+#endif
+		}
+		else
+		{
+			//Else - set directory ID
+			entry.set_id(dir_entry.Id);
+		}
+
+		//If directory entry has another resource directory
+		if(dir_entry.DataIsDirectory)
+		{
+			entry.add_resource_directory(process_resource_directory(pe, res_rva, dir_entry.OffsetToDirectory, processed));
+		}
+		else
+		{
+			//If directory entry has data
+			image_resource_data_entry data_entry = pe.section_data_from_rva<image_resource_data_entry>(
+				res_rva + dir_entry.OffsetToData, section_data_virtual, true);
+
+			//Check byte count that stated by data entry
+			if(pe.section_data_length_from_rva(data_entry.OffsetToData, data_entry.OffsetToData, section_data_virtual, true) < data_entry.Size)
+				throw pe_exception("Incorrect resource directory", pe_exception::incorrect_resource_directory);
+
+			//Add data entry to directory entry (the resource bytes are copied)
+			entry.add_data_entry(resource_data_entry(
+				std::string(pe.section_data_from_rva(data_entry.OffsetToData, section_data_virtual, true), data_entry.Size),
+				data_entry.CodePage));
+		}
+
+		//Save directory entry
+		ret.add_resource_directory_entry(entry);
+	}
+
+	//Return resource directory
+	return ret;
+}
+
+//Helper function: accumulates the space needed for resource tables (structures) and for name strings, recursively
+void calculate_resource_data_space(const resource_directory& root, uint32_t aligned_offset_from_section_start, uint32_t& needed_size_for_structures, uint32_t& needed_size_for_strings)
+{
+	const resource_directory::entry_list& entries = root.get_entry_list();
+	needed_size_for_structures += sizeof(image_resource_directory);
+	for(resource_directory::entry_list::const_iterator entry = entries.begin(); entry != entries.end(); ++entry)
+	{
+		needed_size_for_structures += sizeof(image_resource_directory_entry);
+		if(entry->is_named()) //String length field + characters and one extra character, two bytes each
+			needed_size_for_strings += static_cast<uint32_t>(sizeof(uint16_t) + (entry->get_name().length() + 1) * 2);
+
+		if(!entry->includes_data())
+			calculate_resource_data_space(entry->get_resource_directory(), aligned_offset_from_section_start, needed_size_for_structures, needed_size_for_strings);
+	}
+}
+
+//Helper function: accumulates the space needed for resource data entries and their data, recursively
+void calculate_resource_data_space(const resource_directory& root, uint32_t needed_size_for_structures, uint32_t needed_size_for_strings, uint32_t& needed_size_for_data, uint32_t& current_data_pos)
+{
+	const resource_directory::entry_list& entries = root.get_entry_list();
+	for(resource_directory::entry_list::const_iterator entry = entries.begin(); entry != entries.end(); ++entry)
+	{
+		if(!entry->includes_data())
+		{
+			calculate_resource_data_space(entry->get_resource_directory(), needed_size_for_structures, needed_size_for_strings, needed_size_for_data, current_data_pos);
+			continue;
+		}
+
+		//Alignment padding + image_resource_data_entry structure + the data itself
+		const uint32_t padding = pe_utils::align_up(current_data_pos, sizeof(uint32_t)) - current_data_pos;
+		const uint32_t data_size = static_cast<uint32_t>(entry->get_data_entry().get_data().length() + sizeof(image_resource_data_entry) + padding);
+		needed_size_for_data += data_size;
+		current_data_pos += data_size;
+	}
+}
+
+//Helper: comparator passed to std::sort to order resource directory entries (operator() is defined out of line)
+struct entry_sorter
+{
+public:
+	bool operator()(const resource_directory_entry& entry1, const resource_directory_entry& entry2) const;
+};
+
+//Helper function to rebuild resource directory: writes "root" and, recursively, its children into resource_section raw data (no bounds checks - the buffer must already be large enough)
+void rebuild_resource_directory(pe_base& pe, section& resource_section, resource_directory& root, uint32_t& current_structures_pos, uint32_t& current_data_pos, uint32_t& current_strings_pos, uint32_t offset_from_section_start)
+{
+	//Create resource directory header from the fields stored in "root"
+	image_resource_directory dir = {0};
+	dir.Characteristics = root.get_characteristics();
+	dir.MajorVersion = root.get_major_version();
+	dir.MinorVersion = root.get_minor_version();
+	dir.TimeDateStamp = root.get_timestamp();
+	
+	{
+		resource_directory::entry_list& entries = root.get_entry_list(); //Sorted in place - this is why "root" is non-const
+		std::sort(entries.begin(), entries.end(), entry_sorter());
+	}
+
+	//Calculate number of named and ID entries (the counters stored in "root" are not used)
+	for(resource_directory::entry_list::const_iterator it = root.get_entry_list().begin(); it != root.get_entry_list().end(); ++it)
+	{
+		if((*it).is_named())
+			++dir.NumberOfNamedEntries;
+		else
+			++dir.NumberOfIdEntries;
+	}
+	
+	std::string& raw_data = resource_section.get_raw_data();
+
+	//Save resource directory header at the current structures position
+	memcpy(&raw_data[current_structures_pos], &dir, sizeof(dir));
+	current_structures_pos += sizeof(dir);
+
+	uint32_t this_current_structures_pos = current_structures_pos; //Where the entry records of this directory are written
+
+	current_structures_pos += sizeof(image_resource_directory_entry) * (dir.NumberOfNamedEntries + dir.NumberOfIdEntries); //Child directories are placed after all entry records of this directory
+
+	//Create all resource directory entries
+	for(resource_directory::entry_list::iterator it = root.get_entry_list().begin(); it != root.get_entry_list().end(); ++it)
+	{
+		image_resource_directory_entry entry;
+		if((*it).is_named())
+		{
+			entry.Name = 0x80000000 | (current_strings_pos - offset_from_section_start); //High bit set + offset of the name string relative to offset_from_section_start
+			uint16_t unicode_length = static_cast<uint16_t>((*it).get_name().length());
+			memcpy(&raw_data[current_strings_pos], &unicode_length, sizeof(unicode_length)); //The string is prefixed with its 16-bit length
+			current_strings_pos += sizeof(unicode_length);
+
+#ifdef PE_BLISS_WINDOWS
+			memcpy(&raw_data[current_strings_pos], (*it).get_name().c_str(), (*it).get_name().length() * sizeof(uint16_t) + sizeof(uint16_t) /* unicode */);
+#else
+			{
+				u16string str(pe_utils::to_ucs2((*it).get_name())); //Name converted by pe_utils::to_ucs2 before it is written
+				memcpy(&raw_data[current_strings_pos], str.c_str(), (*it).get_name().length() * sizeof(uint16_t) + sizeof(uint16_t) /* unicode */);
+			}
+#endif
+
+			current_strings_pos += static_cast<unsigned long>((*it).get_name().length() * sizeof(uint16_t) + sizeof(uint16_t) /* unicode */);
+		}
+		else
+		{
+			entry.Name = (*it).get_id();
+		}
+
+		if((*it).includes_data())
+		{
+			current_data_pos = pe_utils::align_up(current_data_pos, sizeof(uint32_t)); //Data entry structures are aligned to sizeof(uint32_t)
+			image_resource_data_entry data_entry = {0};
+			data_entry.CodePage = (*it).get_data_entry().get_codepage();
+			data_entry.Size = static_cast<uint32_t>((*it).get_data_entry().get_data().length());
+			data_entry.OffsetToData = pe.rva_from_section_offset(resource_section, current_data_pos + sizeof(data_entry)); //RVA of the data, which is written right after the data entry structure
+			
+			entry.OffsetToData = current_data_pos - offset_from_section_start;
+
+			memcpy(&raw_data[current_data_pos], &data_entry, sizeof(data_entry));
+			current_data_pos += sizeof(data_entry);
+			
+			memcpy(&raw_data[current_data_pos], (*it).get_data_entry().get_data().data(), data_entry.Size);
+			current_data_pos += data_entry.Size;
+
+			memcpy(&raw_data[this_current_structures_pos], &entry, sizeof(entry));
+			this_current_structures_pos += sizeof(entry);
+		}
+		else
+		{
+			entry.OffsetToData = 0x80000000 | (current_structures_pos - offset_from_section_start); //High bit set + offset of the child directory relative to offset_from_section_start
+
+			memcpy(&raw_data[this_current_structures_pos], &entry, sizeof(entry));
+			this_current_structures_pos += sizeof(entry);
+
+			rebuild_resource_directory(pe, resource_section, (*it).get_resource_directory(), current_structures_pos, current_data_pos, current_strings_pos, offset_from_section_start); //Recursion advances the shared position counters
+		}
+	}
+}
+
+//Comparator of entry_sorter: named entries go first (ordered by name), ID entries follow (ordered by ID)
+bool entry_sorter::operator()(const resource_directory_entry& entry1, const resource_directory_entry& entry2) const
+{
+	if(entry1.is_named() != entry2.is_named())
+		return entry1.is_named(); //Mixed pair: the named entry is the lesser one
+	else if(entry1.is_named())
+		return entry1.get_name() < entry2.get_name();
+	else
+		return entry1.get_id() < entry2.get_id();
+}
+
+//Resources rebuilder; returns image_directory holding RVA and size of the rebuilt resource directory
+//info - root resource directory
+//resources_section - section where resource directory will be placed (must be attached to PE image)
+//offset_from_section_start - offset from resources_section raw data start
+//info is non-constant, because it will be sorted
+//save_to_pe_header - if true, new resource directory information will be saved to PE image headers
+//auto_strip_last_section - if true and resources are placed in the last section, it will be automatically stripped
+//number_of_id_entries and number_of_named_entries for resource directories are recalculated and not used
+const image_directory rebuild_resources(pe_base& pe, resource_directory& info, section& resources_section, uint32_t offset_from_section_start, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	//Check that resources_section is attached to this PE image
+	if(!pe.section_attached(resources_section))
+		throw pe_exception("Resource section must be attached to PE file", pe_exception::section_is_not_attached);
+	
+	//Check resource directory correctness (an empty root directory is rejected)
+	if(info.get_entry_list().empty())
+		throw pe_exception("Empty resource directory", pe_exception::incorrect_resource_directory);
+	
+	uint32_t aligned_offset_from_section_start = pe_utils::align_up(offset_from_section_start, sizeof(uint32_t));
+	uint32_t needed_size_for_structures = aligned_offset_from_section_start - offset_from_section_start; //Calculate needed size for resource tables and data
+	uint32_t needed_size_for_strings = 0;
+	uint32_t needed_size_for_data = 0;
+
+	calculate_resource_data_space(info, aligned_offset_from_section_start, needed_size_for_structures, needed_size_for_strings); //First pass: structures and strings
+
+	{
+		uint32_t current_data_pos = aligned_offset_from_section_start + needed_size_for_structures + needed_size_for_strings;
+		calculate_resource_data_space(info, needed_size_for_structures, needed_size_for_strings, needed_size_for_data, current_data_pos); //Second pass: data entries and data
+	}
+
+	uint32_t needed_size = needed_size_for_structures + needed_size_for_strings + needed_size_for_data;
+
+	//Check if resources_section is last one. If it's not, check if there's enough place for resource data
+	if(&resources_section != &*(pe.get_image_sections().end() - 1) && 
+		(resources_section.empty() || pe_utils::align_up(resources_section.get_size_of_raw_data(), pe.get_file_alignment())
+		< needed_size + aligned_offset_from_section_start))
+		throw pe_exception("Insufficient space for resource directory", pe_exception::insufficient_space);
+
+	std::string& raw_data = resources_section.get_raw_data();
+
+	//This will be done only if resources_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + aligned_offset_from_section_start)
+		raw_data.resize(needed_size + aligned_offset_from_section_start); //Expand section raw data
+
+	uint32_t current_structures_pos = aligned_offset_from_section_start; //Layout inside the section: structures, then strings, then data
+	uint32_t current_strings_pos = current_structures_pos + needed_size_for_structures;
+	uint32_t current_data_pos = current_strings_pos + needed_size_for_strings;
+	rebuild_resource_directory(pe, resources_section, info, current_structures_pos, current_data_pos, current_strings_pos, aligned_offset_from_section_start);
+	
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(resources_section, auto_strip_last_section);
+
+	image_directory ret(pe.rva_from_section_offset(resources_section, aligned_offset_from_section_start), needed_size);
+
+	//If auto-rewrite of PE headers is required
+	if(save_to_pe_header)
+	{
+		pe.set_directory_rva(image_directory_entry_resource, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_resource, ret.get_size());
+	}
+
+	return ret;
+}
+
+//Returns resources (root resource_directory) from PE file
+const resource_directory get_resources(const pe_base& pe)
+{
+	//Image without resources - hand back an empty directory
+	if(!pe.has_resources())
+		return resource_directory();
+
+	//Get resource directory RVA
+	const uint32_t res_rva = pe.get_directory_rva(image_directory_entry_resource);
+
+	//Offsets of already processed directories;
+	//process_resource_directory uses this set to avoid resource loops
+	std::set<uint32_t> processed;
+
+	//The root directory is read at offset 0 from res_rva;
+	//nested directories are processed recursively
+	const resource_directory root = process_resource_directory(pe, res_rva, 0, processed);
+	return root;
+}
+
+//Predicate that finds resource_directory_entry by ID: remembers the ID to look for
+resource_directory::id_entry_finder::id_entry_finder(uint32_t id)
+	:id_(id)
+{}
+
+bool resource_directory::id_entry_finder::operator()(const resource_directory_entry& entry) const
+{
+	return !(entry.is_named() || entry.get_id() != id_); //Matches unnamed entries with the same ID only
+}
+
+//Predicate that finds resource_directory_entry by name: keeps a copy of the name to look for
+resource_directory::name_entry_finder::name_entry_finder(const std::wstring& name)
+	:name_(name)
+{}
+
+bool resource_directory::name_entry_finder::operator()(const resource_directory_entry& entry) const
+{
+	return !(!entry.is_named() || entry.get_name() != name_); //Matches named entries with the same name only
+}
+
+//Finds resource_directory_entry by name or ID (universal): the constructor used selects the matching mode
+resource_directory::entry_finder::entry_finder(const std::wstring& name)
+	:name_(name), id_(0), named_(true) //id_ is not used in name mode; it is zeroed so that copies of the finder never read an indeterminate value
+{}
+
+resource_directory::entry_finder::entry_finder(uint32_t id)
+	:id_(id), named_(false)
+{}
+
+bool resource_directory::entry_finder::operator()(const resource_directory_entry& entry) const
+{
+	if(named_)
+		return entry.is_named() && entry.get_name() == name_;
+	else
+		return !entry.is_named() && entry.get_id() == id_;
+}
+
+//Returns the first unnamed resource_directory_entry with the given ID; throws pe_exception if there is none
+const resource_directory_entry& resource_directory::entry_by_id(uint32_t id) const
+{
+	const entry_list::const_iterator found = std::find_if(entries_.begin(), entries_.end(), id_entry_finder(id));
+	if(found != entries_.end())
+		return *found;
+
+	throw pe_exception("Resource directory entry not found", pe_exception::resource_directory_entry_not_found);
+}
+
+//Returns the first named resource_directory_entry with the given name; throws pe_exception if there is none
+const resource_directory_entry& resource_directory::entry_by_name(const std::wstring& name) const
+{
+	const entry_list::const_iterator found = std::find_if(entries_.begin(), entries_.end(), name_entry_finder(name));
+	if(found != entries_.end())
+		return *found;
+
+	throw pe_exception("Resource directory entry not found", pe_exception::resource_directory_entry_not_found);
+}
+}
diff --git a/tools/pe_bliss/pe_resources.h b/tools/pe_bliss/pe_resources.h
new file mode 100644
index 0000000000..1eb6437563
--- /dev/null
+++ b/tools/pe_bliss/pe_resources.h
@@ -0,0 +1,245 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include <string>
+#include <set>
+#include "pe_structures.h"
+#include "pe_base.h"
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Class representing resource data entry: the resource data together with its codepage
+class resource_data_entry
+{
+public:
+	//Default constructor
+	resource_data_entry();
+	//Constructor from data and its codepage
+	resource_data_entry(const std::string& data, uint32_t codepage);
+
+	//Returns resource data codepage
+	uint32_t get_codepage() const;
+	//Returns resource data
+	const std::string& get_data() const;
+		
+public: //These functions do not change anything inside image, they are used by PE class
+	//You can also use them to rebuild resource directory
+		
+	//Sets resource data codepage
+	void set_codepage(uint32_t codepage);
+	//Sets resource data
+	void set_data(const std::string& data);
+
+private:
+	uint32_t codepage_; //Resource data codepage
+	std::string data_; //Resource data
+};
+
+//Forward declaration
+class resource_directory;
+
+//Class representing resource directory entry: identified by name or ID, holds either a resource_data_entry or a resource_directory
+class resource_directory_entry
+{
+public:
+	//Default constructor
+	resource_directory_entry();
+	//Copy constructor (the contained object is copied too)
+	resource_directory_entry(const resource_directory_entry& other);
+	//Copy assignment operator (the contained object is copied too)
+	resource_directory_entry& operator=(const resource_directory_entry& other);
+
+	//Returns entry ID
+	uint32_t get_id() const;
+	//Returns entry name
+	const std::wstring& get_name() const;
+	//Returns true, if entry has name
+	//Returns false, if entry has ID
+	bool is_named() const;
+
+	//Returns true, if entry includes resource_data_entry
+	//Returns false, if entry includes resource_directory
+	bool includes_data() const;
+	//Returns resource_directory if entry includes it, otherwise throws an exception
+	const resource_directory& get_resource_directory() const;
+	//Returns resource_data_entry if entry includes it, otherwise throws an exception
+	const resource_data_entry& get_data_entry() const;
+
+	//Destructor
+	~resource_directory_entry();
+
+public: //These functions do not change anything inside image, they are used by PE class
+	//You can also use them to rebuild resource directory
+
+	//Sets entry name
+	void set_name(const std::wstring& name);
+	//Sets entry ID
+	void set_id(uint32_t id);
+		
+	//Returns resource_directory if entry includes it, otherwise throws an exception
+	resource_directory& get_resource_directory();
+	//Returns resource_data_entry if entry includes it, otherwise throws an exception
+	resource_data_entry& get_data_entry();
+
+	//Adds resource_data_entry
+	void add_data_entry(const resource_data_entry& entry);
+	//Adds resource_directory
+	void add_resource_directory(const resource_directory& dir);
+
+private:
+	//Destroys included data
+	void release();
+
+private:
+	uint32_t id_; //Entry ID
+	std::wstring name_; //Entry name
+
+	union includes
+	{
+		//Default constructor
+		includes();
+
+		//We use pointers, we're doing manual copying here
+		class resource_data_entry* data_;
+		class resource_directory* dir_; //We use pointer, because structs include each other
+	};
+
+	includes ptr_; //Pointer to the included resource_data_entry or resource_directory
+
+	bool includes_data_, named_; //Reported by includes_data() and is_named() respectively
+};
+
+//Class representing resource directory: header fields plus the list of its entries
+class resource_directory
+{
+public:
+	typedef std::vector<resource_directory_entry> entry_list;
+
+public:
+	//Default constructor
+	resource_directory();
+	//Constructor from data
+	explicit resource_directory(const pe_win::image_resource_directory& dir);
+
+	//Returns characteristics of directory
+	uint32_t get_characteristics() const;
+	//Returns date and time stamp of directory
+	uint32_t get_timestamp() const;
+	//Returns number of named entries
+	uint32_t get_number_of_named_entries() const;
+	//Returns number of ID entries
+	uint32_t get_number_of_id_entries() const;
+	//Returns major version of directory
+	uint16_t get_major_version() const;
+	//Returns minor version of directory
+	uint16_t get_minor_version() const;
+	//Returns resource_directory_entry array
+	const entry_list& get_entry_list() const;
+	//Returns resource_directory_entry by ID. If not found - throws an exception
+	const resource_directory_entry& entry_by_id(uint32_t id) const;
+	//Returns resource_directory_entry by name. If not found - throws an exception
+	const resource_directory_entry& entry_by_name(const std::wstring& name) const;
+
+public: //These functions do not change anything inside image, they are used by PE class
+	//You can also use them to rebuild resource directory
+
+	//Adds resource_directory_entry
+	void add_resource_directory_entry(const resource_directory_entry& entry);
+	//Clears resource_directory_entry array
+	void clear_resource_directory_entry_list();
+
+	//Sets characteristics of directory
+	void set_characteristics(uint32_t characteristics);
+	//Sets date and time stamp of directory
+	void set_timestamp(uint32_t timestamp);
+	//Sets number of named entries
+	void set_number_of_named_entries(uint32_t number);
+	//Sets number of ID entries
+	void set_number_of_id_entries(uint32_t number);
+	//Sets major version of directory
+	void set_major_version(uint16_t major_version);
+	//Sets minor version of directory (misnamed: this overload taking an argument is the setter; kept for compatibility)
+	void get_minor_version(uint16_t minor_version);
+	void set_minor_version(uint16_t minor_version) { get_minor_version(minor_version); } //Consistently named alias of the setter above
+	//Returns resource_directory_entry array
+	entry_list& get_entry_list();
+
+private:
+	uint32_t characteristics_;
+	uint32_t timestamp_;
+	uint16_t major_version_, minor_version_;
+	uint32_t number_of_named_entries_, number_of_id_entries_;
+	entry_list entries_;
+
+public: //Finder helpers
+	//Finds resource_directory_entry by ID
+	struct id_entry_finder
+	{
+	public:
+		explicit id_entry_finder(uint32_t id);
+		bool operator()(const resource_directory_entry& entry) const;
+
+	private:
+		uint32_t id_;
+	};
+
+	//Finds resource_directory_entry by name
+	struct name_entry_finder
+	{
+	public:
+		explicit name_entry_finder(const std::wstring& name);
+		bool operator()(const resource_directory_entry& entry) const;
+
+	private:
+		std::wstring name_;
+	};
+
+	//Finds resource_directory_entry by name or ID (universal)
+	struct entry_finder
+	{
+	public:
+		explicit entry_finder(const std::wstring& name);
+		explicit entry_finder(uint32_t id);
+		bool operator()(const resource_directory_entry& entry) const;
+
+	private:
+		std::wstring name_;
+		uint32_t id_;
+		bool named_;
+	};
+};
+
+//Returns resources (root resource_directory) from PE file
+const resource_directory get_resources(const pe_base& pe);
+
+//Resources rebuilder
+//info - root resource directory
+//resources_section - section where resource directory will be placed (must be attached to PE image)
+//info is non-constant, because it will be sorted
+//offset_from_section_start - offset from resources_section raw data start
+//save_to_pe_header - if true, new resource directory information will be saved to PE image headers
+//auto_strip_last_section - if true and resources are placed in the last section, it will be automatically stripped
+//number_of_id_entries and number_of_named_entries for resource directories are recalculated and not used
+const image_directory rebuild_resources(pe_base& pe, resource_directory& info, section& resources_section, uint32_t offset_from_section_start = 0, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+}
diff --git a/tools/pe_bliss/pe_rich_data.cpp b/tools/pe_bliss/pe_rich_data.cpp
new file mode 100644
index 0000000000..e92f7ddc1b
--- /dev/null
+++ b/tools/pe_bliss/pe_rich_data.cpp
@@ -0,0 +1,152 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "pe_rich_data.h"
+
+namespace pe_bliss
+{
+//STUB OVERLAY
+//Default constructor, sets all three fields to zero
+rich_data::rich_data()
+	:number_(0), version_(0), times_(0)
+{}
+
+//Getters; the fields hold values decoded by get_rich_data, no further meaning is attached to them here
+uint32_t rich_data::get_number() const
+{
+	return number_; //Upper 16 bits of the entry's first dword after XOR with the key
+}
+
+uint32_t rich_data::get_version() const
+{
+	return version_; //Lower 16 bits of the entry's first dword after XOR with the key
+}
+
+uint32_t rich_data::get_times() const
+{
+	return times_; //Entry's second dword after XOR with the key
+}
+
+void rich_data::set_number(uint32_t number)
+{
+	number_ = number; //Stored as is, no range check
+}
+
+void rich_data::set_version(uint32_t version)
+{
+	version_ = version; //Stored as is, no range check
+}
+
+void rich_data::set_times(uint32_t times)
+{
+	times_ = times; //Stored as is
+}
+
+//Returns MSVC rich data
+//Every 32-bit value is decoded byte-by-byte as little-endian (PE on-disk order) and only
+//when all four bytes lie inside the stub overlay, so truncated data is never read past its end
+const rich_data_list get_rich_data(const pe_base& pe)
+{
+	//Bounds-checked little-endian dword reader over a byte buffer
+	struct dword_reader
+	{
+		const unsigned char* data;
+		size_t size;
+
+		//True if four bytes starting at offset lie inside the buffer
+		bool has(size_t offset) const
+		{
+			return size >= sizeof(uint32_t) && offset <= size - sizeof(uint32_t);
+		}
+
+		//Reads dword at offset, caller must check has(offset) first
+		uint32_t get(size_t offset) const
+		{
+			return static_cast<uint32_t>(data[offset])
+				| (static_cast<uint32_t>(data[offset + 1]) << 8)
+				| (static_cast<uint32_t>(data[offset + 2]) << 16)
+				| (static_cast<uint32_t>(data[offset + 3]) << 24);
+		}
+	};
+
+	//Returned value
+	rich_data_list ret;
+
+	const std::string& rich_overlay = pe.get_stub_overlay();
+
+	//If there's no room for even one dword, return empty vector
+	if(rich_overlay.size() < sizeof(uint32_t))
+		return ret;
+
+	const dword_reader overlay = { reinterpret_cast<const unsigned char*>(rich_overlay.data()), rich_overlay.size() };
+
+	//Rich overlay ID ("Rich" word)
+	static const uint32_t rich_overlay_id = 0x68636952;
+	//Signature that opens the rich data once XORed with the key ("DanS" word)
+	static const uint32_t dans_signature = 0x536e6144;
+
+	//Search for rich data overlay ID
+	size_t rich_pos = 0;
+	while(overlay.has(rich_pos) && overlay.get(rich_pos) != rich_overlay_id)
+		++rich_pos;
+
+	//No "Rich" word, or no complete XOR key after it
+	if(!overlay.has(rich_pos) || !overlay.has(rich_pos + sizeof(uint32_t)))
+		return ret;
+
+	//The XOR key is after "Rich" word, we should get it
+	const uint32_t xorkey = overlay.get(rich_pos + sizeof(uint32_t));
+
+	//Second search for signature "DanS"
+	size_t pos = 0;
+	while(overlay.has(pos) && (overlay.get(pos) ^ xorkey) != dans_signature)
+		++pos;
+
+	//If second signature is not found
+	if(!overlay.has(pos))
+		return ret;
+
+	//List all rich data structures, they start after the signature and three more dwords
+	for(pos += sizeof(uint32_t) * 4; overlay.has(pos); pos += sizeof(uint32_t) * 2)
+	{
+		//Check for rich overlay data end ("Rich" word reached)
+		const uint32_t first_dword = overlay.get(pos);
+		if(first_dword == rich_overlay_id)
+			break;
+
+		//Second dword of the entry must be complete too
+		if(!overlay.has(pos + sizeof(uint32_t)))
+			break;
+
+		//Create rich_data structure
+		rich_data data;
+		data.set_number((first_dword ^ xorkey) >> 16);
+		data.set_version((first_dword ^ xorkey) & 0xFFFF);
+		data.set_times(overlay.get(pos + sizeof(uint32_t)) ^ xorkey);
+
+		//Save rich data structure
+		ret.push_back(data);
+	}
+
+	//Return rich data structures list
+	return ret;
+}
+}
diff --git a/tools/pe_bliss/pe_rich_data.h b/tools/pe_bliss/pe_rich_data.h
new file mode 100644
index 0000000000..3e01d3c011
--- /dev/null
+++ b/tools/pe_bliss/pe_rich_data.h
@@ -0,0 +1,58 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <vector>
+#include "pe_structures.h"
+#include "pe_base.h"
+
+namespace pe_bliss
+{
+//Rich data overlay class of Microsoft Visual Studio: one entry of the list built by get_rich_data
+class rich_data
+{
+public:
+	//Default constructor
+	rich_data();
+
+public: //Getters
+	//The values are exposed as decoded; this library attaches no further meaning to them
+	uint32_t get_number() const;
+	uint32_t get_version() const;
+	uint32_t get_times() const;
+
+public: //Setters, used by PE library only
+	void set_number(uint32_t number);
+	void set_version(uint32_t version);
+	void set_times(uint32_t times);
+
+private:
+	uint32_t number_; //Upper 16 bits of the entry's first dword (after XOR with the key)
+	uint32_t version_; //Lower 16 bits of the entry's first dword (after XOR with the key)
+	uint32_t times_; //Entry's second dword (after XOR with the key)
+};
+
+//Rich data list typedef
+typedef std::vector<rich_data> rich_data_list;
+
+//Returns a vector with rich data (stub overlay); the vector is empty if no rich data is found
+const rich_data_list get_rich_data(const pe_base& pe);
+}
diff --git a/tools/pe_bliss/pe_section.cpp b/tools/pe_bliss/pe_section.cpp
new file mode 100644
index 0000000000..72127e22e2
--- /dev/null
+++ b/tools/pe_bliss/pe_section.cpp
@@ -0,0 +1,303 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include <string.h>
+#include "utils.h"
+#include "pe_section.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Section structure default constructor: zero-filled header, virtual data not mapped
+section::section()
+	:old_size_(static_cast<size_t>(-1)) //static_cast<size_t>(-1) is the "virtual data is not mapped" marker tested by empty(), map_virtual() and unmap_virtual()
+{
+	memset(&header_, 0, sizeof(image_section_header));
+}
+
+//Sets the name of section: at most 8 characters are kept, a shorter name is padded with zero bytes
+void section::set_name(const std::string& name)
+{
+	memset(header_.Name, 0, sizeof(header_.Name));
+	name.copy(reinterpret_cast<char*>(header_.Name), sizeof(header_.Name)); //Copies min(name length, field size) characters, adds no terminator
+}
+
+//Returns section name: header name bytes up to the first zero byte, or all 8 bytes if there is none
+const std::string section::get_name() const
+{
+	const char* name_begin = reinterpret_cast<const char*>(header_.Name);
+	const char* name_end = std::find(name_begin, name_begin + 8, '\0');
+	return std::string(name_begin, name_end);
+}
+
+//Sets (setflag == true) or clears (setflag == false) the given bits of section characteristics
+//Returns *this, so attribute setters built on it can be chained
+section& section::set_flag(uint32_t flag, bool setflag)
+{
+	header_.Characteristics = setflag
+		? (header_.Characteristics | flag)
+		: (header_.Characteristics & ~flag);
+
+	return *this;
+}
+
+//Sets or clears "readable" attribute (image_scn_mem_read) of section, returns *this
+section& section::readable(bool readable)
+{
+	return set_flag(image_scn_mem_read, readable);
+}
+
+//Sets or clears "writeable" attribute (image_scn_mem_write) of section, returns *this
+section& section::writeable(bool writeable)
+{
+	return set_flag(image_scn_mem_write, writeable);
+}
+
+//Sets or clears "executable" attribute (image_scn_mem_execute) of section, returns *this
+section& section::executable(bool executable)
+{
+	return set_flag(image_scn_mem_execute, executable);
+}
+
+//Sets or clears "shared" attribute (image_scn_mem_shared) of section, returns *this
+section& section::shared(bool shared)
+{
+	return set_flag(image_scn_mem_shared, shared);
+}
+
+//Sets or clears "discardable" attribute (image_scn_mem_discardable) of section, returns *this
+section& section::discardable(bool discardable)
+{
+	return set_flag(image_scn_mem_discardable, discardable);
+}
+
+//Returns true if section is readable (image_scn_mem_read is set in Characteristics)
+bool section::readable() const
+{
+	return (header_.Characteristics & image_scn_mem_read) != 0;
+}
+
+//Returns true if section is writeable (image_scn_mem_write is set in Characteristics)
+bool section::writeable() const
+{
+	return (header_.Characteristics & image_scn_mem_write) != 0;
+}
+
+//Returns true if section is executable (image_scn_mem_execute is set in Characteristics)
+bool section::executable() const
+{
+	return (header_.Characteristics & image_scn_mem_execute) != 0;
+}
+
+bool section::shared() const
+{
+	return (header_.Characteristics & image_scn_mem_shared) != 0; //True if section is shared
+}
+
+bool section::discardable() const
+{
+	return (header_.Characteristics & image_scn_mem_discardable) != 0; //True if section is discardable
+}
+
+//Tells whether the section has no RAW data
+bool section::empty() const
+{
+	//While virtual data is mapped, the raw data length is the one remembered in old_size_
+	const bool virtual_mapped = old_size_ != static_cast<size_t>(-1);
+	const size_t raw_length = virtual_mapped ? old_size_ : raw_data_.length();
+	return raw_length == 0;
+}
+
+//Returns raw section data from file image (unmaps virtual data first, so raw_data_ is cut back to its raw length)
+std::string& section::get_raw_data()
+{
+	unmap_virtual();
+	return raw_data_;
+}
+
+//Sets raw section data from file image; any virtual mapping is forgotten and data is stored as the new raw data
+void section::set_raw_data(const std::string& data)
+{
+	old_size_ = static_cast<size_t>(-1);
+	raw_data_ = data;
+}
+
+//Returns raw section data from file image; may resize the mutable raw_data_ even though the method is const
+const std::string& section::get_raw_data() const
+{
+	unmap_virtual();
+	return raw_data_;
+}
+
+//Returns mapped virtual section data: the same raw_data_ string, zero-extended by map_virtual() when needed
+const std::string& section::get_virtual_data(uint32_t section_alignment) const
+{
+	map_virtual(section_alignment);
+	return raw_data_;
+}
+
+//Returns mapped virtual section data; a later get_raw_data() call resizes the very same string
+std::string& section::get_virtual_data(uint32_t section_alignment)
+{
+	map_virtual(section_alignment);
+	return raw_data_;
+}
+
+//Maps virtual section data: grows raw_data_ with zero bytes up to the aligned virtual size
+void section::map_virtual(uint32_t section_alignment) const
+{
+	uint32_t aligned_virtual_size = get_aligned_virtual_size(section_alignment);
+	if(old_size_ == static_cast<size_t>(-1) && aligned_virtual_size && aligned_virtual_size > raw_data_.length()) //Only if not mapped yet and the data really has to grow
+	{
+		old_size_ = raw_data_.length(); //Remember raw length for unmap_virtual()
+		raw_data_.resize(aligned_virtual_size, 0);
+	}
+}
+
+//Unmaps virtual section data: restores raw_data_ to the length saved by map_virtual()
+void section::unmap_virtual() const
+{
+	if(old_size_ != static_cast<size_t>(-1))
+	{
+		raw_data_.resize(old_size_, 0);
+		old_size_ = static_cast<size_t>(-1); //Back to "not mapped"
+	}
+}
+
+//Returns section virtual size (header Misc.VirtualSize, not aligned)
+uint32_t section::get_virtual_size() const
+{
+	return header_.Misc.VirtualSize;
+}
+
+//Returns section virtual address (header VirtualAddress)
+uint32_t section::get_virtual_address() const
+{
+	return header_.VirtualAddress;
+}
+
+//Returns size of section raw data (header SizeOfRawData, not the length of the stored data string)
+uint32_t section::get_size_of_raw_data() const
+{
+	return header_.SizeOfRawData;
+}
+
+//Returns pointer to raw section data in PE file (header PointerToRawData)
+uint32_t section::get_pointer_to_raw_data() const
+{
+	return header_.PointerToRawData;
+}
+
+//Returns section characteristics (header Characteristics flags)
+uint32_t section::get_characteristics() const
+{
+	return header_.Characteristics;
+}
+
+//Returns raw image section header (read-only)
+const pe_win::image_section_header& section::get_raw_header() const
+{
+	return header_;
+}
+
+//Returns raw image section header (modifiable, changes are not checked)
+pe_win::image_section_header& section::get_raw_header()
+{
+	return header_;
+}
+
+//Calculates aligned virtual section size
+//A section with raw data but zero virtual size is sized by its raw data instead
+uint32_t section::get_aligned_virtual_size(uint32_t section_alignment) const
+{
+	//Size to be aligned: the virtual size from the header by default
+	uint32_t size_to_align = get_virtual_size();
+
+	//Fall back to the raw size when there is raw data and the virtual size is zero
+	const bool has_raw_data = get_size_of_raw_data() != 0;
+	const bool no_virtual_size = size_to_align == 0;
+	if(has_raw_data && no_virtual_size)
+		size_to_align = get_size_of_raw_data();
+
+	return pe_utils::align_up(size_to_align, section_alignment);
+}
+
+//Calculates aligned raw section size (zero for a section without raw data)
+uint32_t section::get_aligned_raw_size(uint32_t file_alignment) const
+{
+	const uint32_t raw_size = get_size_of_raw_data();
+	if(raw_size == 0)
+		return 0; //Nothing to align, align_up is not called at all
+	return pe_utils::align_up(raw_size, file_alignment);
+}
+
+//Sets size of raw section data (header SizeOfRawData only, the stored raw data is not resized)
+void section::set_size_of_raw_data(uint32_t size_of_raw_data)
+{
+	header_.SizeOfRawData = size_of_raw_data;
+}
+
+//Sets pointer to section raw data (header PointerToRawData)
+void section::set_pointer_to_raw_data(uint32_t pointer_to_raw_data)
+{
+	header_.PointerToRawData = pointer_to_raw_data;
+}
+
+//Sets section characteristics (replaces all header Characteristics flags at once)
+void section::set_characteristics(uint32_t characteristics)
+{
+	header_.Characteristics = characteristics;
+}
+
+//Sets section virtual size (header Misc.VirtualSize only, already mapped virtual data is not resized)
+void section::set_virtual_size(uint32_t virtual_size)
+{
+	header_.Misc.VirtualSize = virtual_size;
+}
+
+//Sets section virtual address (header VirtualAddress)
+void section::set_virtual_address(uint32_t virtual_address)
+{
+	header_.VirtualAddress = virtual_address;
+}
+
+//Section by file offset finder helper (4gb max)
+section_by_raw_offset::section_by_raw_offset(uint32_t offset)
+	:offset_(offset)
+{}
+
+bool section_by_raw_offset::operator()(const section& s) const
+{
+	//True if offset_ is inside [pointer, pointer + size); written as a subtraction because pointer + size can wrap around 32 bits
+	return s.get_pointer_to_raw_data() <= offset_ && offset_ - s.get_pointer_to_raw_data() < s.get_size_of_raw_data();
+}
+
+section_ptr_finder::section_ptr_finder(const section& s)
+	:s_(s) //Only a reference is kept (s_ is a reference member), s is not copied
+{}
+
+bool section_ptr_finder::operator()(const section& s) const
+{
+	return &s == &s_; //Identity (address) comparison, section contents are not compared
+}
+}
diff --git a/tools/pe_bliss/pe_section.h b/tools/pe_bliss/pe_section.h
new file mode 100644
index 0000000000..617ecebe26
--- /dev/null
+++ b/tools/pe_bliss/pe_section.h
@@ -0,0 +1,158 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <vector>
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+//Enumeration of section data types, used in functions below
+enum section_data_type
+{
+	section_data_raw, //Presumably selects raw (file) data, cf. section::get_raw_data - confirm at the point of use
+	section_data_virtual //Presumably selects mapped virtual data, cf. section::get_virtual_data - confirm at the point of use
+};
+
+//Class representing image section: raw section header plus section data
+class section
+{
+public:
+	//Default constructor
+	section();
+
+	//Sets the name of section (stripped to 8 characters)
+	void set_name(const std::string& name);
+
+	//Returns the name of section (returned by value)
+	const std::string get_name() const;
+
+	//Changes attributes of section, each call returns the section itself
+	section& readable(bool readable);
+	section& writeable(bool writeable);
+	section& executable(bool executable);
+	section& shared(bool shared);
+	section& discardable(bool discardable);
+
+	//Returns attributes of section
+	bool readable() const;
+	bool writeable() const;
+	bool executable() const;
+	bool shared() const;
+	bool discardable() const;
+
+	//Returns true if section has no RAW data
+	bool empty() const;
+
+	//Returns raw section data from file image (modifiable)
+	std::string& get_raw_data();
+	//Returns raw section data from file image (read-only)
+	const std::string& get_raw_data() const;
+	//Returns mapped virtual section data (read-only), section_alignment is used to align the virtual size
+	const std::string& get_virtual_data(uint32_t section_alignment) const;
+	//Returns mapped virtual section data (modifiable), section_alignment is used to align the virtual size
+	std::string& get_virtual_data(uint32_t section_alignment);
+
+public: //Header getters
+	//Returns section virtual size
+	uint32_t get_virtual_size() const;
+	//Returns section virtual address (RVA)
+	uint32_t get_virtual_address() const;
+	//Returns size of section raw data
+	uint32_t get_size_of_raw_data() const;
+	//Returns pointer to raw section data in PE file
+	uint32_t get_pointer_to_raw_data() const;
+	//Returns section characteristics
+	uint32_t get_characteristics() const;
+
+	//Returns raw image section header (read-only)
+	const pe_win::image_section_header& get_raw_header() const;
+
+public: //Aligned sizes calculation
+	//Calculates aligned virtual section size
+	uint32_t get_aligned_virtual_size(uint32_t section_alignment) const;
+	//Calculates aligned raw section size
+	uint32_t get_aligned_raw_size(uint32_t file_alignment) const;
+
+public: //Setters
+	//Sets size of raw section data
+	void set_size_of_raw_data(uint32_t size_of_raw_data);
+	//Sets pointer to section raw data
+	void set_pointer_to_raw_data(uint32_t pointer_to_raw_data);
+	//Sets section characteristics
+	void set_characteristics(uint32_t characteristics);
+	//Sets raw section data from file image
+	void set_raw_data(const std::string& data);
+
+public: //Setters, be careful
+	//Sets section virtual size (doesn't set internal aligned virtual size, changes only header value)
+	//Better use pe_base::set_section_virtual_size
+	void set_virtual_size(uint32_t virtual_size);
+	//Sets section virtual address
+	void set_virtual_address(uint32_t virtual_address);
+	//Returns raw image section header (modifiable)
+	pe_win::image_section_header& get_raw_header();
+
+private:
+	//Section header
+	pe_win::image_section_header header_;
+
+	//Maps virtual section data (const, because it only touches the mutable members below)
+	void map_virtual(uint32_t section_alignment) const;
+
+	//Unmaps virtual section data (const, because it only touches the mutable members below)
+	void unmap_virtual() const;
+
+	//Set flag (attribute) of section
+	section& set_flag(uint32_t flag, bool setflag);
+
+	//Old size of section (stored after mapping of virtual section memory)
+	mutable std::size_t old_size_;
+
+	//Section raw/virtual data, one string serves both views
+	mutable std::string raw_data_;
+};
+
+//Section by file offset finder helper (4gb max, the offset is 32-bit)
+struct section_by_raw_offset
+{
+public:
+	explicit section_by_raw_offset(uint32_t offset); //offset - file offset to look for
+	bool operator()(const section& s) const; //Checks a single section; the bool result lets it serve as a search predicate
+
+private:
+	uint32_t offset_; //File offset to look for
+};
+
+//Helper: finder of section* in sections list
+struct section_ptr_finder
+{
+public:
+	explicit section_ptr_finder(const section& s); //s - section to look for
+	bool operator()(const section& s) const; //Checks a single section; the bool result lets it serve as a search predicate
+
+private:
+	const section& s_; //Reference member: the finder does not own or copy the section
+};
+
+typedef std::vector<section> section_list;
+}
diff --git a/tools/pe_bliss/pe_structures.h b/tools/pe_bliss/pe_structures.h
new file mode 100644
index 0000000000..efc99103b2
--- /dev/null
+++ b/tools/pe_bliss/pe_structures.h
@@ -0,0 +1,1028 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <sstream>
+#include "stdint_defs.h"
+#if defined(_MSC_VER) || defined(__MINGW32__) //"||" instead of the alternative token "or", which not every preprocessor accepts
+#define PE_BLISS_WINDOWS
+#endif
+
+namespace pe_bliss
+{
+//Enumeration of PE types
+enum pe_type
+{
+	pe_type_32, //Presumably 32-bit image (cf. image_nt_optional_hdr32_magic) - confirm at the point of use
+	pe_type_64 //Presumably 64-bit image (cf. image_nt_optional_hdr64_magic) - confirm at the point of use
+};
+
+namespace pe_win
+{
+const uint32_t image_numberof_directory_entries = 16;
+const uint32_t image_nt_optional_hdr32_magic = 0x10b;
+const uint32_t image_nt_optional_hdr64_magic = 0x20b;
+const uint32_t image_resource_name_is_string = 0x80000000;
+const uint32_t image_resource_data_is_directory = 0x80000000;
+
+const uint32_t image_dllcharacteristics_dynamic_base = 0x0040;     // DLL can move.
+const uint32_t image_dllcharacteristics_force_integrity = 0x0080;     // Code Integrity Image
+const uint32_t image_dllcharacteristics_nx_compat = 0x0100;     // Image is NX compatible
+const uint32_t image_dllcharacteristics_no_isolation = 0x0200;     // Image understands isolation and doesn't want it
+const uint32_t image_dllcharacteristics_no_seh = 0x0400;     // Image does not use SEH.  No SE handler may reside in this image
+const uint32_t image_dllcharacteristics_no_bind = 0x0800;     // Do not bind this image.
+const uint32_t image_dllcharacteristics_wdm_driver = 0x2000;     // Driver uses WDM model
+const uint32_t image_dllcharacteristics_terminal_server_aware = 0x8000;
+
+const uint32_t image_sizeof_file_header = 20;
+
+const uint32_t image_file_relocs_stripped = 0x0001;  // Relocation info stripped from file.
+const uint32_t image_file_executable_image = 0x0002;  // File is executable  (i.e. no unresolved external references).
+const uint32_t image_file_line_nums_stripped = 0x0004;  // Line numbers stripped from file.
+const uint32_t image_file_local_syms_stripped = 0x0008;  // Local symbols stripped from file.
+const uint32_t image_file_aggresive_ws_trim = 0x0010;  // Aggressively trim working set
+const uint32_t image_file_large_address_aware = 0x0020;  // App can handle >2gb addresses
+const uint32_t image_file_bytes_reversed_lo = 0x0080;  // Bytes of machine word are reversed.
+const uint32_t image_file_32bit_machine = 0x0100;  // 32 bit word machine.
+const uint32_t image_file_debug_stripped = 0x0200;  // Debugging info stripped from file in .DBG file
+const uint32_t image_file_removable_run_from_swap = 0x0400;  // If Image is on removable media, copy and run from the swap file.
+const uint32_t image_file_net_run_from_swap = 0x0800;  // If Image is on Net, copy and run from the swap file.
+const uint32_t image_file_system = 0x1000;  // System File.
+const uint32_t image_file_dll = 0x2000;  // File is a DLL.
+const uint32_t image_file_up_system_only = 0x4000;  // File should only be run on a UP machine
+const uint32_t image_file_bytes_reversed_hi = 0x8000;  // Bytes of machine word are reversed.
+
+const uint32_t image_scn_lnk_nreloc_ovfl = 0x01000000;  // Section contains extended relocations.
+const uint32_t image_scn_mem_discardable = 0x02000000;  // Section can be discarded.
+const uint32_t image_scn_mem_not_cached = 0x04000000;  // Section is not cachable.
+const uint32_t image_scn_mem_not_paged = 0x08000000;  // Section is not pageable.
+const uint32_t image_scn_mem_shared = 0x10000000;  // Section is shareable.
+const uint32_t image_scn_mem_execute = 0x20000000;  // Section is executable.
+const uint32_t image_scn_mem_read = 0x40000000;  // Section is readable.
+const uint32_t image_scn_mem_write = 0x80000000;  // Section is writeable.
+
+const uint32_t image_scn_cnt_code = 0x00000020;  // Section contains code.
+const uint32_t image_scn_cnt_initialized_data = 0x00000040;  // Section contains initialized data.
+const uint32_t image_scn_cnt_uninitialized_data = 0x00000080;  // Section contains uninitialized data.
+
+//Directory Entries
+const uint32_t image_directory_entry_export = 0;   // Export Directory
+const uint32_t image_directory_entry_import = 1;   // Import Directory
+const uint32_t image_directory_entry_resource = 2;   // Resource Directory
+const uint32_t image_directory_entry_exception = 3;   // Exception Directory
+const uint32_t image_directory_entry_security = 4;   // Security Directory
+const uint32_t image_directory_entry_basereloc = 5;   // Base Relocation Table
+const uint32_t image_directory_entry_debug = 6;   // Debug Directory
+const uint32_t image_directory_entry_architecture = 7;   // Architecture Specific Data
+const uint32_t image_directory_entry_globalptr = 8;   // RVA of GP
+const uint32_t image_directory_entry_tls = 9;   // TLS Directory
+const uint32_t image_directory_entry_load_config = 10;   // Load Configuration Directory
+const uint32_t image_directory_entry_bound_import = 11;   // Bound Import Directory in headers
+const uint32_t image_directory_entry_iat = 12;   // Import Address Table
+const uint32_t image_directory_entry_delay_import = 13;   // Delay Load Import Descriptors
+const uint32_t image_directory_entry_com_descriptor = 14;   // COM Runtime descriptor
+
+//Subsystem Values
+const uint32_t image_subsystem_unknown = 0;   // Unknown subsystem.
+const uint32_t image_subsystem_native = 1;   // Image doesn't require a subsystem.
+const uint32_t image_subsystem_windows_gui = 2;   // Image runs in the Windows GUI subsystem.
+const uint32_t image_subsystem_windows_cui = 3;   // Image runs in the Windows character subsystem.
+const uint32_t image_subsystem_os2_cui = 5;   // image runs in the OS/2 character subsystem.
+const uint32_t image_subsystem_posix_cui = 7;   // image runs in the Posix character subsystem.
+const uint32_t image_subsystem_native_windows = 8;   // image is a native Win9x driver.
+const uint32_t image_subsystem_windows_ce_gui = 9;   // Image runs in the Windows CE subsystem.
+const uint32_t image_subsystem_efi_application = 10;  //
+const uint32_t image_subsystem_efi_boot_service_driver = 11;   //
+const uint32_t image_subsystem_efi_runtime_driver = 12;  //
+const uint32_t image_subsystem_efi_rom = 13;
+const uint32_t image_subsystem_xbox = 14;
+const uint32_t image_subsystem_windows_boot_application = 16;
+
+//Imports
+const uint64_t image_ordinal_flag64 = 0x8000000000000000ull;
+const uint32_t image_ordinal_flag32 = 0x80000000;
+
+//Based relocation types
+const uint32_t image_rel_based_absolute = 0;
+const uint32_t image_rel_based_high =  1;
+const uint32_t image_rel_based_low = 2;
+const uint32_t image_rel_based_highlow = 3;
+const uint32_t image_rel_based_highadj = 4;
+const uint32_t image_rel_based_mips_jmpaddr = 5;
+const uint32_t image_rel_based_mips_jmpaddr16 = 9;
+const uint32_t image_rel_based_ia64_imm64 = 9;
+const uint32_t image_rel_based_dir64 = 10;
+
+//Exception directory
+//The function has an exception handler that should be called when looking for functions that need to examine exceptions
+const uint32_t unw_flag_ehandler = 0x01;
+//The function has a termination handler that should be called when unwinding an exception
+const uint32_t unw_flag_uhandler = 0x02;
+//This unwind info structure is not the primary one for the procedure.
+//Instead, the chained unwind info entry is the contents of a previous RUNTIME_FUNCTION entry.
+//If this flag is set, then the UNW_FLAG_EHANDLER and UNW_FLAG_UHANDLER flags must be cleared.
+//Also, the frame register and fixed-stack allocation fields must have the same values as in the primary unwind info
+const uint32_t unw_flag_chaininfo = 0x04;
+
+//Debug
+const uint32_t image_debug_misc_exename = 1;
+const uint32_t image_debug_type_unknown = 0;
+const uint32_t image_debug_type_coff = 1;
+const uint32_t image_debug_type_codeview = 2;
+const uint32_t image_debug_type_fpo = 3;
+const uint32_t image_debug_type_misc = 4;
+const uint32_t image_debug_type_exception = 5;
+const uint32_t image_debug_type_fixup = 6;
+const uint32_t image_debug_type_omap_to_src = 7;
+const uint32_t image_debug_type_omap_from_src = 8;
+const uint32_t image_debug_type_borland = 9;
+const uint32_t image_debug_type_reserved10 = 10;
+const uint32_t image_debug_type_clsid = 11;
+
+
+//Storage classes
+const uint32_t image_sym_class_end_of_function = static_cast<uint8_t>(-1);
+const uint32_t image_sym_class_null = 0x0000;
+const uint32_t image_sym_class_automatic = 0x0001;
+const uint32_t image_sym_class_external = 0x0002;
+const uint32_t image_sym_class_static = 0x0003;
+const uint32_t image_sym_class_register = 0x0004;
+const uint32_t image_sym_class_external_def = 0x0005;
+const uint32_t image_sym_class_label = 0x0006;
+const uint32_t image_sym_class_undefined_label = 0x0007;
+const uint32_t image_sym_class_member_of_struct = 0x0008;
+const uint32_t image_sym_class_argument = 0x0009;
+const uint32_t image_sym_class_struct_tag = 0x000a;
+const uint32_t image_sym_class_member_of_union = 0x000b;
+const uint32_t image_sym_class_union_tag = 0x000c;
+const uint32_t image_sym_class_type_definition = 0x000d;
+const uint32_t image_sym_class_undefined_static = 0x000e;
+const uint32_t image_sym_class_enum_tag = 0x000f;
+const uint32_t image_sym_class_member_of_enum = 0x0010;
+const uint32_t image_sym_class_register_param = 0x0011;
+const uint32_t image_sym_class_bit_field = 0x0012;
+
+const uint32_t image_sym_class_far_external = 0x0044;
+
+const uint32_t image_sym_class_block = 0x0064;
+const uint32_t image_sym_class_function = 0x0065;
+const uint32_t image_sym_class_end_of_struct = 0x0066;
+const uint32_t image_sym_class_file = 0x0067;
+
+const uint32_t image_sym_class_section = 0x0068;
+const uint32_t image_sym_class_weak_external = 0x0069;
+
+const uint32_t image_sym_class_clr_token = 0x006b;
+
+//type packing constants
+const uint32_t n_btmask = 0x000f;
+const uint32_t n_tmask = 0x0030;
+const uint32_t n_tmask1 = 0x00c0;
+const uint32_t n_tmask2 = 0x00f0;
+const uint32_t n_btshft = 4;
+const uint32_t n_tshift = 2;
+
+//Type (derived) values.
+const uint32_t image_sym_dtype_null = 0;          // no derived type.
+const uint32_t image_sym_dtype_pointer = 1;       // pointer.
+const uint32_t image_sym_dtype_function = 2;      // function.
+const uint32_t image_sym_dtype_array = 3;         // array.
+
+// Is x a function? Compares the bits of x selected by n_tmask with image_sym_dtype_function shifted left by n_btshft.
+//TODO (the expansion uses n_tmask, n_btshft and image_sym_dtype_function unqualified, so they must be visible where the macro is used)
+#ifndef ISFCN
+#define ISFCN(x) (((x) & n_tmask) == (image_sym_dtype_function << n_btshft))
+#endif
+
+//Version info
+const uint32_t vs_ffi_fileflagsmask = 0x0000003FL;
+
+const uint32_t vs_ffi_signature = 0xFEEF04BDL;
+const uint32_t vs_ffi_strucversion = 0x00010000L;
+
+/* ----- VS_VERSION.dwFileFlags ----- */
+const uint32_t vs_ff_debug = 0x00000001L;
+const uint32_t vs_ff_prerelease = 0x00000002L;
+const uint32_t vs_ff_patched = 0x00000004L;
+const uint32_t vs_ff_privatebuild = 0x00000008L;
+const uint32_t vs_ff_infoinferred = 0x00000010L;
+const uint32_t vs_ff_specialbuild = 0x00000020L;
+
+/* ----- VS_VERSION.dwFileOS ----- */
+const uint32_t vos_unknown = 0x00000000L;
+const uint32_t vos_dos = 0x00010000L;
+const uint32_t vos_os216 = 0x00020000L;
+const uint32_t vos_os232 = 0x00030000L;
+const uint32_t vos_nt = 0x00040000L;
+const uint32_t vos_wince = 0x00050000L;
+
+const uint32_t vos__base = 0x00000000L;
+const uint32_t vos__windows16 = 0x00000001L;
+const uint32_t vos__pm16 = 0x00000002L;
+const uint32_t vos__pm32 = 0x00000003L;
+const uint32_t vos__windows32 = 0x00000004L;
+
+const uint32_t vos_dos_windows16 = 0x00010001L;
+const uint32_t vos_dos_windows32 = 0x00010004L;
+const uint32_t vos_os216_pm16 = 0x00020002L;
+const uint32_t vos_os232_pm32 = 0x00030003L;
+const uint32_t vos_nt_windows32 = 0x00040004L;
+
+/* ----- VS_VERSION.dwFileType ----- */
+const uint32_t vft_unknown = 0x00000000L;
+const uint32_t vft_app = 0x00000001L;
+const uint32_t vft_dll = 0x00000002L;
+const uint32_t vft_drv = 0x00000003L;
+const uint32_t vft_font =  0x00000004L;
+const uint32_t vft_vxd = 0x00000005L;
+const uint32_t vft_static_lib = 0x00000007L;
+
+const uint32_t message_resource_unicode = 0x0001;
+
+#pragma pack(push, 1)
+
+//Windows GUID structure (16 bytes under the 1-byte packing in effect: 4 + 2 + 2 + 8)
+struct guid
+{
+	uint32_t Data1;
+	uint16_t Data2;
+	uint16_t Data3;
+	uint8_t Data4[8];
+};
+
+//DOS .EXE header
+struct image_dos_header
+{
+	uint16_t e_magic;                     // Magic number
+	uint16_t e_cblp;                      // Bytes on last page of file
+	uint16_t e_cp;                        // Pages in file
+	uint16_t e_crlc;                      // Relocations
+	uint16_t e_cparhdr;                   // Size of header in paragraphs
+	uint16_t e_minalloc;                  // Minimum extra paragraphs needed
+	uint16_t e_maxalloc;                  // Maximum extra paragraphs needed
+	uint16_t e_ss;                        // Initial (relative) SS value
+	uint16_t e_sp;                        // Initial SP value
+	uint16_t e_csum;                      // Checksum
+	uint16_t e_ip;                        // Initial IP value
+	uint16_t e_cs;                        // Initial (relative) CS value
+	uint16_t e_lfarlc;                    // File address of relocation table
+	uint16_t e_ovno;                      // Overlay number
+	uint16_t e_res[4];                    // Reserved words
+	uint16_t e_oemid;                     // OEM identifier (for e_oeminfo)
+	uint16_t e_oeminfo;                   // OEM information; e_oemid specific
+	uint16_t e_res2[10];                  // Reserved words
+	int32_t  e_lfanew;                    // File address of new exe header
+};
+
+//Directory format (two 32-bit fields, 8 bytes in total)
+struct image_data_directory
+{
+	uint32_t VirtualAddress;
+	uint32_t Size;
+};
+
+//Optional header format
+struct image_optional_header32
+{
+	//Standard fields
+	uint16_t Magic;
+	uint8_t  MajorLinkerVersion;
+	uint8_t  MinorLinkerVersion;
+	uint32_t SizeOfCode;
+	uint32_t SizeOfInitializedData;
+	uint32_t SizeOfUninitializedData;
+	uint32_t AddressOfEntryPoint;
+	uint32_t BaseOfCode;
+	uint32_t BaseOfData;
+
+	//NT additional fields
+	uint32_t ImageBase;
+	uint32_t SectionAlignment;
+	uint32_t FileAlignment;
+	uint16_t MajorOperatingSystemVersion;
+	uint16_t MinorOperatingSystemVersion;
+	uint16_t MajorImageVersion;
+	uint16_t MinorImageVersion;
+	uint16_t MajorSubsystemVersion;
+	uint16_t MinorSubsystemVersion;
+	uint32_t Win32VersionValue;
+	uint32_t SizeOfImage;
+	uint32_t SizeOfHeaders;
+	uint32_t CheckSum;
+	uint16_t Subsystem;
+	uint16_t DllCharacteristics;
+	uint32_t SizeOfStackReserve;
+	uint32_t SizeOfStackCommit;
+	uint32_t SizeOfHeapReserve;
+	uint32_t SizeOfHeapCommit;
+	uint32_t LoaderFlags;
+	uint32_t NumberOfRvaAndSizes;
+	image_data_directory DataDirectory[image_numberof_directory_entries];
+};
+
+struct image_optional_header64
+{
+	uint16_t Magic;
+	uint8_t  MajorLinkerVersion;
+	uint8_t  MinorLinkerVersion;
+	uint32_t SizeOfCode;
+	uint32_t SizeOfInitializedData;
+	uint32_t SizeOfUninitializedData;
+	uint32_t AddressOfEntryPoint;
+	uint32_t BaseOfCode;
+	uint64_t ImageBase;
+	uint32_t SectionAlignment;
+	uint32_t FileAlignment;
+	uint16_t MajorOperatingSystemVersion;
+	uint16_t MinorOperatingSystemVersion;
+	uint16_t MajorImageVersion;
+	uint16_t MinorImageVersion;
+	uint16_t MajorSubsystemVersion;
+	uint16_t MinorSubsystemVersion;
+	uint32_t Win32VersionValue;
+	uint32_t SizeOfImage;
+	uint32_t SizeOfHeaders;
+	uint32_t CheckSum;
+	uint16_t Subsystem;
+	uint16_t DllCharacteristics;
+	uint64_t SizeOfStackReserve;
+	uint64_t SizeOfStackCommit;
+	uint64_t SizeOfHeapReserve;
+	uint64_t SizeOfHeapCommit;
+	uint32_t LoaderFlags;
+	uint32_t NumberOfRvaAndSizes;
+	image_data_directory DataDirectory[image_numberof_directory_entries];
+};
+
+struct image_file_header //20 bytes under the 1-byte packing in effect; embedded in both image_nt_headers32 and image_nt_headers64
+{
+	uint16_t Machine;
+	uint16_t NumberOfSections;
+	uint32_t TimeDateStamp;
+	uint32_t PointerToSymbolTable;
+	uint32_t NumberOfSymbols;
+	uint16_t SizeOfOptionalHeader;
+	uint16_t Characteristics;
+};
+
+struct image_nt_headers64
+{
+	uint32_t Signature;
+	image_file_header FileHeader;
+	image_optional_header64 OptionalHeader;
+};
+
+struct image_nt_headers32
+{
+	uint32_t Signature;
+	image_file_header FileHeader;
+	image_optional_header32 OptionalHeader;
+};
+
+//Section header format (40 bytes under the 1-byte packing in effect)
+struct image_section_header
+{
+	uint8_t Name[8];
+	union //Both members are 32-bit and share the same 4 bytes
+	{
+		uint32_t PhysicalAddress;
+		uint32_t VirtualSize;
+	} Misc;
+
+	uint32_t VirtualAddress;
+	uint32_t SizeOfRawData;
+	uint32_t PointerToRawData;
+	uint32_t PointerToRelocations;
+	uint32_t PointerToLinenumbers;
+	uint16_t NumberOfRelocations;
+	uint16_t NumberOfLinenumbers;
+	uint32_t Characteristics;
+};
+
+
+/// RESOURCES ///
+struct image_resource_directory
+{
+	uint32_t Characteristics;
+	uint32_t TimeDateStamp;
+	uint16_t MajorVersion;
+	uint16_t MinorVersion;
+	uint16_t NumberOfNamedEntries;
+	uint16_t NumberOfIdEntries;
+	//  IMAGE_RESOURCE_DIRECTORY_ENTRY DirectoryEntries[];
+};
+
+struct vs_fixedfileinfo
+{
+	uint32_t dwSignature;            /* e.g. 0xfeef04bd */
+	uint32_t dwStrucVersion;         /* e.g. 0x00000042 = "0.42" */
+	uint32_t dwFileVersionMS;        /* e.g. 0x00030075 = "3.75" */
+	uint32_t dwFileVersionLS;        /* e.g. 0x00000031 = "0.31" */
+	uint32_t dwProductVersionMS;     /* e.g. 0x00030010 = "3.10" */
+	uint32_t dwProductVersionLS;     /* e.g. 0x00000031 = "0.31" */
+	uint32_t dwFileFlagsMask;        /* = 0x3F for version "0.42" */
+	uint32_t dwFileFlags;            /* e.g. VFF_DEBUG | VFF_PRERELEASE */
+	uint32_t dwFileOS;               /* e.g. VOS_DOS_WINDOWS16 */
+	uint32_t dwFileType;             /* e.g. VFT_DRIVER */
+	uint32_t dwFileSubtype;          /* e.g. VFT2_DRV_KEYBOARD */
+	uint32_t dwFileDateMS;           /* e.g. 0 */
+	uint32_t dwFileDateLS;           /* e.g. 0 */
+};
+
+struct bitmapinfoheader
+{
+	uint32_t biSize;
+	int32_t  biWidth;
+	int32_t  biHeight;
+	uint16_t biPlanes;
+	uint16_t biBitCount;
+	uint32_t biCompression;
+	uint32_t biSizeImage;
+	int32_t  biXPelsPerMeter;
+	int32_t  biYPelsPerMeter;
+	uint32_t biClrUsed;
+	uint32_t biClrImportant;
+};
+
+struct message_resource_entry
+{
+	uint16_t Length;
+	uint16_t Flags;
+	uint8_t  Text[1];
+};
+
+struct message_resource_block
+{
+	uint32_t LowId;
+	uint32_t HighId;
+	uint32_t OffsetToEntries;
+};
+
+struct message_resource_data
+{
+	uint32_t NumberOfBlocks;
+	message_resource_block Blocks[1];
+};
+
+struct image_resource_directory_entry
+{
+	union
+	{
+		struct
+		{
+			uint32_t NameOffset:31;
+			uint32_t NameIsString:1;
+		};
+		uint32_t Name;
+		uint16_t Id;
+	};
+
+	union
+	{
+		uint32_t OffsetToData;
+		struct
+		{
+			uint32_t OffsetToDirectory:31;
+			uint32_t DataIsDirectory:1;
+		};
+	};
+};
+
+struct image_resource_data_entry
+{
+	uint32_t OffsetToData;
+	uint32_t Size;
+	uint32_t CodePage;
+	uint32_t Reserved;
+};
+
+#pragma pack(push, 2)
+struct bitmapfileheader //Declared under 2-byte packing (pushed above, popped right after the struct); 14 bytes
+{
+	uint16_t bfType;
+	uint32_t bfSize;
+	uint16_t bfReserved1;
+	uint16_t bfReserved2;
+	uint32_t bfOffBits;
+};
+#pragma pack(pop)
+
+
+
+//Structure representing ICON file header
+struct ico_header
+{
+	uint16_t Reserved;
+	uint16_t Type; //1
+	uint16_t Count; //Count of icons included in icon group
+};
+
+//Structure that is stored in icon group directory in PE resources
+struct icon_group
+{
+	uint8_t Width;
+	uint8_t Height;
+	uint8_t ColorCount;
+	uint8_t Reserved;
+	uint16_t Planes;
+	uint16_t BitCount;
+	uint32_t SizeInBytes;
+	uint16_t Number; //Represents resource ID in PE icon list
+};
+
+//Structure representing ICON directory entry inside ICON file
+struct icondirentry
+{
+	uint8_t Width;
+	uint8_t Height;
+	uint8_t ColorCount;
+	uint8_t Reserved;
+	uint16_t Planes;
+	uint16_t BitCount;
+	uint32_t SizeInBytes;
+	uint32_t ImageOffset; //Offset from start of header to the image
+};
+
+//Structure representing CURSOR file header
+struct cursor_header
+{
+	uint16_t Reserved;
+	uint16_t Type; //2
+	uint16_t Count; //Count of cursors included in cursor group
+};
+
+struct cursor_group //14 bytes under the 1-byte packing in effect
+{
+	uint16_t Width;
+	uint16_t Height; //Divide by 2 to get the actual height.
+	uint16_t Planes;
+	uint16_t BitCount;
+	uint32_t SizeInBytes;
+	uint16_t Number; //Represents resource ID in PE icon list (NOTE(review): same wording as icon_group::Number; presumably the cursor list is meant - confirm)
+};
+
+//Structure representing CURSOR directory entry inside CURSOR file
+struct cursordirentry
+{
+	uint8_t Width; //Set to CURSOR_GROUP::Height/2.
+	uint8_t Height;
+	uint8_t ColorCount;
+	uint8_t Reserved;
+	uint16_t HotspotX;
+	uint16_t HotspotY;
+	uint32_t SizeInBytes;
+	uint32_t ImageOffset; //Offset from start of header to the image
+};
+
+//Structure representing BLOCK in version info resource
+struct version_info_block //(always aligned on 32-bit (DWORD) boundary)
+{
+	uint16_t Length; //Length of this block (doesn't include padding)
+	uint16_t ValueLength; //Value length (if any)
+	uint16_t Type; //Value type (0 = binary, 1 = text)
+	uint16_t Key[1]; //Value name (block key) (always NULL terminated)
+
+	//////////
+	//WORD padding1[]; //Padding, if any (ALIGNMENT)
+	//xxxxx Value[]; //Value data, if any (*ALIGNED*)
+	//WORD padding2[]; //Padding, if any (ALIGNMENT)
+	//xxxxx Child[]; //Child block(s), if any (*ALIGNED*)
+	//////////
+};
+
+
+/// IMPORTS ///
+#pragma pack(push, 8)
+struct image_thunk_data64 //Declared under 8-byte packing (pushed above, popped right after the struct)
+{
+	union //All four members are 64-bit and share the same 8 bytes
+	{
+		uint64_t ForwarderString;  // PBYTE
+		uint64_t Function;         // PDWORD
+		uint64_t Ordinal;
+		uint64_t AddressOfData;    // PIMAGE_IMPORT_BY_NAME
+	} u1;
+};
+#pragma pack(pop)
+
+struct image_thunk_data32
+{
+	union
+	{
+		uint32_t ForwarderString;      // PBYTE 
+		uint32_t Function;             // PDWORD
+		uint32_t Ordinal;
+		uint32_t AddressOfData;        // PIMAGE_IMPORT_BY_NAME
+	} u1;
+};
+
+struct image_import_descriptor
+{
+	union
+	{
+		uint32_t Characteristics;           // 0 for terminating null import descriptor
+		uint32_t OriginalFirstThunk;        // RVA to original unbound IAT (PIMAGE_THUNK_DATA)
+	};
+
+	uint32_t TimeDateStamp;                 // 0 if not bound,
+											// -1 if bound, and real date\time stamp
+											//     in IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT (new BIND)
+											// O.W. date/time stamp of DLL bound to (Old BIND)
+
+	uint32_t ForwarderChain;                // -1 if no forwarders
+	uint32_t Name;
+	uint32_t FirstThunk;                    // RVA to IAT (if bound this IAT has actual addresses)
+};
+
+
+/// TLS ///
+struct image_tls_directory64
+{
+	uint64_t StartAddressOfRawData;
+	uint64_t EndAddressOfRawData;
+	uint64_t AddressOfIndex;         // PDWORD
+	uint64_t AddressOfCallBacks;     // PIMAGE_TLS_CALLBACK *;
+	uint32_t SizeOfZeroFill;
+	uint32_t Characteristics;
+};
+
+struct image_tls_directory32
+{
+	uint32_t StartAddressOfRawData;
+	uint32_t EndAddressOfRawData;
+	uint32_t AddressOfIndex;             // PDWORD
+	uint32_t AddressOfCallBacks;         // PIMAGE_TLS_CALLBACK *
+	uint32_t SizeOfZeroFill;
+	uint32_t Characteristics;
+};
+
+
+/// Export Format ///
+struct image_export_directory
+{
+	uint32_t Characteristics;
+	uint32_t TimeDateStamp;
+	uint16_t MajorVersion;
+	uint16_t MinorVersion;
+	uint32_t Name;
+	uint32_t Base;
+	uint32_t NumberOfFunctions;
+	uint32_t NumberOfNames;
+	uint32_t AddressOfFunctions;     // RVA from base of image
+	uint32_t AddressOfNames;         // RVA from base of image
+	uint32_t AddressOfNameOrdinals;  // RVA from base of image
+};
+
+
+/// Based relocation format ///
+struct image_base_relocation
+{
+	uint32_t VirtualAddress;
+	uint32_t SizeOfBlock;
+	//  uint16_t TypeOffset[1];
+};
+
+
+/// New format import descriptors pointed to by DataDirectory[ IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT ] ///
+struct image_bound_import_descriptor
+{
+	uint32_t TimeDateStamp;
+	uint16_t OffsetModuleName;
+	uint16_t NumberOfModuleForwarderRefs;
+	// Array of zero or more IMAGE_BOUND_FORWARDER_REF follows
+};
+
+struct image_bound_forwarder_ref
+{
+	uint32_t TimeDateStamp;
+	uint16_t OffsetModuleName;
+	uint16_t Reserved;
+};
+
+
+/// Exception directory ///
+struct image_runtime_function_entry
+{
+	uint32_t BeginAddress;
+	uint32_t EndAddress;
+	uint32_t UnwindInfoAddress;
+};
+
+enum unwind_op_codes
+{
+	uwop_push_nonvol = 0, /* info == register number */
+	uwop_alloc_large,     /* no info, alloc size in next 2 slots */
+	uwop_alloc_small,     /* info == size of allocation / 8 - 1 */
+	uwop_set_fpreg,       /* no info, FP = RSP + UNWIND_INFO.FPRegOffset*16 */
+	uwop_save_nonvol,     /* info == register number, offset in next slot */
+	uwop_save_nonvol_far, /* info == register number, offset in next 2 slots */
+	uwop_save_xmm128,     /* info == XMM reg number, offset in next slot */
+	uwop_save_xmm128_far, /* info == XMM reg number, offset in next 2 slots */
+	uwop_push_machframe   /* info == 0: no error-code, 1: error-code */
+};
+
+union unwind_code
+{
+	struct s //NOTE(review): this declares a nested type only; no member of type s exists, so FrameOffset is the union's sole data member - confirm this is intended
+	{
+		uint8_t CodeOffset;
+		uint8_t UnwindOp : 4;
+		uint8_t OpInfo   : 4;
+	};
+
+	uint16_t FrameOffset;
+};
+
+struct unwind_info
+{
+	uint8_t Version       : 3;
+	uint8_t Flags         : 5;
+	uint8_t SizeOfProlog;
+	uint8_t CountOfCodes;
+	uint8_t FrameRegister : 4;
+	uint8_t FrameOffset   : 4;
+	unwind_code UnwindCode[1];
+	/*  unwind_code MoreUnwindCode[((CountOfCodes + 1) & ~1) - 1];
+	*   union {
+	*       OPTIONAL ULONG ExceptionHandler;
+	*       OPTIONAL ULONG FunctionEntry;
+	*   };
+	*   OPTIONAL ULONG ExceptionData[]; */
+};
+
+
+
+/// Debug ///
+struct image_debug_misc
+{
+	uint32_t DataType;               // type of misc data, see defines
+	uint32_t Length;                 // total length of record, rounded to four
+	// byte multiple.
+	uint8_t  Unicode;                // TRUE if data is unicode string
+	uint8_t  Reserved[3];
+	uint8_t  Data[1];                // Actual data
+};
+
+struct image_coff_symbols_header
+{
+	uint32_t NumberOfSymbols;
+	uint32_t LvaToFirstSymbol;
+	uint32_t NumberOfLinenumbers;
+	uint32_t LvaToFirstLinenumber;
+	uint32_t RvaToFirstByteOfCode;
+	uint32_t RvaToLastByteOfCode;
+	uint32_t RvaToFirstByteOfData;
+	uint32_t RvaToLastByteOfData;
+};
+
+struct image_debug_directory
+{
+	uint32_t Characteristics;
+	uint32_t TimeDateStamp;
+	uint16_t MajorVersion;
+	uint16_t MinorVersion;
+	uint32_t Type;
+	uint32_t SizeOfData;
+	uint32_t AddressOfRawData;
+	uint32_t PointerToRawData;
+};
+
+
+#pragma pack(push, 2)
+struct image_symbol //Declared under 2-byte packing (pushed above, popped right after the struct); 18 bytes
+{
+	union //Three 8-byte views of the same symbol name storage
+	{
+		uint8_t ShortName[8];
+		struct
+		{
+			uint32_t Short;     // if 0, use LongName
+			uint32_t Long;      // offset into string table
+		} Name;
+		uint32_t LongName[2];    // PBYTE [2]
+	} N;
+	uint32_t Value;
+	int16_t  SectionNumber;
+	uint16_t Type;
+	uint8_t  StorageClass;
+	uint8_t  NumberOfAuxSymbols;
+};
+#pragma pack(pop)
+
+//CodeView Debug OMF signature. The signature at the end of the file is
+//a negative offset from the end of the file to another signature.  At
+//the negative offset (base address) is another signature whose filepos
+//field points to the first OMFDirHeader in a chain of directories.
+//The NB05 signature is used by the link utility to indicate a completely
+//unpacked file. The NB06 signature is used by ilink to indicate that the
+//executable has had CodeView information from an incremental link appended
+//to the executable. The NB08 signature is used by cvpack to indicate that
+//the CodeView Debug OMF has been packed. CodeView will only process
+//executables with the NB08 signature.
+struct OMFSignature
+{
+	char Signature[4];   // "NBxx"
+	uint32_t filepos;    // offset in file
+};
+
+struct CV_INFO_PDB20
+{
+	OMFSignature CvHeader;
+	uint32_t Signature;
+	uint32_t Age;
+	uint8_t PdbFileName[1];
+};
+
+struct CV_INFO_PDB70
+{
+	uint32_t CvSignature;
+	guid Signature;
+	uint32_t Age;
+	uint8_t PdbFileName[1];
+};
+
+//  directory information structure
+//  This structure contains the information describing the directory.
+//  It is pointed to by the signature at the base address or the directory
+//  link field of a preceding directory.  The directory entries immediately
+//  follow this structure.
+struct OMFDirHeader //16 bytes under the 1-byte packing in effect
+{
+	uint16_t cbDirHeader;    // length of this structure
+	uint16_t cbDirEntry;     // number of bytes in each directory entry
+	uint32_t cDir;           // number of directory entries
+	int32_t  lfoNextDir;     // offset from base of next directory
+	uint32_t flags;          // status flags
+};
+
+//  directory structure
+//  The data in this structure is used to reference the data for each
+//  subsection of the CodeView Debug OMF information.  Tables that are
+//  not associated with a specific module will have a module index of
+//  0xffff.  These tables are the global types table, the global symbol
+//  table, the global public table and the library table.
+struct OMFDirEntry
+{
+	uint16_t SubSection;     // subsection type (sst...)
+	uint16_t iMod;           // module index
+	int32_t  lfo;            // large file offset of subsection
+	uint32_t cb;             // number of bytes in subsection
+};
+
+
+/// CLR 2.0 header structure ///
+struct image_cor20_header
+{
+	//Header versioning
+	uint32_t cb;
+	uint16_t MajorRuntimeVersion;
+	uint16_t MinorRuntimeVersion;
+
+	// Symbol table and startup information
+	image_data_directory MetaData;
+	uint32_t Flags;
+
+	// If COMIMAGE_FLAGS_NATIVE_ENTRYPOINT is not set, EntryPointToken represents a managed entrypoint.
+	// If COMIMAGE_FLAGS_NATIVE_ENTRYPOINT is set, EntryPointRVA represents an RVA to a native entrypoint.
+	union
+	{
+		uint32_t EntryPointToken;
+		uint32_t EntryPointRVA;
+	};
+
+	// Binding information
+	image_data_directory Resources;
+	image_data_directory StrongNameSignature;
+
+	// Regular fixup and binding information
+	image_data_directory CodeManagerTable;
+	image_data_directory VTableFixups;
+	image_data_directory ExportAddressTableJumps;
+
+	// Precompiled image info (internal use only - set to zero)
+	image_data_directory ManagedNativeHeader;
+};
+
+enum replaces_cor_hdr_numeric_defines
+{
+	// COM+ Header entry point flags.
+	comimage_flags_ilonly               =0x00000001,
+	comimage_flags_32bitrequired        =0x00000002,
+	comimage_flags_il_library           =0x00000004,
+	comimage_flags_strongnamesigned     =0x00000008,
+	comimage_flags_native_entrypoint    =0x00000010,
+	comimage_flags_trackdebugdata       =0x00010000,
+
+	// Version flags for image.
+	cor_version_major_v2                =2,
+	cor_version_major                   =cor_version_major_v2,
+	cor_version_minor                   =0,
+	cor_deleted_name_length             =8,
+	cor_vtablegap_name_length           =8,
+
+	// Maximum size of a NativeType descriptor.
+	native_type_max_cb                  =1,
+	cor_ilmethod_sect_small_max_datasize=0xff,
+
+	// #defines for the MIH FLAGS
+	image_cor_mih_methodrva             =0x01,
+	image_cor_mih_ehrva                 =0x02,
+	image_cor_mih_basicblock            =0x08,
+
+	// V-table constants
+	cor_vtable_32bit                    =0x01,          // V-table slots are 32-bits in size.
+	cor_vtable_64bit                    =0x02,          // V-table slots are 64-bits in size.
+	cor_vtable_from_unmanaged           =0x04,          // If set, transition from unmanaged.
+	cor_vtable_from_unmanaged_retain_appdomain  =0x08,  // If set, transition from unmanaged with keeping the current appdomain.
+	cor_vtable_call_most_derived        =0x10,          // Call most derived method described by
+
+	// EATJ constants
+	image_cor_eatj_thunk_size           =32,            // Size of a jump thunk reserved range.
+
+	// Max name lengths
+	//@todo: Change to unlimited name lengths.
+	max_class_name                      =1024,
+	max_package_name                    =1024
+};
+
+/// Load Configuration Directory Entry ///
+struct image_load_config_directory32
+{
+	uint32_t Size;
+	uint32_t TimeDateStamp;
+	uint16_t MajorVersion;
+	uint16_t MinorVersion;
+	uint32_t GlobalFlagsClear;
+	uint32_t GlobalFlagsSet;
+	uint32_t CriticalSectionDefaultTimeout;
+	uint32_t DeCommitFreeBlockThreshold;
+	uint32_t DeCommitTotalFreeThreshold;
+	uint32_t LockPrefixTable;            // VA
+	uint32_t MaximumAllocationSize;
+	uint32_t VirtualMemoryThreshold;
+	uint32_t ProcessHeapFlags;
+	uint32_t ProcessAffinityMask;
+	uint16_t CSDVersion;
+	uint16_t Reserved1;
+	uint32_t EditList;                   // VA
+	uint32_t SecurityCookie;             // VA
+	uint32_t SEHandlerTable;             // VA
+	uint32_t SEHandlerCount;
+};
+
+struct image_load_config_directory64
+{
+	uint32_t Size;
+	uint32_t TimeDateStamp;
+	uint16_t MajorVersion;
+	uint16_t MinorVersion;
+	uint32_t GlobalFlagsClear;
+	uint32_t GlobalFlagsSet;
+	uint32_t CriticalSectionDefaultTimeout;
+	uint64_t DeCommitFreeBlockThreshold;
+	uint64_t DeCommitTotalFreeThreshold;
+	uint64_t LockPrefixTable;         // VA
+	uint64_t MaximumAllocationSize;
+	uint64_t VirtualMemoryThreshold;
+	uint64_t ProcessAffinityMask;
+	uint32_t ProcessHeapFlags;
+	uint16_t CSDVersion;
+	uint16_t Reserved1;
+	uint64_t EditList;                // VA
+	uint64_t SecurityCookie;          // VA
+	uint64_t SEHandlerTable;          // VA
+	uint64_t SEHandlerCount;
+};
+
+#pragma pack(pop)
+} //namespace pe_win
+
+#ifdef PE_BLISS_WINDOWS
+typedef wchar_t unicode16_t;
+typedef std::basic_string<unicode16_t> u16string;
+#else
+//Instead of wchar_t for windows: unsigned short is the code unit type here (assumed to be 16 bits wide - confirm for every target)
+typedef unsigned short unicode16_t;
+typedef std::basic_string<unicode16_t> u16string; //NOTE(review): std::char_traits is only specified for the standard character types; confirm each target standard library accepts basic_string<unsigned short>
+#endif
+
+} //namespace pe_bliss
diff --git a/tools/pe_bliss/pe_tls.cpp b/tools/pe_bliss/pe_tls.cpp
new file mode 100644
index 0000000000..5ec68e3f10
--- /dev/null
+++ b/tools/pe_bliss/pe_tls.cpp
@@ -0,0 +1,396 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "pe_tls.h"
+#include "pe_properties_generic.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//TLS
+//Default constructor: every RVA, the zero fill size and the characteristics start at zero
+tls_info::tls_info()
+	:start_rva_(0), end_rva_(0), index_rva_(0),
+	callbacks_rva_(0), size_of_zero_fill_(0), characteristics_(0)
+{}
+
+//Returns the stored RVA at which TLS raw data starts
+uint32_t tls_info::get_raw_data_start_rva() const
+{
+	return this->start_rva_;
+}
+
+//Returns the stored RVA at which TLS raw data ends
+uint32_t tls_info::get_raw_data_end_rva() const
+{
+	return this->end_rva_;
+}
+
+//Returns the stored RVA of the TLS index
+uint32_t tls_info::get_index_rva() const
+{
+	return this->index_rva_;
+}
+
+//Returns the stored RVA of the TLS callbacks array
+uint32_t tls_info::get_callbacks_rva() const
+{
+	return this->callbacks_rva_;
+}
+
+//Returns the stored size of zero fill
+uint32_t tls_info::get_size_of_zero_fill() const
+{
+	return this->size_of_zero_fill_;
+}
+
+//Returns the stored TLS characteristics value
+uint32_t tls_info::get_characteristics() const
+{
+	return this->characteristics_;
+}
+
+//Returns a read-only reference to the stored raw TLS data
+const std::string& tls_info::get_raw_data() const
+{
+	return this->raw_data_;
+}
+
+//Returns the list of TLS callback RVAs (read-only overload)
+const tls_info::tls_callback_list& tls_info::get_tls_callbacks() const
+{
+	return this->callbacks_;
+}
+
+//Returns the list of TLS callback RVAs (non-const overload, the returned list can be modified)
+tls_info::tls_callback_list& tls_info::get_tls_callbacks()
+{
+	return this->callbacks_;
+}
+
+//Appends one TLS callback RVA to the end of the callbacks list
+void tls_info::add_tls_callback(uint32_t rva)
+{
+	this->callbacks_.push_back(rva);
+}
+
+//Removes every entry from the TLS callbacks list
+void tls_info::clear_tls_callbacks()
+{
+	this->callbacks_.clear();
+}
+
+//Recalculates end RVA of raw TLS data as start RVA plus the current raw data length
+void tls_info::recalc_raw_data_end_rva()
+{
+	this->end_rva_ = static_cast<uint32_t>(this->raw_data_.length() + this->start_rva_);
+}
+
+//Stores the RVA at which TLS raw data starts
+void tls_info::set_raw_data_start_rva(uint32_t rva)
+{
+	this->start_rva_ = rva;
+}
+
+//Stores the RVA at which TLS raw data ends
+void tls_info::set_raw_data_end_rva(uint32_t rva)
+{
+	this->end_rva_ = rva;
+}
+
+//Stores the RVA of the TLS index
+void tls_info::set_index_rva(uint32_t rva)
+{
+	this->index_rva_ = rva;
+}
+
+//Stores the RVA of the TLS callbacks array
+void tls_info::set_callbacks_rva(uint32_t rva)
+{
+	this->callbacks_rva_ = rva;
+}
+
+//Stores the size of zero fill
+void tls_info::set_size_of_zero_fill(uint32_t size)
+{
+	this->size_of_zero_fill_ = size;
+}
+
+//Stores the TLS characteristics value
+void tls_info::set_characteristics(uint32_t characteristics)
+{
+	this->characteristics_ = characteristics;
+}
+
+//Stores a copy of the given raw TLS data (the end RVA is not touched here)
+void tls_info::set_raw_data(const std::string& data)
+{
+	this->raw_data_ = data;
+}
+
+//Returns TLS info by dispatching on the PE type: pe_type_32 uses pe_types_class_32, anything else pe_types_class_64
+const tls_info get_tls_info(const pe_base& pe)
+{
+	if(pe.get_pe_type() == pe_type_32)
+		return get_tls_info_base<pe_types_class_32>(pe);
+	return get_tls_info_base<pe_types_class_64>(pe);
+}
+
+//TLS Rebuilder: forwards every argument to rebuild_tls_base, instantiated for pe_types_class_32 when the image is pe_type_32 and for pe_types_class_64 otherwise
+const image_directory rebuild_tls(pe_base& pe, const tls_info& info, section& tls_section, uint32_t offset_from_section_start, bool write_tls_callbacks, bool write_tls_data, tls_data_expand_type expand, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	if(pe.get_pe_type() == pe_type_32)
+		return rebuild_tls_base<pe_types_class_32>(pe, info, tls_section, offset_from_section_start, write_tls_callbacks, write_tls_data, expand, save_to_pe_header, auto_strip_last_section);
+	return rebuild_tls_base<pe_types_class_64>(pe, info, tls_section, offset_from_section_start, write_tls_callbacks, write_tls_data, expand, save_to_pe_header, auto_strip_last_section);
+}
+
+//Get TLS info for the PE class described by PEClassType (it supplies the TLSStruct and BaseSize types used below)
+//Throws pe_exception if the image has no TLS directory (directory_does_not_exist) or the raw data check fails (incorrect_tls_directory)
+template<typename PEClassType>
+const tls_info get_tls_info_base(const pe_base& pe)
+{
+	tls_info ret;
+
+	//If there's no TLS directory, throw an exception
+	if(!pe.has_tls())
+		throw pe_exception("Image does not have TLS directory", pe_exception::directory_does_not_exist);
+
+	//Get TLS directory data (a local copy, so the address fix below does not modify the image)
+	typename PEClassType::TLSStruct tls_directory_data = pe.section_data_from_rva<typename PEClassType::TLSStruct>(pe.get_directory_rva(image_directory_entry_tls), section_data_virtual, true);
+
+	//Check data addresses: when start and end are equal, probe whether that VA can be converted to an RVA
+	if(tls_directory_data.EndAddressOfRawData == tls_directory_data.StartAddressOfRawData)
+	{
+		try
+		{
+			pe.va_to_rva(static_cast<typename PEClassType::BaseSize>(tls_directory_data.EndAddressOfRawData));
+		}
+		catch(const pe_exception&)
+		{
+			//Fix addresses on incorrect conversion: both are zeroed, which makes the checks below treat raw data as absent
+			tls_directory_data.EndAddressOfRawData = tls_directory_data.StartAddressOfRawData = 0;
+		}
+	}
+
+	if(tls_directory_data.StartAddressOfRawData &&
+		pe.section_data_length_from_va(static_cast<typename PEClassType::BaseSize>(tls_directory_data.StartAddressOfRawData),
+		static_cast<typename PEClassType::BaseSize>(tls_directory_data.StartAddressOfRawData), section_data_virtual, true)
+		< (tls_directory_data.EndAddressOfRawData - tls_directory_data.StartAddressOfRawData))
+		throw pe_exception("Incorrect TLS directory", pe_exception::incorrect_tls_directory);
+
+	//Fill TLS info; a zero VA is stored as a zero RVA without conversion
+	//VAs are not checked
+	ret.set_raw_data_start_rva(tls_directory_data.StartAddressOfRawData ? pe.va_to_rva(static_cast<typename PEClassType::BaseSize>(tls_directory_data.StartAddressOfRawData)) : 0);
+	ret.set_raw_data_end_rva(tls_directory_data.EndAddressOfRawData ? pe.va_to_rva(static_cast<typename PEClassType::BaseSize>(tls_directory_data.EndAddressOfRawData)) : 0);
+	ret.set_index_rva(tls_directory_data.AddressOfIndex ? pe.va_to_rva(static_cast<typename PEClassType::BaseSize>(tls_directory_data.AddressOfIndex)) : 0);
+	ret.set_callbacks_rva(tls_directory_data.AddressOfCallBacks ? pe.va_to_rva(static_cast<typename PEClassType::BaseSize>(tls_directory_data.AddressOfCallBacks)) : 0);
+	ret.set_size_of_zero_fill(tls_directory_data.SizeOfZeroFill);
+	ret.set_characteristics(tls_directory_data.Characteristics);
+
+	if(tls_directory_data.StartAddressOfRawData && tls_directory_data.StartAddressOfRawData != tls_directory_data.EndAddressOfRawData)
+	{
+		//Read and save TLS RAW data (EndAddressOfRawData - StartAddressOfRawData bytes starting at the start VA)
+		ret.set_raw_data(std::string(
+			pe.section_data_from_va(static_cast<typename PEClassType::BaseSize>(tls_directory_data.StartAddressOfRawData), section_data_virtual, true),
+			static_cast<uint32_t>(tls_directory_data.EndAddressOfRawData - tls_directory_data.StartAddressOfRawData)));
+	}
+
+	//If file has TLS callbacks
+	if(ret.get_callbacks_rva())
+	{
+		//Read callbacks VAs; current_tls_callback is the byte offset of the current entry from AddressOfCallBacks
+		uint32_t current_tls_callback = 0;
+
+		while(true)
+		{
+			//Read TLS callback VA
+			typename PEClassType::BaseSize va = pe.section_data_from_va<typename PEClassType::BaseSize>(static_cast<typename PEClassType::BaseSize>(tls_directory_data.AddressOfCallBacks + current_tls_callback), section_data_virtual, true);
+			if(va == 0) //A zero VA ends the list
+				break;
+
+			//Save it (converted to RVA)
+			ret.add_tls_callback(pe.va_to_rva(va, false));
+
+			//Move to next callback VA
+			current_tls_callback += sizeof(va);
+		}
+	}
+
+	return ret;
+}
+
+//Rebuilder of TLS structures: writes the TLS directory structure into tls_section at offset_from_section_start (aligned up to sizeof(BaseSize)) and returns its image_directory (RVA and size)
+//If write_tls_callbacks = true, TLS callbacks VAs will be written to their place
+//If write_tls_data = true, TLS data will be written to its place; if save_to_pe_header = true, the TLS entry of PE header data directories is updated too
+//If you have chosen to rewrite raw data, at most (EndAddressOfRawData - StartAddressOfRawData) bytes will be written, not the full length of string
+//representing raw data content (and never more than that string holds). Throws pe_exception if the section is not attached, TLS info is inconsistent or space is insufficient
+//auto_strip_last_section - if true and TLS are placed in the last section, it will be automatically stripped
+//Note/TODO: TLS Callbacks array is not DWORD-aligned (seems to work on WinXP - Win7)
+template<typename PEClassType>
+const image_directory rebuild_tls_base(pe_base& pe, const tls_info& info, section& tls_section, uint32_t offset_from_section_start, bool write_tls_callbacks, bool write_tls_data, tls_data_expand_type expand, bool save_to_pe_header, bool auto_strip_last_section)
+{
+	//Check that tls_section is attached to this PE image
+	if(!pe.section_attached(tls_section))
+		throw pe_exception("TLS section must be attached to PE file", pe_exception::section_is_not_attached);
+	
+	uint32_t tls_data_pos = pe_utils::align_up(offset_from_section_start, sizeof(typename PEClassType::BaseSize));
+	uint32_t needed_size = sizeof(typename PEClassType::TLSStruct); //Calculate needed size for TLS table
+	
+	//Check if tls_section is last one. If it's not, check if there's enough place for TLS data
+	if(&tls_section != &*(pe.get_image_sections().end() - 1) && 
+		(tls_section.empty() || pe_utils::align_up(tls_section.get_size_of_raw_data(), pe.get_file_alignment()) < needed_size + tls_data_pos))
+		throw pe_exception("Insufficient space for TLS directory", pe_exception::insufficient_space);
+
+	//Check raw data positions (end must not precede start, index RVA is mandatory)
+	if(info.get_raw_data_end_rva() < info.get_raw_data_start_rva() || info.get_index_rva() == 0)
+		throw pe_exception("Incorrect TLS directory", pe_exception::incorrect_tls_directory);
+
+	std::string& raw_data = tls_section.get_raw_data();
+
+	//This will be done only if tls_section is the last section of image or for section with unaligned raw length of data
+	if(raw_data.length() < needed_size + tls_data_pos)
+		raw_data.resize(needed_size + tls_data_pos); //Expand section raw data
+
+	//Create and fill TLS structure (fields without a corresponding RVA in info stay zero)
+	typename PEClassType::TLSStruct tls_struct = {0};
+	
+	typename PEClassType::BaseSize va;
+	if(info.get_raw_data_start_rva())
+	{
+		pe.rva_to_va(info.get_raw_data_start_rva(), va);
+		tls_struct.StartAddressOfRawData = va;
+		tls_struct.SizeOfZeroFill = info.get_size_of_zero_fill();
+	}
+
+	if(info.get_raw_data_end_rva())
+	{
+		pe.rva_to_va(info.get_raw_data_end_rva(), va);
+		tls_struct.EndAddressOfRawData = va;
+	}
+
+	pe.rva_to_va(info.get_index_rva(), va);
+	tls_struct.AddressOfIndex = va;
+
+	if(info.get_callbacks_rva())
+	{
+		pe.rva_to_va(info.get_callbacks_rva(), va);
+		tls_struct.AddressOfCallBacks = va;
+	}
+
+	tls_struct.Characteristics = info.get_characteristics();
+
+	//Save TLS structure (raw_data was resized above, so the destination range exists)
+	memcpy(&raw_data[tls_data_pos], &tls_struct, sizeof(tls_struct));
+
+	//If we are asked to rewrite TLS raw data
+	if(write_tls_data && info.get_raw_data_start_rva() && info.get_raw_data_start_rva() != info.get_raw_data_end_rva())
+	{
+		try
+		{
+			//Check if we're going to write TLS raw data to an existing section (not to PE headers)
+			section& raw_data_section = pe.section_from_rva(info.get_raw_data_start_rva());
+			pe.expand_section(raw_data_section, info.get_raw_data_start_rva(), info.get_raw_data_end_rva() - info.get_raw_data_start_rva(), expand == tls_data_expand_raw ? pe_base::expand_section_raw : pe_base::expand_section_virtual);
+		}
+		catch(const pe_exception&)
+		{
+			//If no section is presented by StartAddressOfRawData, just go to next step
+		}
+
+		unsigned long write_raw_data_size = info.get_raw_data_end_rva() - info.get_raw_data_start_rva();
+		unsigned long available_raw_length = 0;
+
+		//Check if there's enough RAW space to write raw TLS data...
+		if((available_raw_length = pe.section_data_length_from_rva(info.get_raw_data_start_rva(), info.get_raw_data_start_rva(), section_data_raw, true)) < info.get_raw_data_end_rva() - info.get_raw_data_start_rva())
+		{
+			//Check if there's enough virtual space for it...
+			if(pe.section_data_length_from_rva(info.get_raw_data_start_rva(), info.get_raw_data_start_rva(), section_data_virtual, true) < info.get_raw_data_end_rva() - info.get_raw_data_start_rva())
+				throw pe_exception("Insufficient space for TLS raw data", pe_exception::insufficient_space);
+			else
+				write_raw_data_size = available_raw_length; //We'll write just a part of full raw data
+		}
+
+		//Write raw TLS data, if any. The string in info may be shorter than the RVA range, never read past its end
+		if(write_raw_data_size > info.get_raw_data().length())
+			write_raw_data_size = static_cast<unsigned long>(info.get_raw_data().length());
+		if(write_raw_data_size != 0)
+			memcpy(pe.section_data_from_rva(info.get_raw_data_start_rva(), true), info.get_raw_data().data(), write_raw_data_size);
+	}
+
+	//If we are asked to rewrite TLS callbacks addresses
+	if(write_tls_callbacks && info.get_callbacks_rva())
+	{
+		unsigned long needed_callback_size = static_cast<unsigned long>((info.get_tls_callbacks().size() + 1 /* last null element */) * sizeof(typename PEClassType::BaseSize));
+
+		try
+		{
+			//Check if we're going to write TLS callbacks VAs to an existing section (not to PE headers)
+			section& raw_data_section = pe.section_from_rva(info.get_callbacks_rva());
+			pe.expand_section(raw_data_section, info.get_callbacks_rva(), needed_callback_size, pe_base::expand_section_raw);
+		}
+		catch(const pe_exception&)
+		{
+			//If no section is presented by RVA of callbacks, just go to next step
+		}
+
+		//Check if there's enough raw space to write callbacks TLS data (the length is remembered to limit the copy below)...
+		const unsigned long raw_callback_space = pe.section_data_length_from_rva(info.get_callbacks_rva(), info.get_callbacks_rva(), section_data_raw, true);
+		if(raw_callback_space < needed_callback_size - sizeof(typename PEClassType::BaseSize) /* last zero element can be virtual only */)
+			throw pe_exception("Insufficient space for TLS callbacks data", pe_exception::insufficient_space);
+		
+		if(pe.section_data_length_from_rva(info.get_callbacks_rva(), info.get_callbacks_rva(), section_data_virtual, true)
+			< needed_callback_size /* check here full virtual data length available */)
+			throw pe_exception("Insufficient space for TLS callbacks data", pe_exception::insufficient_space);
+
+		std::vector<typename PEClassType::BaseSize> callbacks_virtual_addresses;
+		callbacks_virtual_addresses.reserve(info.get_tls_callbacks().size() + 1 /* last null element */);
+
+		//Convert TLS RVAs to VAs
+		for(tls_info::tls_callback_list::const_iterator it = info.get_tls_callbacks().begin(); it != info.get_tls_callbacks().end(); ++it)
+		{
+			typename PEClassType::BaseSize cb_va = 0;
+			pe.rva_to_va(*it, cb_va);
+			callbacks_virtual_addresses.push_back(cb_va);
+		}
+
+		//Ending null element
+		callbacks_virtual_addresses.push_back(0);
+
+		//Write callbacks TLS data, but never past the available raw space: the ending null element is allowed (see check above) to be virtual only
+		memcpy(pe.section_data_from_rva(info.get_callbacks_rva(), true), &callbacks_virtual_addresses[0], raw_callback_space < needed_callback_size ? raw_callback_space : needed_callback_size);
+	}
+	
+	//Adjust section raw and virtual sizes
+	pe.recalculate_section_sizes(tls_section, auto_strip_last_section);
+
+	image_directory ret(pe.rva_from_section_offset(tls_section, tls_data_pos), needed_size);
+
+	//If auto-rewrite of PE headers is required
+	if(save_to_pe_header)
+	{
+		pe.set_directory_rva(image_directory_entry_tls, ret.get_rva());
+		pe.set_directory_size(image_directory_entry_tls, ret.get_size());
+	}
+
+	return ret;
+}
+}
diff --git a/tools/pe_bliss/pe_tls.h b/tools/pe_bliss/pe_tls.h
new file mode 100644
index 0000000000..316e208147
--- /dev/null
+++ b/tools/pe_bliss/pe_tls.h
@@ -0,0 +1,122 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <memory>
+#include <istream>
+#include "pe_base.h"
+#include "pe_directory.h"
+
+namespace pe_bliss
+{
+//Class representing TLS info
+//RVAs are stored as uint32_t (a Windows "DWORD"), because RVA is
+//always 32bit even in PE+
+class tls_info
+{
+public:
+	typedef std::vector<uint32_t> tls_callback_list;
+
+public:
+	//Default constructor
+	tls_info();
+
+	//Returns start RVA of TLS raw data
+	uint32_t get_raw_data_start_rva() const;
+	//Returns end RVA of TLS raw data
+	uint32_t get_raw_data_end_rva() const;
+	//Returns TLS index RVA
+	uint32_t get_index_rva() const;
+	//Returns TLS callbacks RVA
+	uint32_t get_callbacks_rva() const;
+	//Returns size of zero fill
+	uint32_t get_size_of_zero_fill() const;
+	//Returns characteristics
+	uint32_t get_characteristics() const;
+	//Returns raw TLS data
+	const std::string& get_raw_data() const;
+	//Returns TLS callbacks addresses (read-only list of RVAs)
+	const tls_callback_list& get_tls_callbacks() const;
+
+public: //These functions do not change anything inside image, they are used by PE class
+	//You can also use them to rebuild TLS directory
+
+	//Sets start RVA of TLS raw data
+	void set_raw_data_start_rva(uint32_t rva);
+	//Sets end RVA of TLS raw data
+	void set_raw_data_end_rva(uint32_t rva);
+	//Sets TLS index RVA
+	void set_index_rva(uint32_t rva);
+	//Sets TLS callbacks RVA
+	void set_callbacks_rva(uint32_t rva);
+	//Sets size of zero fill
+	void set_size_of_zero_fill(uint32_t size);
+	//Sets characteristics
+	void set_characteristics(uint32_t characteristics);
+	//Sets raw TLS data
+	void set_raw_data(const std::string& data);
+	//Returns TLS callbacks addresses (modifiable list of RVAs)
+	tls_callback_list& get_tls_callbacks();
+	//Adds TLS callback (given by its RVA)
+	void add_tls_callback(uint32_t rva);
+	//Clears TLS callbacks list
+	void clear_tls_callbacks();
+	//Recalculates end address of raw TLS data
+	void recalc_raw_data_end_rva();
+
+private:
+	uint32_t start_rva_, end_rva_, index_rva_, callbacks_rva_;
+	uint32_t size_of_zero_fill_, characteristics_;
+
+	//Raw TLS data
+	std::string raw_data_;
+
+	//TLS callback RVAs
+	tls_callback_list callbacks_;
+};
+
+//Represents the way the section that receives raw TLS data may be expanded by rebuild_tls
+//(Works only if you are writing TLS raw data to tls_section and it is the last one in the PE image at the moment of TLS rebuild)
+enum tls_data_expand_type
+{
+	tls_data_expand_raw, //If there is not enough raw space for raw TLS data, it can be expanded
+	tls_data_expand_virtual //If there is not enough virtual space for raw TLS data, it can be expanded
+};
+
+
+//Get TLS info
+//If image does not have TLS, throws an exception
+const tls_info get_tls_info(const pe_base& pe);
+
+template<typename PEClassType>
+const tls_info get_tls_info_base(const pe_base& pe);
+	
+//Rebuilder of TLS structures
+//If write_tls_callbacks = true, TLS callbacks VAs will be written to their place
+//If write_tls_data = true, TLS data will be written to its place
+//If you have chosen to rewrite raw data, only (EndAddressOfRawData - StartAddressOfRawData) bytes will be written, not the full length of string
+//representing raw data content
+//auto_strip_last_section - if true and TLS are placed in the last section, it will be automatically stripped
+const image_directory rebuild_tls(pe_base& pe, const tls_info& info, section& tls_section, uint32_t offset_from_section_start = 0, bool write_tls_callbacks = true, bool write_tls_data = true, tls_data_expand_type expand = tls_data_expand_raw, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+
+template<typename PEClassType>
+const image_directory rebuild_tls_base(pe_base& pe, const tls_info& info, section& tls_section, uint32_t offset_from_section_start = 0, bool write_tls_callbacks = true, bool write_tls_data = true, tls_data_expand_type expand = tls_data_expand_raw, bool save_to_pe_header = true, bool auto_strip_last_section = true);
+}
diff --git a/tools/pe_bliss/resource_bitmap_reader.cpp b/tools/pe_bliss/resource_bitmap_reader.cpp
new file mode 100644
index 0000000000..3546461f53
--- /dev/null
+++ b/tools/pe_bliss/resource_bitmap_reader.cpp
@@ -0,0 +1,86 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <cmath>
+#include "resource_bitmap_reader.h"
+#include "pe_resource_viewer.h"
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+resource_bitmap_reader::resource_bitmap_reader(const pe_resource_viewer& res)
+	:res_(res) //Only a reference is stored, so the viewer has to outlive this reader
+{}
+
+//Returns bitmap file data (file header built by create_bitmap + resource data) by name and index in language directory (instead of language) (minimum checks of format correctness)
+const std::string resource_bitmap_reader::get_bitmap_by_name(const std::wstring& name, uint32_t index) const
+{
+	return create_bitmap(res_.get_resource_data_by_name(pe_resource_viewer::resource_bitmap, name, index).get_data());
+}
+
+//Returns bitmap file data (file header built by create_bitmap + resource data) by name and language (minimum checks of format correctness)
+const std::string resource_bitmap_reader::get_bitmap_by_name(uint32_t language, const std::wstring& name) const
+{
+	return create_bitmap(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_bitmap, name).get_data());
+}
+
+//Returns bitmap file data (file header built by create_bitmap + resource data) by ID and language (minimum checks of format correctness)
+const std::string resource_bitmap_reader::get_bitmap_by_id_lang(uint32_t language, uint32_t id) const
+{
+	return create_bitmap(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_bitmap, id).get_data());
+}
+
+//Returns bitmap file data (file header built by create_bitmap + resource data) by ID and index in language directory (instead of language) (minimum checks of format correctness)
+const std::string resource_bitmap_reader::get_bitmap_by_id(uint32_t id, uint32_t index) const
+{
+	return create_bitmap(res_.get_resource_data_by_id(pe_resource_viewer::resource_bitmap, id, index).get_data());
+}
+
+//Helper function of creating bitmap header: returns a new bitmap file header followed by the unchanged resource_data; throws pe_exception (resource_incorrect_bitmap) if resource_data is shorter than a bitmap info header
+const std::string resource_bitmap_reader::create_bitmap(const std::string& resource_data)
+{
+	//Create bitmap file header
+	bitmapfileheader header = {0};
+	header.bfType = 0x4d42; //Signature "BM"
+	header.bfOffBits = sizeof(bitmapfileheader) + sizeof(bitmapinfoheader); //Offset to bitmap bits (color table, if any, is added below)
+	header.bfSize = static_cast<uint32_t>(sizeof(bitmapfileheader) + resource_data.length()); //Size of the whole bitmap file
+
+	//Check size of resource data: a complete bitmap info header is read from it below
+	if(resource_data.length() < sizeof(bitmapinfoheader))
+		throw pe_exception("Incorrect resource bitmap", pe_exception::resource_incorrect_bitmap);
+
+	{
+		//Get bitmap info header (resource data starts with it)
+		const bitmapinfoheader* info = reinterpret_cast<const bitmapinfoheader*>(resource_data.data());
+
+		//If color table is present, skip it (4 bytes are counted per color table entry)
+		if(info->biClrUsed != 0)
+			header.bfOffBits += 4 * info->biClrUsed; //Explicitly specified number of color table entries
+		else if(info->biBitCount <= 8)
+			header.bfOffBits += 4 * (static_cast<uint32_t>(1) << info->biBitCount); //2^biBitCount entries, computed with an exact integer shift (no floating point rounding)
+	}
+
+	//Return final bitmap data
+	return std::string(reinterpret_cast<const char*>(&header), sizeof(bitmapfileheader)) + resource_data;
+}
+}
diff --git a/tools/pe_bliss/resource_bitmap_reader.h b/tools/pe_bliss/resource_bitmap_reader.h
new file mode 100644
index 0000000000..f2b92bbde7
--- /dev/null
+++ b/tools/pe_bliss/resource_bitmap_reader.h
@@ -0,0 +1,50 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+class pe_resource_viewer;
+
+class resource_bitmap_reader
+{
+public:
+	resource_bitmap_reader(const pe_resource_viewer& res);
+
+	//Returns bitmap data by name and language (minimum checks of format correctness)
+	const std::string get_bitmap_by_name(uint32_t language, const std::wstring& name) const;
+	//Returns bitmap data by name and index in language directory (instead of language) (minimum checks of format correctness)
+	const std::string get_bitmap_by_name(const std::wstring& name, uint32_t index = 0) const;
+	//Returns bitmap data by ID and language (minimum checks of format correctness)
+	const std::string get_bitmap_by_id_lang(uint32_t language, uint32_t id) const;
+	//Returns bitmap data by ID and index in language directory (instead of language) (minimum checks of format correctness)
+	const std::string get_bitmap_by_id(uint32_t id, uint32_t index = 0) const;
+
+private:
+	//Helper function of creating bitmap header (static: works only on the passed resource data)
+	static const std::string create_bitmap(const std::string& resource_data);
+
+	const pe_resource_viewer& res_; //Stored by reference, not owned by the reader
+};
+}
diff --git a/tools/pe_bliss/resource_bitmap_writer.cpp b/tools/pe_bliss/resource_bitmap_writer.cpp
new file mode 100644
index 0000000000..3445a08445
--- /dev/null
+++ b/tools/pe_bliss/resource_bitmap_writer.cpp
@@ -0,0 +1,75 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "resource_bitmap_writer.h"
+#include "pe_resource_manager.h"
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+resource_bitmap_writer::resource_bitmap_writer(pe_resource_manager& res)
+	:res_(res) //Only a reference is stored, so the manager has to outlive this writer
+{}
+
+//Adds bitmap (identified by numeric ID) from bitmap file data: the leading bitmap file header is cut off, the rest is stored as resource data. If bitmap already exists, replaces it
+//timestamp will be used for directories that will be added. Throws pe_exception (resource_incorrect_bitmap) if the data is shorter than a bitmap file header
+void resource_bitmap_writer::add_bitmap(const std::string& bitmap_file, uint32_t id, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	//The file header is skipped below, so it has to be completely present
+	const std::string::size_type file_header_size = sizeof(bitmapfileheader);
+	if(file_header_size > bitmap_file.length())
+		throw pe_exception("Incorrect resource bitmap", pe_exception::resource_incorrect_bitmap);
+
+	//Directory entry carrying the bitmap ID
+	resource_directory_entry bitmap_entry;
+	bitmap_entry.set_id(id);
+	res_.add_resource(bitmap_file.substr(file_header_size), pe_resource_viewer::resource_bitmap, bitmap_entry, resource_directory::entry_finder(id), language, codepage, timestamp);
+}
+
+//Adds bitmap (identified by name) from bitmap file data: the leading bitmap file header is cut off, the rest is stored as resource data. If bitmap already exists, replaces it
+//timestamp will be used for directories that will be added. Throws pe_exception (resource_incorrect_bitmap) if the data is shorter than a bitmap file header
+void resource_bitmap_writer::add_bitmap(const std::string& bitmap_file, const std::wstring& name, uint32_t language, uint32_t codepage, uint32_t timestamp)
+{
+	//The file header is skipped below, so it has to be completely present
+	const std::string::size_type file_header_size = sizeof(bitmapfileheader);
+	if(file_header_size > bitmap_file.length())
+		throw pe_exception("Incorrect resource bitmap", pe_exception::resource_incorrect_bitmap);
+
+	//Directory entry carrying the bitmap name
+	resource_directory_entry bitmap_entry;
+	bitmap_entry.set_name(name);
+	res_.add_resource(bitmap_file.substr(file_header_size), pe_resource_viewer::resource_bitmap, bitmap_entry, resource_directory::entry_finder(name), language, codepage, timestamp);
+}
+
+//Removes bitmap by name and language; forwards to pe_resource_manager::remove_resource and returns its result
+bool resource_bitmap_writer::remove_bitmap(const std::wstring& name, uint32_t language)
+{
+	return res_.remove_resource(pe_resource_viewer::resource_bitmap, name, language);
+}
+
+//Removes bitmap by ID and language; forwards to pe_resource_manager::remove_resource and returns its result
+bool resource_bitmap_writer::remove_bitmap(uint32_t id, uint32_t language)
+{
+	return res_.remove_resource(pe_resource_viewer::resource_bitmap, id, language);
+}
+}
diff --git a/tools/pe_bliss/resource_bitmap_writer.h b/tools/pe_bliss/resource_bitmap_writer.h
new file mode 100644
index 0000000000..4b8ea72705
--- /dev/null
+++ b/tools/pe_bliss/resource_bitmap_writer.h
@@ -0,0 +1,47 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+class pe_resource_manager;
+
+class resource_bitmap_writer
+{
+public:
+	resource_bitmap_writer(pe_resource_manager& res);
+
+	//Adds bitmap from bitmap file data (identified by ID or by name). If bitmap already exists, replaces it
+	//timestamp will be used for directories that will be added
+	void add_bitmap(const std::string& bitmap_file, uint32_t id, uint32_t language, uint32_t codepage = 0, uint32_t timestamp = 0);
+	void add_bitmap(const std::string& bitmap_file, const std::wstring& name, uint32_t language, uint32_t codepage = 0, uint32_t timestamp = 0);
+
+	//Removes bitmap by name/ID and language
+	bool remove_bitmap(const std::wstring& name, uint32_t language);
+	bool remove_bitmap(uint32_t id, uint32_t language);
+
+private:
+	pe_resource_manager& res_; //Stored by reference, not owned by the writer
+};
+}
diff --git a/tools/pe_bliss/resource_cursor_icon_reader.cpp b/tools/pe_bliss/resource_cursor_icon_reader.cpp
new file mode 100644
index 0000000000..28a259163e
--- /dev/null
+++ b/tools/pe_bliss/resource_cursor_icon_reader.cpp
@@ -0,0 +1,521 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include "resource_cursor_icon_reader.h"
+#include "pe_structures.h"
+#include "pe_resource_viewer.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+resource_cursor_icon_reader::resource_cursor_icon_reader(const pe_resource_viewer& res)
+	:res_(res) //Only a reference is stored, so "res" must outlive this reader
+{}
+
+//Builds the icon file header and directory entries out of ICON_GROUP resource data
+//Returns the number of icons listed in the group
+uint16_t resource_cursor_icon_reader::format_icon_headers(std::string& ico_data, const std::string& resource_data)
+{
+	//The data must hold at least the group header
+	if(resource_data.length() < sizeof(ico_header))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	const char* const raw = resource_data.data();
+	const ico_header* header = reinterpret_cast<const ico_header*>(raw);
+
+	//...and every group entry announced by that header
+	if(resource_data.length() < sizeof(ico_header) + header->Count * sizeof(icon_group))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	//Reserve room for the directory, then copy the group header verbatim
+	ico_data.reserve(sizeof(ico_header) + header->Count * sizeof(icondirentry));
+	ico_data.append(raw, sizeof(ico_header));
+
+	//Image data of the first icon starts right after the whole directory
+	uint32_t image_offset = sizeof(ico_header) + sizeof(icondirentry) * header->Count;
+
+	//Walk the group entries, converting each one into a file directory entry
+	const icon_group* entry = reinterpret_cast<const icon_group*>(raw + sizeof(ico_header));
+	for(uint16_t i = 0; i != header->Count; ++i, ++entry)
+	{
+		//Fields are copied one by one; only the image offset is computed here
+		icondirentry file_entry;
+		file_entry.Width = entry->Width;
+		file_entry.Height = entry->Height;
+		file_entry.ColorCount = entry->ColorCount;
+		file_entry.Reserved = entry->Reserved;
+		file_entry.Planes = entry->Planes;
+		file_entry.BitCount = entry->BitCount;
+		file_entry.SizeInBytes = entry->SizeInBytes;
+		file_entry.ImageOffset = image_offset;
+
+		//Emit the entry and move the offset past this icon's image
+		ico_data.append(reinterpret_cast<const char*>(&file_entry), sizeof(icondirentry));
+		image_offset += entry->SizeInBytes;
+	}
+
+	//Report how many icons were listed
+	return header->Count;
+}
+
+//Builds standalone single-icon file data for the icon with given ID and language (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_single_icon_by_id_lang(uint32_t language, uint32_t id) const
+{
+	//Headers come from the icon group which lists this icon (the lookup throws if there is none)
+	std::string ico_file(lookup_icon_group_data_by_icon(id, language));
+	//The raw icon resource data follows the headers
+	ico_file += res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon, id).get_data();
+	return ico_file;
+}
+
+//Builds standalone single-icon file data for the icon with given ID; the language is picked by its index in the language directory (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_single_icon_by_id(uint32_t id, uint32_t index) const
+{
+	const pe_resource_viewer::resource_language_list icon_languages(res_.list_resource_languages(pe_resource_viewer::resource_icon, id));
+	if(!(index < icon_languages.size()))
+		throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+
+	//Headers come from the icon group of the same language which lists this icon
+	std::string ico_file(lookup_icon_group_data_by_icon(id, icon_languages.at(index)));
+	//The raw icon resource data follows the headers
+	ico_file += res_.get_resource_data_by_id(pe_resource_viewer::resource_icon, id, index).get_data();
+	return ico_file;
+}
+
+//Assembles complete icon file data from the icon group with given name; the language is picked by its index in the language directory (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_icon_by_name(const std::wstring& name, uint32_t index) const
+{
+	//Raw icon group resource data, looked up by name and language index
+	const std::string group_data(res_.get_resource_data_by_name(pe_resource_viewer::resource_icon_group, name, index).get_data());
+
+	//Start with the file header and directory (this also validates the group data size)
+	std::string ico_file;
+	const uint16_t total = format_icon_headers(ico_file, group_data);
+
+	//Then append the image of every icon the group lists, in group order
+	const icon_group* entry = reinterpret_cast<const icon_group*>(group_data.data() + sizeof(ico_header));
+	for(const icon_group* const last = entry + total; entry != last; ++entry)
+	{
+		//Icon images are addressed by the ID stored in the group entry
+		ico_file += res_.get_resource_data_by_id(pe_resource_viewer::resource_icon, entry->Number, index).get_data();
+	}
+
+	return ico_file;
+}
+
+//Assembles complete icon file data from the icon group with given name and language (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_icon_by_name(uint32_t language, const std::wstring& name) const
+{
+	//Raw icon group resource data, looked up by name and language
+	const std::string group_data(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_icon_group, name).get_data());
+
+	//Start with the file header and directory (this also validates the group data size)
+	std::string ico_file;
+	const uint16_t total = format_icon_headers(ico_file, group_data);
+
+	//Then append the image of every icon the group lists, in group order
+	const icon_group* entry = reinterpret_cast<const icon_group*>(group_data.data() + sizeof(ico_header));
+	for(const icon_group* const last = entry + total; entry != last; ++entry)
+	{
+		//Icon images are addressed by the ID stored in the group entry
+		ico_file += res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon, entry->Number).get_data();
+	}
+
+	return ico_file;
+}
+
+//Assembles complete icon file data from the icon group with given ID and language (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_icon_by_id_lang(uint32_t language, uint32_t id) const
+{
+	//Raw icon group resource data, looked up by ID and language
+	const std::string group_data(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon_group, id).get_data());
+
+	//Start with the file header and directory (this also validates the group data size)
+	std::string ico_file;
+	const uint16_t total = format_icon_headers(ico_file, group_data);
+
+	//Then append the image of every icon the group lists, in group order
+	const icon_group* entry = reinterpret_cast<const icon_group*>(group_data.data() + sizeof(ico_header));
+	for(const icon_group* const last = entry + total; entry != last; ++entry)
+	{
+		//Icon images are addressed by the ID stored in the group entry
+		ico_file += res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon, entry->Number).get_data();
+	}
+
+	return ico_file;
+}
+
+//Assembles complete icon file data from the icon group with given ID; the language is picked by its index in the language directory (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_icon_by_id(uint32_t id, uint32_t index) const
+{
+	//Raw icon group resource data, looked up by ID and language index
+	const std::string group_data(res_.get_resource_data_by_id(pe_resource_viewer::resource_icon_group, id, index).get_data());
+
+	//Start with the file header and directory (this also validates the group data size)
+	std::string ico_file;
+	const uint16_t total = format_icon_headers(ico_file, group_data);
+
+	//Then append the image of every icon the group lists, in group order
+	const icon_group* entry = reinterpret_cast<const icon_group*>(group_data.data() + sizeof(ico_header));
+	for(const icon_group* const last = entry + total; entry != last; ++entry)
+	{
+		//Icon images are addressed by the ID stored in the group entry
+		ico_file += res_.get_resource_data_by_id(pe_resource_viewer::resource_icon, entry->Number, index).get_data();
+	}
+
+	return ico_file;
+}
+
+//Searches an icon group for the icon with given ID; on a match appends single-icon file headers to ico_data and returns true
+bool resource_cursor_icon_reader::check_icon_presence(const std::string& icon_group_resource_data, uint32_t icon_id, std::string& ico_data)
+{
+	//The data must hold at least the group header
+	if(icon_group_resource_data.length() < sizeof(ico_header))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	const char* const raw = icon_group_resource_data.data();
+	const ico_header* header = reinterpret_cast<const ico_header*>(raw);
+
+	//...and every group entry announced by that header
+	if(icon_group_resource_data.length() < sizeof(ico_header) + header->Count * sizeof(icon_group))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	const icon_group* entry = reinterpret_cast<const icon_group*>(raw + sizeof(ico_header));
+	for(uint16_t i = 0; i != header->Count; ++i, ++entry)
+	{
+		//Skip entries which describe other icons
+		if(entry->Number != icon_id)
+			continue;
+
+		//Reserve memory to speed up a little
+		ico_data.reserve(sizeof(ico_header) + sizeof(icondirentry));
+		//Copy of the group header which announces exactly one icon
+		ico_header single_header = *header;
+		single_header.Count = 1;
+		ico_data.append(reinterpret_cast<const char*>(&single_header), sizeof(ico_header));
+
+		//Directory entry; the image is placed right after both headers
+		icondirentry file_entry;
+		file_entry.Width = entry->Width;
+		file_entry.Height = entry->Height;
+		file_entry.ColorCount = entry->ColorCount;
+		file_entry.Reserved = entry->Reserved;
+		file_entry.Planes = entry->Planes;
+		file_entry.BitCount = entry->BitCount;
+		file_entry.SizeInBytes = entry->SizeInBytes;
+		file_entry.ImageOffset = sizeof(ico_header) + sizeof(icondirentry);
+		ico_data.append(reinterpret_cast<const char*>(&file_entry), sizeof(file_entry));
+		return true;
+	}
+
+	return false;
+}
+
+//Looks up the icon group (of given language) which lists the icon with given ID and returns single-icon file headers built from it
+//ID-addressed groups are searched before named ones; throws pe_exception (no_icon_group_found) if no group lists the icon
+const std::string resource_cursor_icon_reader::lookup_icon_group_data_by_icon(uint32_t icon_id, uint32_t language) const
+{
+	std::string icon_header_data;
+
+	{
+		//List all ID-resources; a group is only read if it exists in the requested language
+		pe_resource_viewer::resource_id_list ids(res_.list_resource_ids(pe_resource_viewer::resource_icon_group));
+		for(pe_resource_viewer::resource_id_list::const_iterator it = ids.begin(); it != ids.end(); ++it)
+		{
+			pe_resource_viewer::resource_language_list group_languages(res_.list_resource_languages(pe_resource_viewer::resource_icon_group, *it));
+			if(std::find(group_languages.begin(), group_languages.end(), language) != group_languages.end()
+				&& check_icon_presence(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon_group, *it).get_data(), icon_id, icon_header_data))
+				return icon_header_data;
+		}
+	}
+
+	{
+		//List all named resources; same language filter as above
+		pe_resource_viewer::resource_name_list names(res_.list_resource_names(pe_resource_viewer::resource_icon_group));
+		for(pe_resource_viewer::resource_name_list::const_iterator it = names.begin(); it != names.end(); ++it)
+		{
+			pe_resource_viewer::resource_language_list group_languages(res_.list_resource_languages(pe_resource_viewer::resource_icon_group, *it));
+			if(std::find(group_languages.begin(), group_languages.end(), language) != group_languages.end()
+				&& check_icon_presence(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_icon_group, *it).get_data(), icon_id, icon_header_data))
+				return icon_header_data;
+		}
+	}
+
+	throw pe_exception("No icon group found for requested icon", pe_exception::no_icon_group_found);
+}
+
+//Builds standalone single-cursor file data for the cursor with given ID and language (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_single_cursor_by_id_lang(uint32_t language, uint32_t id) const
+{
+	const std::string cursor_resource(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor, id).get_data());
+	//Headers come from the cursor group which lists this cursor; the hotspot is taken from the resource data itself
+	std::string cur_file(lookup_cursor_group_data_by_cursor(id, language, cursor_resource));
+	//Image data follows the two hotspot words which open the resource data
+	cur_file.append(cursor_resource, 2 * sizeof(uint16_t) /* hotspot position */, std::string::npos);
+	return cur_file;
+}
+
+//Builds standalone single-cursor file data for the cursor with given ID; the language is picked by its index in the language directory (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_single_cursor_by_id(uint32_t id, uint32_t index) const
+{
+	const pe_resource_viewer::resource_language_list cursor_languages(res_.list_resource_languages(pe_resource_viewer::resource_cursor, id));
+	if(!(index < cursor_languages.size()))
+		throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+
+	const std::string cursor_resource(res_.get_resource_data_by_id(pe_resource_viewer::resource_cursor, id, index).get_data());
+	//Headers come from the cursor group of the same language which lists this cursor
+	std::string cur_file(lookup_cursor_group_data_by_cursor(id, cursor_languages.at(index), cursor_resource));
+	//Image data follows the two hotspot words which open the resource data
+	cur_file.append(cursor_resource, 2 * sizeof(uint16_t) /* hotspot position */, std::string::npos);
+	return cur_file;
+}
+
+//Helper function of creating cursor file headers (cursor_header + one cursordirentry per cursor) from CURSOR_GROUP resource data
+//cur_data receives the headers; the hotspot of each cursor is read from its own cursor resource, which is looked up
+//by "language" when index == 0xFFFFFFFF and by "index" in the language directory otherwise
+//Returns cursor count; throws pe_exception (resource_incorrect_cursor) if the group or a cursor resource is too short
+uint16_t resource_cursor_icon_reader::format_cursor_headers(std::string& cur_data, const std::string& resource_data, uint32_t language, uint32_t index) const
+{
+	//Check resource data length
+	if(resource_data.length() < sizeof(cursor_header))
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	const cursor_header* info = reinterpret_cast<const cursor_header*>(resource_data.data());
+
+	//Check resource data length
+	if(resource_data.length() < sizeof(cursor_header) + sizeof(cursor_group) * info->Count)
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	//Reserve needed space to speed up a little
+	cur_data.reserve(sizeof(cursor_header) + info->Count * sizeof(cursordirentry));
+	//Add cursor header
+	cur_data.append(reinterpret_cast<const char*>(info), sizeof(cursor_header));
+
+	//Iterate over all cursors listed in cursor group
+	uint32_t offset = sizeof(cursor_header) + sizeof(cursordirentry) * info->Count;
+	for(uint16_t i = 0; i != info->Count; ++i)
+	{
+		const cursor_group* group = reinterpret_cast<const cursor_group*>(resource_data.data() + sizeof(cursor_header) + i * sizeof(cursor_group));
+		//Fill cursor info
+		cursordirentry direntry;
+		direntry.ColorCount = 0; //OK
+		direntry.Width = static_cast<uint8_t>(group->Width);
+		//The group stores a doubled height: halve it BEFORE narrowing to a byte, otherwise e.g. 256 (a 128 pixel cursor) truncates to 0
+		direntry.Height = static_cast<uint8_t>(group->Height / 2);
+		direntry.Reserved = 0;
+
+		//Now read hotspot data from cursor data directory
+		const std::string cursor = index == 0xFFFFFFFF
+			? res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor, group->Number).get_data()
+			: res_.get_resource_data_by_id(pe_resource_viewer::resource_cursor, group->Number, index).get_data();
+		if(cursor.length() < 2 * sizeof(uint16_t))
+			throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+		//Here it is - two words in the very beginning of cursor data
+		direntry.HotspotX = *reinterpret_cast<const uint16_t*>(cursor.data());
+		direntry.HotspotY = *reinterpret_cast<const uint16_t*>(cursor.data() + sizeof(uint16_t));
+
+		//Fill the rest data (the size in the group includes the two hotspot words, the file entry does not)
+		direntry.SizeInBytes = group->SizeInBytes - 2 * sizeof(uint16_t);
+		direntry.ImageOffset = offset;
+		//Add cursor header and advance the offset past this cursor's image
+		cur_data.append(reinterpret_cast<const char*>(&direntry), sizeof(cursordirentry));
+		offset += direntry.SizeInBytes;
+	}
+
+	//Return cursor count
+	return info->Count;
+}
+
+//Assembles complete cursor file data from the cursor group with given name and language (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_cursor_by_name(uint32_t language, const std::wstring& name) const
+{
+	//Raw cursor group resource data, looked up by name and language
+	const std::string group_data(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_cursor_group, name).get_data());
+
+	//Start with the file header and directory (this also validates the group and every listed cursor)
+	std::string cur_file;
+	const uint16_t total = format_cursor_headers(cur_file, group_data, language);
+
+	//Then append the image of every cursor the group lists, in group order
+	const cursor_group* entry = reinterpret_cast<const cursor_group*>(group_data.data() + sizeof(cursor_header));
+	for(const cursor_group* const last = entry + total; entry != last; ++entry)
+	{
+		//The two leading hotspot words are not part of the image
+		cur_file += res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor, entry->Number).get_data().substr(2 * sizeof(uint16_t));
+	}
+
+	return cur_file;
+}
+
+//Assembles complete cursor file data from the cursor group with given name; the language is picked by its index in the language directory (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_cursor_by_name(const std::wstring& name, uint32_t index) const
+{
+	//Raw cursor group resource data, looked up by name and language index
+	const std::string group_data(res_.get_resource_data_by_name(pe_resource_viewer::resource_cursor_group, name, index).get_data());
+
+	//Start with the file header and directory (this also validates the group and every listed cursor)
+	std::string cur_file;
+	const uint16_t total = format_cursor_headers(cur_file, group_data, 0, index);
+
+	//Then append the image of every cursor the group lists, in group order
+	const cursor_group* entry = reinterpret_cast<const cursor_group*>(group_data.data() + sizeof(cursor_header));
+	for(const cursor_group* const last = entry + total; entry != last; ++entry)
+	{
+		//The two leading hotspot words are not part of the image
+		cur_file += res_.get_resource_data_by_id(pe_resource_viewer::resource_cursor, entry->Number, index).get_data().substr(2 * sizeof(uint16_t));
+	}
+
+	return cur_file;
+}
+
+//Assembles complete cursor file data from the cursor group with given ID and language (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_cursor_by_id_lang(uint32_t language, uint32_t id) const
+{
+	//Raw cursor group resource data, looked up by ID and language
+	const std::string group_data(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor_group, id).get_data());
+
+	//Start with the file header and directory (this also validates the group and every listed cursor)
+	std::string cur_file;
+	const uint16_t total = format_cursor_headers(cur_file, group_data, language);
+
+	//Then append the image of every cursor the group lists, in group order
+	const cursor_group* entry = reinterpret_cast<const cursor_group*>(group_data.data() + sizeof(cursor_header));
+	for(const cursor_group* const last = entry + total; entry != last; ++entry)
+	{
+		//The two leading hotspot words are not part of the image
+		cur_file += res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor, entry->Number).get_data().substr(2 * sizeof(uint16_t));
+	}
+
+	return cur_file;
+}
+
+//Assembles complete cursor file data from the cursor group with given ID; the language is picked by its index in the language directory (minimum checks of format correctness)
+const std::string resource_cursor_icon_reader::get_cursor_by_id(uint32_t id, uint32_t index) const
+{
+	//Raw cursor group resource data, looked up by ID and language index
+	const std::string group_data(res_.get_resource_data_by_id(pe_resource_viewer::resource_cursor_group, id, index).get_data());
+
+	//Start with the file header and directory (this also validates the group and every listed cursor)
+	std::string cur_file;
+	const uint16_t total = format_cursor_headers(cur_file, group_data, 0, index);
+
+	//Then append the image of every cursor the group lists, in group order
+	const cursor_group* entry = reinterpret_cast<const cursor_group*>(group_data.data() + sizeof(cursor_header));
+	for(const cursor_group* const last = entry + total; entry != last; ++entry)
+	{
+		//The two leading hotspot words are not part of the image
+		cur_file += res_.get_resource_data_by_id(pe_resource_viewer::resource_cursor, entry->Number, index).get_data().substr(2 * sizeof(uint16_t));
+	}
+
+	return cur_file;
+}
+
+//Checks for cursor presence inside cursor group; on a match appends single-cursor file headers to cur_header_data and returns true
+//raw_cursor_data is the cursor resource itself: its first two words provide the hotspot position
+//Throws pe_exception (resource_incorrect_cursor) if the group data or the matched cursor's data is too short
+bool resource_cursor_icon_reader::check_cursor_presence(const std::string& cursor_group_resource_data, uint32_t cursor_id, std::string& cur_header_data, const std::string& raw_cursor_data)
+{
+	//Check resource data length
+	if(cursor_group_resource_data.length() < sizeof(cursor_header))
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	const cursor_header* info = reinterpret_cast<const cursor_header*>(cursor_group_resource_data.data());
+
+	//Check that ALL listed group entries fit into the data (not only the first one), so the loop below never reads out of bounds
+	if(cursor_group_resource_data.length() < sizeof(cursor_header) + sizeof(cursor_group) * info->Count)
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	//Iterate over all cursors listed in cursor group
+	for(uint16_t i = 0; i != info->Count; ++i)
+	{
+		const cursor_group* group = reinterpret_cast<const cursor_group*>(cursor_group_resource_data.data() + sizeof(cursor_header) + i * sizeof(cursor_group));
+		if(group->Number == cursor_id)
+		{
+			//Reserve needed space to speed up a little
+			cur_header_data.reserve(sizeof(cursor_header) + sizeof(cursordirentry));
+			//Write single-cursor cursor header
+			cursor_header new_header = *info;
+			new_header.Count = 1;
+			cur_header_data.append(reinterpret_cast<const char*>(&new_header), sizeof(cursor_header));
+
+			//Fill cursor info
+			cursordirentry direntry;
+			direntry.ColorCount = 0; //OK
+			direntry.Width = static_cast<uint8_t>(group->Width);
+			//The group stores a doubled height: halve it BEFORE narrowing to a byte, otherwise e.g. 256 (a 128 pixel cursor) truncates to 0
+			direntry.Height = static_cast<uint8_t>(group->Height / 2);
+			direntry.Reserved = 0;
+
+			if(raw_cursor_data.length() < 2 * sizeof(uint16_t))
+				throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+			//Here it is - two words in the very beginning of cursor data
+			direntry.HotspotX = *reinterpret_cast<const uint16_t*>(raw_cursor_data.data());
+			direntry.HotspotY = *reinterpret_cast<const uint16_t*>(raw_cursor_data.data() + sizeof(uint16_t));
+
+			//Fill the rest data (the image is placed right after both headers)
+			direntry.SizeInBytes = group->SizeInBytes - 2 * sizeof(uint16_t);
+			direntry.ImageOffset = sizeof(cursor_header) + sizeof(cursordirentry);
+			//Add cursor header
+			cur_header_data.append(reinterpret_cast<const char*>(&direntry), sizeof(cursordirentry));
+			return true;
+		}
+	}
+
+	return false;
+}
+
+//Looks up the cursor group (of given language) which lists the cursor with given ID and returns single-cursor file headers built from it
+//raw_cursor_data supplies the hotspot; ID-addressed groups are searched before named ones; throws pe_exception (no_cursor_group_found) if no group lists the cursor
+const std::string resource_cursor_icon_reader::lookup_cursor_group_data_by_cursor(uint32_t cursor_id, uint32_t language, const std::string& raw_cursor_data) const
+{
+	std::string cursor_header_data;
+
+	{
+		//List all ID-resources; a group is only read if it exists in the requested language
+		pe_resource_viewer::resource_id_list ids(res_.list_resource_ids(pe_resource_viewer::resource_cursor_group));
+		for(pe_resource_viewer::resource_id_list::const_iterator it = ids.begin(); it != ids.end(); ++it)
+		{
+			pe_resource_viewer::resource_language_list group_languages(res_.list_resource_languages(pe_resource_viewer::resource_cursor_group, *it));
+			if(std::find(group_languages.begin(), group_languages.end(), language) != group_languages.end()
+				&& check_cursor_presence(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor_group, *it).get_data(), cursor_id, cursor_header_data, raw_cursor_data))
+				return cursor_header_data;
+		}
+	}
+
+	{
+		//List all named resources; same language filter as above
+		pe_resource_viewer::resource_name_list names(res_.list_resource_names(pe_resource_viewer::resource_cursor_group));
+		for(pe_resource_viewer::resource_name_list::const_iterator it = names.begin(); it != names.end(); ++it)
+		{
+			pe_resource_viewer::resource_language_list group_languages(res_.list_resource_languages(pe_resource_viewer::resource_cursor_group, *it));
+			if(std::find(group_languages.begin(), group_languages.end(), language) != group_languages.end()
+				&& check_cursor_presence(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_cursor_group, *it).get_data(), cursor_id, cursor_header_data, raw_cursor_data))
+				return cursor_header_data;
+		}
+	}
+
+	throw pe_exception("No cursor group found for requested cursor", pe_exception::no_cursor_group_found);
+}
+}
diff --git a/tools/pe_bliss/resource_cursor_icon_reader.h b/tools/pe_bliss/resource_cursor_icon_reader.h
new file mode 100644
index 0000000000..e34fff419b
--- /dev/null
+++ b/tools/pe_bliss/resource_cursor_icon_reader.h
@@ -0,0 +1,84 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+class pe_resource_viewer;
+
+class resource_cursor_icon_reader //Reads icon and cursor resources through a pe_resource_viewer and returns them as header + directory entries + image data
+{
+public:
+	resource_cursor_icon_reader(const pe_resource_viewer& res); //Only a reference to "res" is stored (see res_ below)
+
+	//Returns single icon data by ID and language (minimum checks of format correctness)
+	const std::string get_single_icon_by_id_lang(uint32_t language, uint32_t id) const;
+	//Returns single icon data by ID and index in language directory (instead of language) (minimum checks of format correctness); throws if index is out of range
+	const std::string get_single_icon_by_id(uint32_t id, uint32_t index = 0) const;
+
+	//Returns icon data of group of icons by name and language (minimum checks of format correctness)
+	const std::string get_icon_by_name(uint32_t language, const std::wstring& icon_group_name) const;
+	//Returns icon data of group of icons by name and index in language directory (instead of language) (minimum checks of format correctness)
+	const std::string get_icon_by_name(const std::wstring& icon_group_name, uint32_t index = 0) const;
+	//Returns icon data of group of icons by ID and language (minimum checks of format correctness)
+	const std::string get_icon_by_id_lang(uint32_t language, uint32_t icon_group_id) const;
+	//Returns icon data of group of icons by ID and index in language directory (instead of language) (minimum checks of format correctness)
+	const std::string get_icon_by_id(uint32_t icon_group_id, uint32_t index = 0) const;
+	
+	//Returns single cursor data by ID and language (minimum checks of format correctness)
+	const std::string get_single_cursor_by_id_lang(uint32_t language, uint32_t id) const;
+	//Returns single cursor data by ID and index in language directory (instead of language) (minimum checks of format correctness); throws if index is out of range
+	const std::string get_single_cursor_by_id(uint32_t id, uint32_t index = 0) const;
+
+	//Returns cursor data of group of cursors by name and language (minimum checks of format correctness)
+	const std::string get_cursor_by_name(uint32_t language, const std::wstring& cursor_group_name) const;
+	//Returns cursor data of group of cursors by name and index in language directory (instead of language) (minimum checks of format correctness)
+	const std::string get_cursor_by_name(const std::wstring& cursor_group_name, uint32_t index = 0) const;
+	//Returns cursor data of group of cursors by ID and language (minimum checks of format correctness)
+	const std::string get_cursor_by_id_lang(uint32_t language, uint32_t cursor_group_id) const;
+	//Returns cursor data of group of cursors by ID and index in language directory (instead of language) (minimum checks of format correctness)
+	const std::string get_cursor_by_id(uint32_t cursor_group_id, uint32_t index = 0) const;
+
+private:
+	const pe_resource_viewer& res_; //Reference member: the referenced viewer must outlive this reader
+
+	//Helper function of creating icon headers from ICON_GROUP resource data (headers are appended to ico_data)
+	//Returns icon count
+	static uint16_t format_icon_headers(std::string& ico_data, const std::string& resource_data);
+	
+	//Helper function of creating cursor headers from CURSOR_GROUP resource data; index == 0xFFFFFFFF (the default) means "look cursors up by language", any other value means "by index"
+	//Returns cursor count
+	uint16_t format_cursor_headers(std::string& cur_data, const std::string& resource_data, uint32_t language, uint32_t index = 0xFFFFFFFF) const;
+
+	//Looks up icon group by icon id and returns full icon headers if found (throws otherwise)
+	const std::string lookup_icon_group_data_by_icon(uint32_t icon_id, uint32_t language) const;
+	//Checks for icon presence inside icon group, fills icon headers if found
+	static bool check_icon_presence(const std::string& icon_group_resource_data, uint32_t icon_id, std::string& ico_data);
+
+	//Looks up cursor group by cursor id and returns full cursor headers if found (throws otherwise); raw_cursor_data provides the hotspot
+	const std::string lookup_cursor_group_data_by_cursor(uint32_t cursor_id, uint32_t language, const std::string& raw_cursor_data) const;
+	//Checks for cursor presence inside cursor group, fills cursor headers if found (NOTE: the first parameter holds CURSOR group data despite its name)
+	static bool check_cursor_presence(const std::string& icon_group_resource_data, uint32_t cursor_id, std::string& cur_header_data, const std::string& raw_cursor_data);
+};
+}
diff --git a/tools/pe_bliss/resource_cursor_icon_writer.cpp b/tools/pe_bliss/resource_cursor_icon_writer.cpp
new file mode 100644
index 0000000000..2f1c4363c4
--- /dev/null
+++ b/tools/pe_bliss/resource_cursor_icon_writer.cpp
@@ -0,0 +1,447 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <algorithm>
+#include <string.h>
+#include "resource_cursor_icon_writer.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+resource_cursor_icon_writer::resource_cursor_icon_writer(pe_resource_manager& res)
+	:res_(res) //Only a reference to the resource manager is stored
+{}
+
+//Add icon helper: validates icon_file, adds every image of it as a separate resource_icon resource and adds or replaces the icon group entry (a new group is built if group_icon_info is zero, otherwise new entries are appended to the existing group data). Throws pe_exception (resource_incorrect_icon) on malformed icon file or group data
+void resource_cursor_icon_writer::add_icon(const std::string& icon_file, const resource_data_info* group_icon_info /* or zero */, resource_directory_entry& new_icon_group_entry, const resource_directory::entry_finder& finder, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp)
+{
+	//Check icon for correctness: file must contain at least the icon header
+	if(icon_file.length() < sizeof(ico_header))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	const ico_header* icon_header = reinterpret_cast<const ico_header*>(&icon_file[0]);
+
+	unsigned long size_of_headers = sizeof(ico_header) + icon_header->Count * sizeof(icondirentry);
+	if(icon_file.length() < size_of_headers || icon_header->Count == 0)
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	//Enumerate all icons in file and validate them before any resource is added
+	for(uint16_t i = 0; i != icon_header->Count; ++i)
+	{
+		//Check icon entries: image must be non-empty, start after the headers and lie completely inside the file
+		const icondirentry* icon_entry = reinterpret_cast<const icondirentry*>(&icon_file[sizeof(ico_header) + i * sizeof(icondirentry)]);
+		if(icon_entry->SizeInBytes == 0
+			|| icon_entry->ImageOffset < size_of_headers
+			|| !pe_utils::is_sum_safe(icon_entry->ImageOffset, icon_entry->SizeInBytes)
+			|| icon_entry->ImageOffset + icon_entry->SizeInBytes > icon_file.length())
+			throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+	}
+
+	std::string icon_group_data;
+	ico_header* info = 0; //Header of existing group data, stays zero when a new group is created
+
+	if(group_icon_info)
+	{
+		//If icon group already exists
+		{
+			icon_group_data = group_icon_info->get_data();
+			codepage = group_icon_info->get_codepage(); //Don't change codepage of icon group entry
+		}
+
+		//Check resource data size: existing group data must contain at least the header
+		if(icon_group_data.length() < sizeof(ico_header))
+			throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+		//Get icon header
+		info = reinterpret_cast<ico_header*>(&icon_group_data[0]);
+
+		//Check resource data size: all entries declared by the header must be present
+		if(icon_group_data.length() < sizeof(ico_header) + info->Count * sizeof(icon_group))
+			throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+		icon_group_data.resize(sizeof(ico_header) + (info->Count + icon_header->Count) * sizeof(icon_group)); //Make room for the new entries
+		info = reinterpret_cast<ico_header*>(&icon_group_data[0]); //In case if memory was reallocated
+	}
+	else //Entry not found - icon group doesn't exist
+	{
+		icon_group_data.resize(sizeof(ico_header) + icon_header->Count * sizeof(icon_group));
+		memcpy(&icon_group_data[0], icon_header, sizeof(ico_header)); //New group header is copied from the file header
+	}
+
+	//Search for available icon IDs (one ID per image in the file)
+	std::vector<uint16_t> icon_id_list(get_icon_or_cursor_free_id_list(pe_resource_viewer::resource_icon, mode, icon_header->Count));
+
+	//Enumerate all icons in file
+	for(uint16_t i = 0; i != icon_header->Count; ++i)
+	{
+		const icondirentry* icon_entry = reinterpret_cast<const icondirentry*>(&icon_file[sizeof(ico_header) + i * sizeof(icondirentry)]);
+		icon_group group = {0};
+
+		//Fill icon resource header
+		group.BitCount = icon_entry->BitCount;
+		group.ColorCount = icon_entry->ColorCount;
+		group.Height = icon_entry->Height;
+		group.Planes = icon_entry->Planes;
+		group.Reserved = icon_entry->Reserved;
+		group.SizeInBytes = icon_entry->SizeInBytes;
+		group.Width = icon_entry->Width;
+		group.Number = icon_id_list.at(i);
+
+		memcpy(&icon_group_data[sizeof(ico_header) + ((info ? info->Count : 0) + i) * sizeof(icon_group)], &group, sizeof(group)); //New entries follow the existing ones (info->Count is increased only after this loop)
+
+		//Add icon to resources under the ID stored in the group entry
+		resource_directory_entry new_entry;
+		new_entry.set_id(group.Number);
+		res_.add_resource(icon_file.substr(icon_entry->ImageOffset, icon_entry->SizeInBytes), pe_resource_viewer::resource_icon, new_entry, resource_directory::entry_finder(group.Number), language, codepage, timestamp);
+	}
+
+	if(info)
+		info->Count += icon_header->Count; //Increase icon count, if we're adding icon to existing group
+
+	{
+		//Add or replace icon group data entry
+		res_.add_resource(icon_group_data, pe_resource_viewer::resource_icon_group, new_icon_group_entry, finder, language, codepage, timestamp);
+	}
+}
+
+//Returns list of "count" free icon or cursor IDs for resources of given type. With icon_place_free_ids, gaps between existing IDs are used first; remaining IDs (or all of them for icon_place_after_max_icon_id) follow the maximum existing ID. If listing existing IDs throws pe_exception, IDs 1..count are returned
+const std::vector<uint16_t> resource_cursor_icon_writer::get_icon_or_cursor_free_id_list(pe_resource_viewer::resource_type type, icon_place_mode mode, uint32_t count)
+{
+	//Search for available icon/cursor IDs
+	std::vector<uint16_t> icon_cursor_id_list;
+
+	try
+	{
+		//List IDs already used by resources of this type, in ascending order
+		//(list_resource_ids may throw pe_exception, which is handled below)
+		std::vector<uint32_t> id_list(res_.list_resource_ids(type));
+		std::sort(id_list.begin(), id_list.end());
+
+		//If we are placing icon on free spaces
+		//I.e., icon IDs 1, 3, 4, 7, 8 already exist
+		//We'll place five icons on IDs 2, 5, 6, 9, 10
+		if(mode != icon_place_after_max_icon_id)
+		{
+			if(!id_list.empty())
+			{
+				//Determine and list free icon IDs
+				for(std::vector<uint32_t>::const_iterator it = id_list.begin(); it != id_list.end(); ++it)
+				{
+					if(it == id_list.begin())
+					{
+						if(*it > 1) //IDs from 1 up to the smallest existing ID are free
+						{
+							for(uint16_t i = 1; i != *it; ++i)
+							{
+								icon_cursor_id_list.push_back(i);
+								if(icon_cursor_id_list.size() == count)
+									break;
+							}
+						}
+					}
+					else if(*it - *(it - 1) > 1) //Gap between neighbouring IDs; list is sorted, so the unsigned difference can't wrap around
+					{
+						for(uint16_t i = static_cast<uint16_t>(*(it - 1) + 1); i != static_cast<uint16_t>(*it); ++i)
+						{
+							icon_cursor_id_list.push_back(i);
+							if(icon_cursor_id_list.size() == count)
+								break;
+						}
+					}
+
+					if(icon_cursor_id_list.size() == count)
+						break;
+				}
+			}
+		}
+
+		uint32_t max_id = id_list.empty() ? 0 : *std::max_element(id_list.begin(), id_list.end()); //IDs that are still missing continue after the maximum existing ID
+		for(uint32_t i = static_cast<uint32_t>(icon_cursor_id_list.size()); i != count; ++i)
+			icon_cursor_id_list.push_back(static_cast<uint16_t>(++max_id));
+	}
+	catch(const pe_exception&) //Entry not found
+	{
+		for(uint32_t i = 0; i != count; ++i) //32-bit counter: a uint16_t counter can never become equal to count + 1 when count is 65535
+			icon_cursor_id_list.push_back(static_cast<uint16_t>(i + 1));
+	}
+
+	return icon_cursor_id_list;
+}
+
+//Add cursor helper: validates cursor_file, adds every image of it (prefixed with its hotspot coordinates) as a separate resource_cursor resource and adds or replaces the cursor group entry (a new group is built if group_cursor_info is zero, otherwise new entries are appended to the existing group data). Throws pe_exception (resource_incorrect_cursor) on malformed cursor file or group data
+void resource_cursor_icon_writer::add_cursor(const std::string& cursor_file, const resource_data_info* group_cursor_info /* or zero */, resource_directory_entry& new_cursor_group_entry, const resource_directory::entry_finder& finder, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp)
+{
+	//Check cursor for correctness: file must contain at least the cursor header
+	if(cursor_file.length() < sizeof(cursor_header))
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	const cursor_header* cur_header = reinterpret_cast<const cursor_header*>(&cursor_file[0]);
+
+	unsigned long size_of_headers = sizeof(cursor_header) + cur_header->Count * sizeof(cursordirentry);
+	if(cursor_file.length() < size_of_headers || cur_header->Count == 0)
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	//Enumerate all cursors in file and validate them before any resource is added
+	for(uint16_t i = 0; i != cur_header->Count; ++i)
+	{
+		//Check cursor entries: image must be non-empty, start after the headers and lie completely inside the file
+		const cursordirentry* cursor_entry = reinterpret_cast<const cursordirentry*>(&cursor_file[sizeof(cursor_header) + i * sizeof(cursordirentry)]);
+		if(cursor_entry->SizeInBytes == 0
+			|| cursor_entry->ImageOffset < size_of_headers
+			|| !pe_utils::is_sum_safe(cursor_entry->ImageOffset, cursor_entry->SizeInBytes)
+			|| cursor_entry->ImageOffset + cursor_entry->SizeInBytes > cursor_file.length())
+			throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+	}
+
+	std::string cursor_group_data;
+	cursor_header* info = 0; //Header of existing group data, stays zero when a new group is created
+
+	if(group_cursor_info)
+	{
+		//If cursor group already exists
+		{
+			cursor_group_data = group_cursor_info->get_data();
+			codepage = group_cursor_info->get_codepage(); //Don't change codepage of cursor group entry
+		}
+
+		//Check resource data size: existing group data must contain at least the header
+		if(cursor_group_data.length() < sizeof(cursor_header))
+			throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+		//Get cursor header
+		info = reinterpret_cast<cursor_header*>(&cursor_group_data[0]);
+
+		//Check resource data size: all entries declared by the header must be present
+		if(cursor_group_data.length() < sizeof(cursor_header) + info->Count * sizeof(cursor_group))
+			throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+		cursor_group_data.resize(sizeof(cursor_header) + (info->Count + cur_header->Count) * sizeof(cursor_group)); //Make room for the new entries
+		info = reinterpret_cast<cursor_header*>(&cursor_group_data[0]); //In case if memory was reallocated
+	}
+	else //Entry not found - cursor group doesn't exist
+	{
+		cursor_group_data.resize(sizeof(cursor_header) + cur_header->Count * sizeof(cursor_group));
+		memcpy(&cursor_group_data[0], cur_header, sizeof(cursor_header)); //New group header is copied from the file header
+	}
+
+	//Search for available cursor IDs (one ID per image in the file)
+	std::vector<uint16_t> cursor_id_list(get_icon_or_cursor_free_id_list(pe_resource_viewer::resource_cursor, mode, cur_header->Count));
+
+	//Enumerate all cursors in file
+	for(uint16_t i = 0; i != cur_header->Count; ++i)
+	{
+		const cursordirentry* cursor_entry = reinterpret_cast<const cursordirentry*>(&cursor_file[sizeof(cursor_header) + i * sizeof(cursordirentry)]);
+		cursor_group group = {0};
+
+		//Fill cursor resource header (fields not assigned here stay zero; NOTE(review): height is doubled, presumably to account for both cursor masks - confirm)
+		group.Height = cursor_entry->Height * 2;
+		group.SizeInBytes = cursor_entry->SizeInBytes + 2 * sizeof(uint16_t) /* hotspot coordinates */;
+		group.Width = cursor_entry->Width;
+		group.Number = cursor_id_list.at(i);
+
+		memcpy(&cursor_group_data[sizeof(cursor_header) + ((info ? info->Count : 0) + i) * sizeof(cursor_group)], &group, sizeof(group)); //New entries follow the existing ones (info->Count is increased only after this loop)
+
+		//Add cursor to resources under the ID stored in the group entry
+		resource_directory_entry new_entry;
+		new_entry.set_id(group.Number);
+
+		//Fill resource data (two WORDs for hotspot of cursor, and cursor bitmap data)
+		std::string cur_data;
+		cur_data.resize(sizeof(uint16_t) * 2);
+		memcpy(&cur_data[0], &cursor_entry->HotspotX, sizeof(uint16_t));
+		memcpy(&cur_data[sizeof(uint16_t)], &cursor_entry->HotspotY, sizeof(uint16_t));
+		cur_data.append(cursor_file.substr(cursor_entry->ImageOffset, cursor_entry->SizeInBytes));
+
+		res_.add_resource(cur_data, pe_resource_viewer::resource_cursor, new_entry, resource_directory::entry_finder(group.Number), language, codepage, timestamp);
+	}
+
+	if(info)
+		info->Count += cur_header->Count; //Increase cursor count, if we're adding cursor to existing group
+
+	{
+		//Add or replace cursor group data entry
+		res_.add_resource(cursor_group_data, pe_resource_viewer::resource_cursor_group, new_cursor_group_entry, finder, language, codepage, timestamp);
+	}
+}
+
+//Appends icon(s) taken from icon file data to the icon group named "icon_group_name"
+//If there is no such group for given language, a new group is created
+//(For an existing group, codepage of icon group and icons will not be changed)
+//timestamp will be used for directories that will be added
+//icon_place_mode determines, how IDs for new icon(s) are chosen
+void resource_cursor_icon_writer::add_icon(const std::string& icon_file, const std::wstring& icon_group_name, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp)
+{
+	//Data of existing group, stays null if the group can't be retrieved
+	std::auto_ptr<resource_data_info> existing_group;
+	try
+	{
+		existing_group.reset(new resource_data_info(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_icon_group, icon_group_name)));
+	}
+	catch(const pe_exception&) //Entry not found
+	{
+	}
+
+	resource_directory_entry group_entry;
+	group_entry.set_name(icon_group_name);
+	add_icon(icon_file, existing_group.get(), group_entry, resource_directory::entry_finder(icon_group_name), language, mode, codepage, timestamp);
+}
+
+void resource_cursor_icon_writer::add_icon(const std::string& icon_file, uint32_t icon_group_id, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp)
+{
+	//Icon group is identified by ID here; existing group data stays null if it can't be retrieved
+	std::auto_ptr<resource_data_info> existing_group;
+	try
+	{
+		existing_group.reset(new resource_data_info(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon_group, icon_group_id)));
+	}
+	catch(const pe_exception&) //Entry not found
+	{
+	}
+
+	resource_directory_entry group_entry;
+	group_entry.set_id(icon_group_id);
+	add_icon(icon_file, existing_group.get(), group_entry, resource_directory::entry_finder(icon_group_id), language, mode, codepage, timestamp);
+}
+
+//Appends cursor(s) taken from cursor file data to the cursor group named "cursor_group_name"
+//If there is no such group for given language, a new group is created
+//(For an existing group, codepage of cursor group and cursors will not be changed)
+//timestamp will be used for directories that will be added
+//icon_place_mode determines, how IDs for new cursor(s) are chosen
+void resource_cursor_icon_writer::add_cursor(const std::string& cursor_file, const std::wstring& cursor_group_name, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp)
+{
+	//Data of existing group, stays null if the group can't be retrieved
+	std::auto_ptr<resource_data_info> existing_group;
+	try
+	{
+		existing_group.reset(new resource_data_info(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_cursor_group, cursor_group_name)));
+	}
+	catch(const pe_exception&) //Entry not found
+	{
+	}
+
+	resource_directory_entry group_entry;
+	group_entry.set_name(cursor_group_name);
+	add_cursor(cursor_file, existing_group.get(), group_entry, resource_directory::entry_finder(cursor_group_name), language, mode, codepage, timestamp);
+}
+
+void resource_cursor_icon_writer::add_cursor(const std::string& cursor_file, uint32_t cursor_group_id, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp)
+{
+	//Cursor group is identified by ID here; existing group data stays null if it can't be retrieved
+	std::auto_ptr<resource_data_info> existing_group;
+	try
+	{
+		existing_group.reset(new resource_data_info(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor_group, cursor_group_id)));
+	}
+	catch(const pe_exception&) //Entry not found
+	{
+	}
+
+	resource_directory_entry group_entry;
+	group_entry.set_id(cursor_group_id);
+	add_cursor(cursor_file, existing_group.get(), group_entry, resource_directory::entry_finder(cursor_group_id), language, mode, codepage, timestamp);
+}
+
+//Remove icon group helper: removes every icon resource referenced by raw icon group data (the group entry itself is not removed here)
+void resource_cursor_icon_writer::remove_icons_from_icon_group(const std::string& icon_group_data, uint32_t language)
+{
+	const std::string::size_type data_size = icon_group_data.length();
+
+	//Data must contain at least the group header
+	if(data_size < sizeof(ico_header))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	//Number of entries declared by the group header
+	const uint16_t total = reinterpret_cast<const ico_header*>(icon_group_data.data())->Count;
+
+	//All declared entries must fit into the data
+	if(data_size < sizeof(ico_header) + total * sizeof(icon_group))
+		throw pe_exception("Incorrect resource icon", pe_exception::resource_incorrect_icon);
+
+	//Entries follow the header; remove the icon each of them refers to
+	for(uint16_t idx = 0; idx < total; ++idx)
+	{
+		const char* raw_entry = icon_group_data.data() + sizeof(ico_header) + idx * sizeof(icon_group);
+		res_.remove_resource(pe_resource_viewer::resource_icon, reinterpret_cast<const icon_group*>(raw_entry)->Number, language);
+	}
+}
+
+//Remove cursor group helper: removes every cursor resource referenced by raw cursor group data (the group entry itself is not removed here). Throws pe_exception (resource_incorrect_cursor) if the data is shorter than its header declares
+void resource_cursor_icon_writer::remove_cursors_from_cursor_group(const std::string& cursor_group_data, uint32_t language)
+{
+	//Check resource data size: data must contain at least the group header
+	if(cursor_group_data.length() < sizeof(cursor_header))
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	//Get cursor header
+	const cursor_header* info = reinterpret_cast<const cursor_header*>(cursor_group_data.data());
+
+	uint16_t cursor_count = info->Count;
+
+	//Check resource data size: all declared cursor group entries must be present
+	if(cursor_group_data.length() < sizeof(cursor_header) + cursor_count * sizeof(cursor_group))
+		throw pe_exception("Incorrect resource cursor", pe_exception::resource_incorrect_cursor);
+
+	//Remove cursor data (entries are cursor_group structures, matching the sizeof(cursor_group) stride)
+	for(uint16_t i = 0; i != cursor_count; ++i)
+	{
+		const cursor_group* group = reinterpret_cast<const cursor_group*>(cursor_group_data.data() + sizeof(cursor_header) + i * sizeof(cursor_group));
+		res_.remove_resource(pe_resource_viewer::resource_cursor, group->Number, language);
+	}
+}
+
+//Removes cursor group with given name and language together with all cursors it refers to
+bool resource_cursor_icon_writer::remove_cursor_group(const std::wstring& cursor_group_name, uint32_t language)
+{
+	//Take a copy of group data, then remove referenced cursors and finally the group entry itself
+	const std::string group_data(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_cursor_group, cursor_group_name).get_data());
+	remove_cursors_from_cursor_group(group_data, language);
+	return res_.remove_resource(pe_resource_viewer::resource_cursor_group, cursor_group_name, language);
+}
+
+//Removes cursor group with given ID and language together with all cursors it refers to
+bool resource_cursor_icon_writer::remove_cursor_group(uint32_t cursor_group_id, uint32_t language)
+{
+	//Take a copy of group data (looked up by ID), then remove referenced cursors and finally the group entry itself
+	const std::string group_data(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_cursor_group, cursor_group_id).get_data());
+	remove_cursors_from_cursor_group(group_data, language);
+	return res_.remove_resource(pe_resource_viewer::resource_cursor_group, cursor_group_id, language);
+}
+
+//Removes icon group with given name and language together with all icons it refers to
+bool resource_cursor_icon_writer::remove_icon_group(const std::wstring& icon_group_name, uint32_t language)
+{
+	//Take a copy of group data, then remove referenced icons and finally the group entry itself
+	const std::string group_data(res_.get_resource_data_by_name(language, pe_resource_viewer::resource_icon_group, icon_group_name).get_data());
+	remove_icons_from_icon_group(group_data, language);
+	return res_.remove_resource(pe_resource_viewer::resource_icon_group, icon_group_name, language);
+}
+
+//Removes icon group with given ID and language together with all icons it refers to
+bool resource_cursor_icon_writer::remove_icon_group(uint32_t icon_group_id, uint32_t language)
+{
+	//Take a copy of group data (looked up by ID), then remove referenced icons and finally the group entry itself
+	const std::string group_data(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_icon_group, icon_group_id).get_data());
+	remove_icons_from_icon_group(group_data, language);
+	return res_.remove_resource(pe_resource_viewer::resource_icon_group, icon_group_id, language);
+}
+}
diff --git a/tools/pe_bliss/resource_cursor_icon_writer.h b/tools/pe_bliss/resource_cursor_icon_writer.h
new file mode 100644
index 0000000000..e73ac6a093
--- /dev/null
+++ b/tools/pe_bliss/resource_cursor_icon_writer.h
@@ -0,0 +1,94 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <vector>
+#include "stdint_defs.h"
+#include "pe_resource_manager.h"
+
+namespace pe_bliss
+{
+class pe_resource_manager;
+
+class resource_cursor_icon_writer //Adds and removes icon/cursor groups (together with their icons/cursors) through pe_resource_manager
+{
+public:
+	//Determines, how IDs for new icon(s) or cursor(s) will be chosen
+	enum icon_place_mode
+	{
+		icon_place_after_max_icon_id, //Icon(s) will be placed after all existing
+		icon_place_free_ids //New icon(s) will take free IDs between existing icons first, the remaining ones are placed after all existing
+	};
+	
+public:
+	resource_cursor_icon_writer(pe_resource_manager& res); //Only a reference to res is stored
+
+	//Removes icon group and all its icons by name/ID and language
+	bool remove_icon_group(const std::wstring& icon_group_name, uint32_t language);
+	bool remove_icon_group(uint32_t icon_group_id, uint32_t language);
+
+	//Adds icon(s) from icon file data
+	//timestamp will be used for directories that will be added
+	//If icon group with name "icon_group_name" or ID "icon_group_id" already exists, it will be appended with new icon(s)
+	//(Codepage of icon group and icons will not be changed in this case)
+	//icon_place_mode determines, how new icon(s) will be placed
+	void add_icon(const std::string& icon_file,
+		const std::wstring& icon_group_name,
+		uint32_t language, icon_place_mode mode = icon_place_after_max_icon_id,
+		uint32_t codepage = 0, uint32_t timestamp = 0);
+
+	void add_icon(const std::string& icon_file,
+		uint32_t icon_group_id,
+		uint32_t language, icon_place_mode mode = icon_place_after_max_icon_id,
+		uint32_t codepage = 0, uint32_t timestamp = 0);
+	
+	//Removes cursor group and all its cursors by name/ID and language
+	bool remove_cursor_group(const std::wstring& cursor_group_name, uint32_t language);
+	bool remove_cursor_group(uint32_t cursor_group_id, uint32_t language);
+
+	//Adds cursor(s) from cursor file data
+	//timestamp will be used for directories that will be added
+	//If cursor group with name "cursor_group_name" or ID "cursor_group_id" already exists, it will be appended with new cursor(s)
+	//(Codepage of cursor group and cursors will not be changed in this case)
+	//icon_place_mode determines, how new cursor(s) will be placed
+	void add_cursor(const std::string& cursor_file, const std::wstring& cursor_group_name, uint32_t language, icon_place_mode mode = icon_place_after_max_icon_id, uint32_t codepage = 0, uint32_t timestamp = 0);
+	void add_cursor(const std::string& cursor_file, uint32_t cursor_group_id, uint32_t language, icon_place_mode mode = icon_place_after_max_icon_id, uint32_t codepage = 0, uint32_t timestamp = 0);
+
+private:
+	pe_resource_manager& res_; //Resource manager all additions and removals go through
+
+	//Add icon helper shared by both public add_icon overloads; group_icon_info is zero if the group doesn't exist yet
+	void add_icon(const std::string& icon_file, const resource_data_info* group_icon_info /* or zero */, resource_directory_entry& new_icon_group_entry, const resource_directory::entry_finder& finder, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp);
+	
+	//Remove icon group helper: removes the icons listed in raw icon group data
+	void remove_icons_from_icon_group(const std::string& icon_group_data, uint32_t language);
+
+	//Add cursor helper shared by both public add_cursor overloads; group_cursor_info is zero if the group doesn't exist yet
+	void add_cursor(const std::string& cursor_file, const resource_data_info* group_cursor_info /* or zero */, resource_directory_entry& new_cursor_group_entry, const resource_directory::entry_finder& finder, uint32_t language, icon_place_mode mode, uint32_t codepage, uint32_t timestamp);
+
+	//Remove cursor group helper: removes the cursors listed in raw cursor group data
+	void remove_cursors_from_cursor_group(const std::string& cursor_group_data, uint32_t language);
+
+	//Returns list of "count" free icon or cursor IDs depending on icon_place_mode
+	const std::vector<uint16_t> get_icon_or_cursor_free_id_list(pe_resource_manager::resource_type type, icon_place_mode mode, uint32_t count);
+};
+}
diff --git a/tools/pe_bliss/resource_data_info.cpp b/tools/pe_bliss/resource_data_info.cpp
new file mode 100644
index 0000000000..75bb060eae
--- /dev/null
+++ b/tools/pe_bliss/resource_data_info.cpp
@@ -0,0 +1,48 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "resource_data_info.h"
+#include "pe_resource_viewer.h"
+
+namespace pe_bliss
+{
+//Constructor from raw data and codepage (this is not a default constructor: both arguments are required)
+resource_data_info::resource_data_info(const std::string& data, uint32_t codepage)
+	:data_(data), codepage_(codepage)
+{}
+
+//Constructor from resource data entry: copies its data and codepage
+resource_data_info::resource_data_info(const resource_data_entry& data)
+	:data_(data.get_data()), codepage_(data.get_codepage())
+{}
+
+//Returns reference to stored resource data
+const std::string& resource_data_info::get_data() const
+{
+	return data_;
+}
+
+//Returns codepage stored along with resource data
+uint32_t resource_data_info::get_codepage() const
+{
+	return codepage_;
+}
+}
diff --git a/tools/pe_bliss/resource_data_info.h b/tools/pe_bliss/resource_data_info.h
new file mode 100644
index 0000000000..e2275ebbf5
--- /dev/null
+++ b/tools/pe_bliss/resource_data_info.h
@@ -0,0 +1,48 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+class resource_data_entry;
+
+//Class representing resource data: holds a copy of raw data together with its codepage
+class resource_data_info
+{
+public:
+	//Constructor from raw data and codepage
+	resource_data_info(const std::string& data, uint32_t codepage);
+	//Constructor from resource data entry (its data and codepage are copied)
+	explicit resource_data_info(const resource_data_entry& data);
+
+	//Returns reference to stored resource data
+	const std::string& get_data() const;
+	//Returns resource codepage
+	uint32_t get_codepage() const;
+
+private:
+	std::string data_; //Copy of resource data
+	uint32_t codepage_; //Codepage of resource data
+};
+}
diff --git a/tools/pe_bliss/resource_internal.h b/tools/pe_bliss/resource_internal.h
new file mode 100644
index 0000000000..64a5bf3903
--- /dev/null
+++ b/tools/pe_bliss/resource_internal.h
@@ -0,0 +1,34 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+
+#define U16TEXT(t) reinterpret_cast<const unicode16_t*>( t )
+//U16TEXT reinterprets a narrow literal as unicode16_t data; the keys below spell every character followed by a zero byte (low byte first)
+#define StringFileInfo U16TEXT("S\0t\0r\0i\0n\0g\0F\0i\0l\0e\0I\0n\0f\0o\0\0")
+#define SizeofStringFileInfo sizeof("S\0t\0r\0i\0n\0g\0F\0i\0l\0e\0I\0n\0f\0o\0\0")
+#define VarFileInfo U16TEXT("V\0a\0r\0F\0i\0l\0e\0I\0n\0f\0o\0\0")
+#define Translation U16TEXT("T\0r\0a\0n\0s\0l\0a\0t\0i\0o\0n\0\0")
+//"Aligned" variants carry two extra trailing zero bytes; the Sizeof* macros give the byte size of the literal (including its implicit terminator)
+#define VarFileInfoAligned U16TEXT("V\0a\0r\0F\0i\0l\0e\0I\0n\0f\0o\0\0\0\0")
+#define TranslationAligned U16TEXT("T\0r\0a\0n\0s\0l\0a\0t\0i\0o\0n\0\0\0\0")
+#define SizeofVarFileInfoAligned sizeof("V\0a\0r\0F\0i\0l\0e\0I\0n\0f\0o\0\0\0\0")
+#define SizeofTranslationAligned sizeof("T\0r\0a\0n\0s\0l\0a\0t\0i\0o\0n\0\0\0\0")
diff --git a/tools/pe_bliss/resource_message_list_reader.cpp b/tools/pe_bliss/resource_message_list_reader.cpp
new file mode 100644
index 0000000000..f2ea142bee
--- /dev/null
+++ b/tools/pe_bliss/resource_message_list_reader.cpp
@@ -0,0 +1,131 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "resource_message_list_reader.h"
+#include "pe_resource_viewer.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+resource_message_list_reader::resource_message_list_reader(const pe_resource_viewer& res)
+	:res_(res) //Only a reference to the viewer is stored, no copy is made
+{}
+
+//Helper function of parsing message list table: returns ID -> message map, throws pe_exception (resource_incorrect_message_table) on malformed data
+const resource_message_list resource_message_list_reader::parse_message_list(const std::string& resource_data)
+{
+	resource_message_list ret;
+
+	//Check resource data length
+	if(resource_data.length() < sizeof(message_resource_data))
+		throw pe_exception("Incorrect resource message table", pe_exception::resource_incorrect_message_table);
+
+	const message_resource_data* message_data = reinterpret_cast<const message_resource_data*>(resource_data.data());
+
+	//Check resource data length more carefully and some possible overflows
+	if(message_data->NumberOfBlocks >= pe_utils::max_dword / sizeof(message_resource_block)
+		|| !pe_utils::is_sum_safe(message_data->NumberOfBlocks * sizeof(message_resource_block), sizeof(message_resource_data))
+		|| resource_data.length() < message_data->NumberOfBlocks * sizeof(message_resource_block) + sizeof(message_resource_data))
+		throw pe_exception("Incorrect resource message table", pe_exception::resource_incorrect_message_table);
+
+	//Iterate over all message resource blocks
+	for(unsigned long i = 0; i != message_data->NumberOfBlocks; ++i)
+	{
+		//Get block: the block array starts at offset sizeof(message_resource_data) - sizeof(message_resource_block)
+		const message_resource_block* block =
+			reinterpret_cast<const message_resource_block*>(resource_data.data() + sizeof(message_resource_data) - sizeof(message_resource_block) + sizeof(message_resource_block) * i);
+
+		//Check resource data length and IDs
+		if(resource_data.length() < block->OffsetToEntries || block->LowId > block->HighId)
+			throw pe_exception("Incorrect resource message table", pe_exception::resource_incorrect_message_table);
+
+		unsigned long current_pos = 0; //Offset of the current entry relative to block->OffsetToEntries
+		static const unsigned long size_of_entry_headers = 4;
+		//List all message resource entries in block (LowId <= HighId is guaranteed above, so at least one entry is read)
+		for(uint32_t curr_id = block->LowId; ; ++curr_id)
+		{
+			//Check resource data length and some possible overflows
+			if(!pe_utils::is_sum_safe(block->OffsetToEntries, current_pos)
+				|| !pe_utils::is_sum_safe(block->OffsetToEntries + current_pos, size_of_entry_headers)
+				|| resource_data.length() < block->OffsetToEntries + current_pos + size_of_entry_headers)
+				throw pe_exception("Incorrect resource message table", pe_exception::resource_incorrect_message_table);
+
+			//Get entry
+			const message_resource_entry* entry = reinterpret_cast<const message_resource_entry*>(resource_data.data() + block->OffsetToEntries + current_pos);
+
+			//Check resource data length and entry length and some possible overflows
+			if(entry->Length < size_of_entry_headers
+				|| !pe_utils::is_sum_safe(block->OffsetToEntries + current_pos, entry->Length)
+				|| resource_data.length() < block->OffsetToEntries + current_pos + entry->Length)
+				throw pe_exception("Incorrect resource message table", pe_exception::resource_incorrect_message_table);
+
+			if(entry->Flags & message_resource_unicode)
+			{
+				//If string is UNICODE
+				//Check its length (must hold a whole number of 16-bit characters)
+				if(entry->Length % 2)
+					throw pe_exception("Incorrect resource message table", pe_exception::resource_incorrect_message_table);
+
+				//Add ID and string to message table
+#ifdef PE_BLISS_WINDOWS
+				ret.insert(std::make_pair(curr_id, message_table_item(
+					std::wstring(reinterpret_cast<const wchar_t*>(resource_data.data() + block->OffsetToEntries + current_pos + size_of_entry_headers),
+					(entry->Length - size_of_entry_headers) / 2)
+					)));
+#else
+				ret.insert(std::make_pair(curr_id, message_table_item(
+					pe_utils::from_ucs2(u16string(reinterpret_cast<const unicode16_t*>(resource_data.data() + block->OffsetToEntries + current_pos + size_of_entry_headers),
+					(entry->Length - size_of_entry_headers) / 2))
+					)));
+#endif
+			}
+			else
+			{
+				//If string is ANSI
+				//Add ID and string to message table
+				ret.insert(std::make_pair(curr_id, message_table_item(
+					std::string(resource_data.data() + block->OffsetToEntries + current_pos + size_of_entry_headers,
+					entry->Length - size_of_entry_headers)
+					)));
+			}
+
+			//Go to next entry; leave the loop at HighId explicitly, because "curr_id <= HighId" never becomes false when HighId is 0xFFFFFFFF
+			current_pos += entry->Length;
+			if(curr_id == block->HighId) break;
+		}
+	}
+
+	return ret;
+}
+
+//Returns message table data by ID and index in language directory (instead of language); the raw resource data is parsed by parse_message_list()
+const resource_message_list resource_message_list_reader::get_message_table_by_id(uint32_t id, uint32_t index) const
+{
+	return parse_message_list(res_.get_resource_data_by_id(pe_resource_viewer::resource_message_table, id, index).get_data());
+}
+
+//Returns message table data by ID and language; the raw resource data is parsed by parse_message_list()
+const resource_message_list resource_message_list_reader::get_message_table_by_id_lang(uint32_t language, uint32_t id) const
+{
+	return parse_message_list(res_.get_resource_data_by_id(language, pe_resource_viewer::resource_message_table, id).get_data());
+}
+}
diff --git a/tools/pe_bliss/resource_message_list_reader.h b/tools/pe_bliss/resource_message_list_reader.h
new file mode 100644
index 0000000000..a0ac96eb8c
--- /dev/null
+++ b/tools/pe_bliss/resource_message_list_reader.h
@@ -0,0 +1,49 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "message_table.h"
+
+namespace pe_bliss
+{
+class pe_resource_viewer;
+
+//ID; message_table_item
+typedef std::map<uint32_t, message_table_item> resource_message_list;
+
+class resource_message_list_reader
+{
+public:
+	resource_message_list_reader(const pe_resource_viewer& res); //The viewer is held by reference (see res_)
+
+	//Returns message table data by ID and language (note the argument order: language first, then ID)
+	const resource_message_list get_message_table_by_id_lang(uint32_t language, uint32_t id) const;
+	//Returns message table data by ID and index in language directory (instead of language); index defaults to 0
+	const resource_message_list get_message_table_by_id(uint32_t id, uint32_t index = 0) const;
+
+	//Helper function of parsing message list table; static, so it can be called without a reader instance
+	//resource_data - raw message table resource data
+	static const resource_message_list parse_message_list(const std::string& resource_data);
+
+private:
+	const pe_resource_viewer& res_; //Resource viewer the message tables are read from
+};
+}
diff --git a/tools/pe_bliss/resource_string_table_reader.cpp b/tools/pe_bliss/resource_string_table_reader.cpp
new file mode 100644
index 0000000000..8a51720e6a
--- /dev/null
+++ b/tools/pe_bliss/resource_string_table_reader.cpp
@@ -0,0 +1,109 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "resource_string_table_reader.h"
+#include "pe_resource_viewer.h"
+
+namespace pe_bliss
+{
+resource_string_table_reader::resource_string_table_reader(const pe_resource_viewer& res)
+	:res_(res) //Only a reference to the viewer is stored, no copy is made
+{}
+
+//Returns string table data by ID and index in language directory (instead of language); the table ID is also passed on to parse_string_list()
+const resource_string_list resource_string_table_reader::get_string_table_by_id(uint32_t id, uint32_t index) const
+{
+	return parse_string_list(id, res_.get_resource_data_by_id(pe_resource_viewer::resource_string, id, index).get_data());
+}
+
+//Returns string table data by ID and language; the table ID is also passed on to parse_string_list()
+const resource_string_list resource_string_table_reader::get_string_table_by_id_lang(uint32_t language, uint32_t id) const
+{
+	return parse_string_list(id, res_.get_resource_data_by_id(language, pe_resource_viewer::resource_string, id).get_data());
+}
+
+//Helper function of parsing string list table: returns string ID -> string map, throws pe_exception (resource_incorrect_string_table) on truncated data
+const resource_string_list resource_string_table_reader::parse_string_list(uint32_t id, const std::string& resource_data)
+{
+	resource_string_list ret;
+
+	//16 is maximum count of strings in a string table
+	static const unsigned long max_string_list_entries = 16;
+	unsigned long passed_bytes = 0; //Number of bytes of resource_data consumed so far
+	for(unsigned long i = 0; i != max_string_list_entries; ++i)
+	{
+		//Check resource data length
+		if(resource_data.length() < sizeof(uint16_t) + passed_bytes)
+			throw pe_exception("Incorrect resource string table", pe_exception::resource_incorrect_string_table);
+
+		//Get string length - the first WORD; it counts 16-bit characters, not bytes
+		uint16_t string_length = *reinterpret_cast<const uint16_t*>(resource_data.data() + passed_bytes);
+		passed_bytes += sizeof(uint16_t); //WORD containing string length
+
+		//Check resource data length again: the string occupies two bytes per character
+		if(resource_data.length() < static_cast<unsigned long>(string_length) * 2 + passed_bytes)
+			throw pe_exception("Incorrect resource string table", pe_exception::resource_incorrect_string_table);
+
+		if(string_length)
+		{
+			//Create and save string (UNICODE); empty slots are skipped
+#ifdef PE_BLISS_WINDOWS
+			ret.insert(
+				std::make_pair(static_cast<uint16_t>(((id - 1) << 4) + i), //ID of string is calculated such way
+				std::wstring(reinterpret_cast<const wchar_t*>(resource_data.data() + passed_bytes), string_length)));
+#else
+			ret.insert(
+				std::make_pair(static_cast<uint16_t>(((id - 1) << 4) + i), //ID of string is calculated such way
+				pe_utils::from_ucs2(u16string(reinterpret_cast<const unicode16_t*>(resource_data.data() + passed_bytes), string_length))));
+#endif
+		}
+
+		//Go to next string
+		passed_bytes += string_length * 2;
+	}
+
+	return ret;
+}
+
+//Looks up a single string by its ID in the string table of the given language; throws pe_exception if it is absent
+const std::wstring resource_string_table_reader::get_string_by_id_lang(uint32_t language, uint16_t id) const
+{
+	//The string lives in the table with ID (id >> 4) + 1; load that table for the requested language
+	const resource_string_list table = get_string_table_by_id_lang(language, (id >> 4) + 1);
+	const resource_string_list::const_iterator found = table.find(id);
+	if(found != table.end())
+		return found->second;
+
+	throw pe_exception("Resource string not found", pe_exception::resource_string_not_found);
+}
+
+//Looks up a single string by its ID using the index in the language directory (instead of language); throws pe_exception if it is absent
+const std::wstring resource_string_table_reader::get_string_by_id(uint16_t id, uint32_t index) const
+{
+	//The string lives in the table with ID (id >> 4) + 1; load that table at the requested index
+	const resource_string_list table = get_string_table_by_id((id >> 4) + 1, index);
+	const resource_string_list::const_iterator found = table.find(id);
+	if(found != table.end())
+		return found->second;
+
+	throw pe_exception("Resource string not found", pe_exception::resource_string_not_found);
+}
+}
diff --git a/tools/pe_bliss/resource_string_table_reader.h b/tools/pe_bliss/resource_string_table_reader.h
new file mode 100644
index 0000000000..e3ded1da85
--- /dev/null
+++ b/tools/pe_bliss/resource_string_table_reader.h
@@ -0,0 +1,57 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <string>
+#include <map>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+class pe_resource_viewer;
+
+//ID; string
+typedef std::map<uint16_t, std::wstring> resource_string_list;
+
+class resource_string_table_reader
+{
+public:
+	resource_string_table_reader(const pe_resource_viewer& res); //The viewer is held by reference (see res_)
+
+public:
+	//Returns string table data by ID and language (note the argument order: language first, then table ID)
+	const resource_string_list get_string_table_by_id_lang(uint32_t language, uint32_t id) const;
+	//Returns string table data by ID and index in language directory (instead of language); index defaults to 0
+	const resource_string_list get_string_table_by_id(uint32_t id, uint32_t index = 0) const;
+	//Returns string from string table by ID and language; here id is the string ID, not the table ID
+	const std::wstring get_string_by_id_lang(uint32_t language, uint16_t id) const;
+	//Returns string from string table by ID and index in language directory (instead of language); id is the string ID
+	const std::wstring get_string_by_id(uint16_t id, uint32_t index = 0) const;
+
+private:
+	const pe_resource_viewer& res_; //Resource viewer the string tables are read from
+
+	//Helper function of parsing string list table
+	//Id of resource is needed to calculate string IDs correctly
+	//resource_data is raw string table resource data
+	static const resource_string_list parse_string_list(uint32_t id, const std::string& resource_data);
+};
+}
diff --git a/tools/pe_bliss/resource_version_info_reader.cpp b/tools/pe_bliss/resource_version_info_reader.cpp
new file mode 100644
index 0000000000..8ad44c6856
--- /dev/null
+++ b/tools/pe_bliss/resource_version_info_reader.cpp
@@ -0,0 +1,311 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include "resource_version_info_reader.h"
+#include "utils.h"
+#include "pe_exception.h"
+#include "resource_internal.h"
+#include "pe_resource_viewer.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Root version info block key value ("VS_VERSION_INFO" spelled as 16-bit characters); get_version_info() compares the root block key against it
+const u16string resource_version_info_reader::version_info_key(U16TEXT("V\0S\0_\0V\0E\0R\0S\0I\0O\0N\0_\0I\0N\0F\0O\0\0"));
+
+resource_version_info_reader::resource_version_info_reader(const pe_resource_viewer& res)
+	:res_(res) //Initializes the res_ member from the given viewer
+{}
+
+//Computes the 4-byte aligned position of a version block value: base_pos + three WORD headers + null-terminated key
+uint32_t resource_version_info_reader::get_version_block_value_pos(uint32_t base_pos, const unicode16_t* key)
+{
+	const uint32_t header_bytes = static_cast<uint32_t>(sizeof(uint16_t) * 3); //Headers before Key data
+	const uint32_t key_chars = static_cast<uint32_t>(u16string(key).length());
+	const uint32_t key_bytes = (key_chars + 1 /* nullbyte */) * 2;
+
+	const uint32_t value_pos = pe_utils::align_up(static_cast<uint32_t>(header_bytes + base_pos + key_bytes), sizeof(uint32_t));
+
+	//A result smaller than one of its summands means the 32-bit arithmetic wrapped around
+	if(value_pos < base_pos || value_pos < header_bytes || value_pos < key_bytes)
+		throw_incorrect_version_info();
+
+	return value_pos;
+}
+
+//Computes the position of the first child of a version block: aligned end of headers and key plus the aligned value length
+uint32_t resource_version_info_reader::get_version_block_first_child_pos(uint32_t base_pos, uint32_t value_length, const unicode16_t* key)
+{
+	const uint32_t header_bytes = static_cast<uint32_t>(sizeof(uint16_t) * 3); //Headers before Key data
+	const uint32_t key_bytes = (static_cast<uint32_t>(u16string(key).length()) + 1 /* nullbyte */) * 2;
+
+	//Both parts are aligned up to a DWORD boundary separately before being added
+	const uint32_t value_pos = pe_utils::align_up(static_cast<uint32_t>(header_bytes + base_pos + key_bytes), sizeof(uint32_t));
+	const uint32_t value_size = pe_utils::align_up(value_length, sizeof(uint32_t));
+	const uint32_t child_pos = value_pos + value_size;
+
+	//A result smaller than one of its summands means the 32-bit arithmetic wrapped around
+	if(child_pos < base_pos || child_pos < value_length || child_pos < header_bytes || child_pos < key_bytes)
+		throw_incorrect_version_info();
+	return child_pos;
+}
+
+//Throws an exception (id = resource_incorrect_version_info); never returns normally, used by the version info validity checks
+void resource_version_info_reader::throw_incorrect_version_info()
+{
+	throw pe_exception("Incorrect resource version info", pe_exception::resource_incorrect_version_info);
+}
+
+//Parses raw version info resource data and returns the fixed file version info (default-constructed if the root block has no value).
+//string_values receives the string tables found in StringFileInfo (table key -> name/value map),
+//translations receives the language ID / codepage ID pairs found in VarFileInfo "Translation" blocks.
+//Throws pe_exception (resource_incorrect_version_info) if the data is malformed.
+const file_version_info resource_version_info_reader::get_version_info(lang_string_values_map& string_values, translation_values_map& translations, const std::string& resource_data) const
+{
+	//Fixed file version info
+	file_version_info ret;
+
+	//Check resource data length
+	if(resource_data.length() < sizeof(version_info_block))
+		throw_incorrect_version_info();
+
+	//Root version info block
+	const version_info_block* root_block = reinterpret_cast<const version_info_block*>(resource_data.data());
+
+	//Check root block key for null-termination and its name
+	if(!pe_utils::is_null_terminated(root_block->Key, resource_data.length() - sizeof(uint16_t) * 3 /* headers before Key data */)
+		|| version_info_key != reinterpret_cast<const unicode16_t*>(root_block->Key))
+		throw_incorrect_version_info();
+
+	//If file has fixed version info
+	if(root_block->ValueLength)
+	{
+		//Get root block value position
+		uint32_t value_pos = get_version_block_value_pos(0, reinterpret_cast<const unicode16_t*>(root_block->Key));
+		//Check value length
+		if(resource_data.length() < value_pos + sizeof(vs_fixedfileinfo))
+			throw_incorrect_version_info();
+
+		//Get VS_FIXEDFILEINFO structure pointer
+		const vs_fixedfileinfo* file_info = reinterpret_cast<const vs_fixedfileinfo*>(resource_data.data() + value_pos);
+		//Check its signature and some other fields
+		if(file_info->dwSignature != vs_ffi_signature || file_info->dwStrucVersion != vs_ffi_strucversion) //Don't check if file_info->dwFileFlagsMask == VS_FFI_FILEFLAGSMASK
+			throw_incorrect_version_info();
+
+		//Save fixed version info
+		ret = file_version_info(*file_info);
+	}
+
+	//Iterate over child elements of VS_VERSIONINFO (StringFileInfo or VarFileInfo)
+	for(uint32_t child_pos = get_version_block_first_child_pos(0, root_block->ValueLength, reinterpret_cast<const unicode16_t*>(root_block->Key));
+		child_pos < root_block->Length;)
+	{
+		//Check block position
+		if(!pe_utils::is_sum_safe(child_pos, sizeof(version_info_block))
+			|| resource_data.length() < child_pos + sizeof(version_info_block))
+			throw_incorrect_version_info();
+
+		//Get VERSION_INFO_BLOCK structure pointer
+		const version_info_block* block = reinterpret_cast<const version_info_block*>(resource_data.data() + child_pos);
+
+		//Check its length (a zero length would never advance child_pos)
+		if(block->Length == 0)
+			throw_incorrect_version_info();
+
+		//Check block key for null-termination
+		if(!pe_utils::is_null_terminated(block->Key, resource_data.length() - child_pos - sizeof(uint16_t) * 3 /* headers before Key data */))
+			throw_incorrect_version_info();
+
+		u16string info_type(reinterpret_cast<const unicode16_t*>(block->Key));
+		//If we encountered StringFileInfo...
+		if(info_type == StringFileInfo)
+		{
+			//Enumerate all string tables
+			for(uint32_t string_table_pos = get_version_block_first_child_pos(child_pos, block->ValueLength, reinterpret_cast<const unicode16_t*>(block->Key));
+				string_table_pos - child_pos < block->Length;)
+			{
+				//Check string table block position
+				if(resource_data.length() < string_table_pos + sizeof(version_info_block))
+					throw_incorrect_version_info();
+
+				//Get VERSION_INFO_BLOCK structure pointer for string table
+				const version_info_block* string_table = reinterpret_cast<const version_info_block*>(resource_data.data() + string_table_pos);
+
+				//Check its length
+				if(string_table->Length == 0)
+					throw_incorrect_version_info();
+
+				//Check string table key for null-termination
+				if(!pe_utils::is_null_terminated(string_table->Key, resource_data.length() - string_table_pos - sizeof(uint16_t) * 3 /* headers before Key data */))
+					throw_incorrect_version_info();
+
+				string_values_map new_values;
+
+				//Enumerate all strings in the string table
+				for(uint32_t string_pos = get_version_block_first_child_pos(string_table_pos, string_table->ValueLength, reinterpret_cast<const unicode16_t*>(string_table->Key));
+					string_pos - string_table_pos < string_table->Length;)
+				{
+					//Check string block position
+					if(resource_data.length() < string_pos + sizeof(version_info_block))
+						throw_incorrect_version_info();
+
+					//Get VERSION_INFO_BLOCK structure pointer for string block
+					const version_info_block* string_block = reinterpret_cast<const version_info_block*>(resource_data.data() + string_pos);
+
+					//Check its length
+					if(string_block->Length == 0)
+						throw_incorrect_version_info();
+
+					//Check string block key for null-termination
+					if(!pe_utils::is_null_terminated(string_block->Key, resource_data.length() - string_pos - sizeof(uint16_t) * 3 /* headers before Key data */))
+						throw_incorrect_version_info();
+
+					u16string data;
+					//If string block has value
+					if(string_block->ValueLength != 0)
+					{
+						//Get value position
+						uint32_t value_pos = get_version_block_value_pos(string_pos, reinterpret_cast<const unicode16_t*>(string_block->Key));
+						//Check it: ValueLength 16-bit characters are read below, so the byte size is twice the ValueLength
+						if(resource_data.length() < value_pos + static_cast<uint32_t>(string_block->ValueLength) * sizeof(unicode16_t))
+							throw_incorrect_version_info();
+
+						//Get UNICODE string value
+						data = u16string(reinterpret_cast<const unicode16_t*>(resource_data.data() + value_pos), string_block->ValueLength);
+						pe_utils::strip_nullbytes(data);
+					}
+
+					//Save name-value pair
+#ifdef PE_BLISS_WINDOWS
+					new_values.insert(std::make_pair(reinterpret_cast<const unicode16_t*>(string_block->Key), data));
+#else
+					new_values.insert(std::make_pair(pe_utils::from_ucs2(reinterpret_cast<const unicode16_t*>(string_block->Key)),
+						pe_utils::from_ucs2(data)));
+#endif
+
+					//Navigate to next string block
+					string_pos += pe_utils::align_up(string_block->Length, sizeof(uint32_t));
+				}
+
+#ifdef PE_BLISS_WINDOWS
+				string_values.insert(std::make_pair(reinterpret_cast<const unicode16_t*>(string_table->Key), new_values));
+#else
+				string_values.insert(std::make_pair(pe_utils::from_ucs2(reinterpret_cast<const unicode16_t*>(string_table->Key)), new_values));
+#endif
+
+				//Navigate to next string table block
+				string_table_pos += pe_utils::align_up(string_table->Length, sizeof(uint32_t));
+			}
+		}
+		else if(info_type == VarFileInfo) //If we encountered VarFileInfo
+		{
+			for(uint32_t var_table_pos = get_version_block_first_child_pos(child_pos, block->ValueLength, reinterpret_cast<const unicode16_t*>(block->Key));
+				var_table_pos - child_pos < block->Length;)
+			{
+				//Check var block position
+				if(resource_data.length() < var_table_pos + sizeof(version_info_block))
+					throw_incorrect_version_info();
+
+				//Get VERSION_INFO_BLOCK structure pointer for var block
+				const version_info_block* var_table = reinterpret_cast<const version_info_block*>(resource_data.data() + var_table_pos);
+
+				//Check its length
+				if(var_table->Length == 0)
+					throw_incorrect_version_info();
+
+				//Check its key for null-termination
+				if(!pe_utils::is_null_terminated(var_table->Key, resource_data.length() - var_table_pos - sizeof(uint16_t) * 3 /* headers before Key data */))
+					throw_incorrect_version_info();
+
+				//If block is "Translation" (actually, there's no other types possible in VarFileInfo) and it has value
+				if(u16string(reinterpret_cast<const unicode16_t*>(var_table->Key)) == Translation && var_table->ValueLength)
+				{
+					//Get its value position
+					uint32_t value_pos = get_version_block_value_pos(var_table_pos, reinterpret_cast<const unicode16_t*>(var_table->Key));
+					//Check value length
+					if(resource_data.length() < value_pos + var_table->ValueLength)
+						throw_incorrect_version_info();
+
+					//Get list of translations: pairs of LANGUAGE_ID - CODEPAGE_ID; only complete pairs are read, a trailing partial pair is ignored
+					for(unsigned long i = 0; i + sizeof(uint16_t) * 2 <= var_table->ValueLength; i += sizeof(uint16_t) * 2)
+					{
+						//Pair of WORDs
+						uint16_t lang_id = *reinterpret_cast<const uint16_t*>(resource_data.data() + value_pos + i);
+						uint16_t codepage_id = *reinterpret_cast<const uint16_t*>(resource_data.data() + value_pos + sizeof(uint16_t) + i);
+						//Save translation
+						translations.insert(std::make_pair(lang_id, codepage_id));
+					}
+				}
+
+				//Navigate to next var block
+				var_table_pos += pe_utils::align_up(var_table->Length, sizeof(uint32_t));
+			}
+		}
+		else
+		{
+			//Any other child block type is treated as an error
+			throw_incorrect_version_info();
+		}
+
+		//Navigate to next element in root block
+		child_pos += pe_utils::align_up(block->Length, sizeof(uint32_t));
+	}
+
+	return ret;
+}
+
+//Reads the version info resource (ID 1) stored for the given language and parses it:
+//returns the file_version_info (versions and file info),
+//string_values is filled with the version info strings with encodings,
+//translations is filled with the map of translations
+const file_version_info resource_version_info_reader::get_version_info_by_lang(lang_string_values_map& string_values, translation_values_map& translations, uint32_t language) const
+{
+	//Walk the resource tree down to the raw data and hand it straight to the parsing overload
+	return get_version_info(string_values, translations,
+		res_.get_root_directory() //Type directory
+			.entry_by_id(pe_resource_viewer::resource_version)
+			.get_resource_directory() //Name/ID directory
+			.entry_by_id(1)
+			.get_resource_directory() //Language directory
+			.entry_by_id(language)
+			.get_data_entry() //Data directory
+			.get_data());
+}
+
+//Reads the version info resource (ID 1) at the given index of its language directory and parses it:
+//returns the file_version_info (versions and file info),
+//string_values is filled with the version info strings with encodings,
+//translations is filled with the map of translations
+const file_version_info resource_version_info_reader::get_version_info(lang_string_values_map& string_values, translation_values_map& translations, uint32_t index) const
+{
+	const resource_directory::entry_list& languages = res_.get_root_directory() //Type directory
+		.entry_by_id(pe_resource_viewer::resource_version)
+		.get_resource_directory() //Name/ID directory
+		.entry_by_id(1)
+		.get_resource_directory() //Language directory
+		.get_entry_list();
+
+	if(index < languages.size())
+		return get_version_info(string_values, translations, languages.at(index).get_data_entry().get_data()); //Data directory
+
+	throw pe_exception("Resource data entry not found", pe_exception::resource_data_entry_not_found);
+}
+}
diff --git a/tools/pe_bliss/resource_version_info_reader.h b/tools/pe_bliss/resource_version_info_reader.h
new file mode 100644
index 0000000000..c1dfbffdc2
--- /dev/null
+++ b/tools/pe_bliss/resource_version_info_reader.h
@@ -0,0 +1,67 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <map>
+#include "file_version_info.h"
+#include "pe_structures.h"
+#include "version_info_types.h"
+
+namespace pe_bliss
+{
+class pe_resource_viewer;
+
+class resource_version_info_reader
+{
+public: //VERSION INFO
+	resource_version_info_reader(const pe_resource_viewer& res);
+
+	//Returns full version information, selected by language entry index or by language ID:
+	//file_version_info: versions and file info
+	//lang_string_values_map: map of version info strings with encodings
+	//translation_values_map: map of translations
+	const file_version_info get_version_info(lang_string_values_map& string_values, translation_values_map& translations, uint32_t index = 0) const;
+	const file_version_info get_version_info_by_lang(lang_string_values_map& string_values, translation_values_map& translations, uint32_t language) const;
+
+public:
+	//L"VS_VERSION_INFO" key of root version info block
+	static const u16string version_info_key;
+
+private:
+	const pe_resource_viewer& res_;
+	
+	//VERSION INFO helpers
+	//Returns aligned version block value position
+	static uint32_t get_version_block_value_pos(uint32_t base_pos, const unicode16_t* key);
+
+	//Returns aligned version block first child position
+	static uint32_t get_version_block_first_child_pos(uint32_t base_pos, uint32_t value_length, const unicode16_t* key);
+
+	//Returns full version information parsed from raw version resource data:
+	//file_version_info: versions and file info
+	//lang_string_values_map: map of version info strings with encodings
+	//translation_values_map: map of translations
+	const file_version_info get_version_info(lang_string_values_map& string_values, translation_values_map& translations, const std::string& resource_data) const;
+
+	//Throws an exception (id = resource_incorrect_version_info)
+	static void throw_incorrect_version_info();
+};
+}
diff --git a/tools/pe_bliss/resource_version_info_writer.cpp b/tools/pe_bliss/resource_version_info_writer.cpp
new file mode 100644
index 0000000000..ed95a0f7ea
--- /dev/null
+++ b/tools/pe_bliss/resource_version_info_writer.cpp
@@ -0,0 +1,283 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "resource_version_info_writer.h"
+#include "pe_structures.h"
+#include "resource_internal.h"
+#include "utils.h"
+#include "pe_resource_manager.h"
+#include "resource_version_info_reader.h"
+
+namespace pe_bliss
+{
+using namespace pe_win;
+
+//Binds the writer to the resource manager that receives the version info resource
+resource_version_info_writer::resource_version_info_writer(pe_resource_manager& res)
+	:res_(res) {}
+
+//Sets/replaces full version information (resource ID = 1) for the given language:
+//file_info: versions and file info, written to the fixed file info block
+//string_values: map of version info strings with encodings (skipped when empty)
+//translations: map of translations (skipped when empty)
+void resource_version_info_writer::set_version_info(const file_version_info& file_info,
+	const lang_string_values_map& string_values,
+	const translation_values_map& translations,
+	uint32_t language,
+	uint32_t codepage,
+	uint32_t timestamp)
+{
+	std::string version_data;
+
+	//Calculate total size of version resource data (root header, key, padding and fixed file info first)
+	uint32_t total_version_info_length =
+		static_cast<uint32_t>(sizeof(version_info_block) - sizeof(uint16_t) + sizeof(uint16_t) /* padding */
+		+ (resource_version_info_reader::version_info_key.length() + 1) * 2
+		+ sizeof(vs_fixedfileinfo));
+
+	//If we have any strings values
+	if(!string_values.empty())
+	{
+		total_version_info_length += sizeof(version_info_block) - sizeof(uint16_t); //StringFileInfo block
+		total_version_info_length += SizeofStringFileInfo; //Name of block (key)
+
+		//Add required size for version strings
+		for(lang_string_values_map::const_iterator table_it = string_values.begin(); table_it != string_values.end(); ++table_it)
+		{
+			total_version_info_length += pe_utils::align_up(static_cast<uint32_t>(sizeof(uint16_t) * 3 + ((*table_it).first.length() + 1) * 2), sizeof(uint32_t)); //Name of child block and block size (key of string table block)
+
+			const string_values_map& values = (*table_it).second;
+			for(string_values_map::const_iterator it = values.begin(); it != values.end(); ++it)
+			{
+				total_version_info_length += pe_utils::align_up(static_cast<uint32_t>(sizeof(uint16_t) * 3 + ((*it).first.length() + 1) * 2), sizeof(uint32_t));
+				total_version_info_length += pe_utils::align_up(static_cast<uint32_t>(((*it).second.length() + 1) * 2), sizeof(uint32_t));
+			}
+		}
+	}
+
+	//If we have translations
+	if(!translations.empty())
+	{
+		total_version_info_length += (sizeof(version_info_block) - sizeof(uint16_t)) * 2; //VarFileInfo and Translation blocks
+		total_version_info_length += SizeofVarFileInfoAligned; //DWORD-aligned VarFileInfo block name
+		total_version_info_length += SizeofTranslationAligned; //DWORD-aligned Translation block name
+		total_version_info_length += static_cast<uint32_t>(translations.size() * sizeof(uint16_t) * 2);
+	}
+
+	//Resize version data buffer (it is not resized again, so pointers taken into it below stay valid)
+	version_data.resize(total_version_info_length);
+
+	//Create root version block (NOTE(review): the total is cast to uint16_t, so a size above 0xFFFF would be truncated)
+	version_info_block root_block = {0};
+	root_block.ValueLength = sizeof(vs_fixedfileinfo);
+	root_block.Length = static_cast<uint16_t>(total_version_info_length);
+
+	//Fill fixed file info
+	vs_fixedfileinfo fixed_info = {0};
+	fixed_info.dwFileDateLS = file_info.get_file_date_ls();
+	fixed_info.dwFileDateMS = file_info.get_file_date_ms();
+	fixed_info.dwFileFlags = file_info.get_file_flags();
+	fixed_info.dwFileFlagsMask = vs_ffi_fileflagsmask;
+	fixed_info.dwFileOS = file_info.get_file_os_raw();
+	fixed_info.dwFileSubtype = file_info.get_file_subtype();
+	fixed_info.dwFileType = file_info.get_file_type_raw();
+	fixed_info.dwFileVersionLS = file_info.get_file_version_ls();
+	fixed_info.dwFileVersionMS = file_info.get_file_version_ms();
+	fixed_info.dwSignature = vs_ffi_signature;
+	fixed_info.dwStrucVersion = vs_ffi_strucversion;
+	fixed_info.dwProductVersionLS = file_info.get_product_version_ls();
+	fixed_info.dwProductVersionMS = file_info.get_product_version_ms();
+
+	//Write root block header, its key, one padding word and fixed file info to buffer
+	uint32_t data_ptr = 0;
+	memcpy(&version_data[data_ptr], &root_block, sizeof(version_info_block) - sizeof(uint16_t));
+	data_ptr += sizeof(version_info_block) - sizeof(uint16_t);
+	memcpy(&version_data[data_ptr], resource_version_info_reader::version_info_key.c_str(), (resource_version_info_reader::version_info_key.length() + 1) * sizeof(uint16_t));
+	data_ptr += static_cast<uint32_t>((resource_version_info_reader::version_info_key.length() + 1) * sizeof(uint16_t));
+	memset(&version_data[data_ptr], 0, sizeof(uint16_t));
+	data_ptr += sizeof(uint16_t);
+	memcpy(&version_data[data_ptr], &fixed_info, sizeof(fixed_info));
+	data_ptr += sizeof(fixed_info);
+
+	//Write string values, if any
+	if(!string_values.empty())
+	{
+		//Create string file info root block
+		version_info_block string_file_info_block = {0};
+		string_file_info_block.Type = 1; //Block type is string
+		memcpy(&version_data[data_ptr], &string_file_info_block, sizeof(version_info_block) - sizeof(uint16_t));
+		//We will calculate its length later
+		version_info_block* string_file_info_block_ptr = reinterpret_cast<version_info_block*>(&version_data[data_ptr]);
+		data_ptr += sizeof(version_info_block) - sizeof(uint16_t);
+
+		uint32_t old_ptr1 = data_ptr; //Used to calculate string file info block length later
+		memcpy(&version_data[data_ptr], StringFileInfo, SizeofStringFileInfo); //Write block name
+		data_ptr += SizeofStringFileInfo;
+
+		//Create string table root block (child of string file info)
+		version_info_block string_table_block = {0};
+		string_table_block.Type = 1; //Block type is string
+
+		for(lang_string_values_map::const_iterator table_it = string_values.begin(); table_it != string_values.end(); ++table_it)
+		{
+			const string_values_map& values = (*table_it).second;
+
+			memcpy(&version_data[data_ptr], &string_table_block, sizeof(version_info_block) - sizeof(uint16_t));
+			//We will calculate its length later
+			version_info_block* string_table_block_ptr = reinterpret_cast<version_info_block*>(&version_data[data_ptr]);
+			data_ptr += sizeof(version_info_block) - sizeof(uint16_t);
+
+			uint32_t old_ptr2 = data_ptr; //Used to calculate string table block length later
+			uint32_t lang_key_length = static_cast<uint32_t>(((*table_it).first.length() + 1) * sizeof(uint16_t));
+
+#ifdef PE_BLISS_WINDOWS
+			memcpy(&version_data[data_ptr], (*table_it).first.c_str(), lang_key_length); //Write block key
+#else
+			{
+				u16string str(pe_utils::to_ucs2((*table_it).first));
+				memcpy(&version_data[data_ptr], str.c_str(), lang_key_length); //Write block key
+			}
+#endif
+
+			data_ptr += lang_key_length;
+			//Align key if necessary (header plus key must end on a DWORD boundary)
+			if((sizeof(uint16_t) * 3 + lang_key_length) % sizeof(uint32_t))
+			{
+				memset(&version_data[data_ptr], 0, sizeof(uint16_t));
+				data_ptr += sizeof(uint16_t);
+			}
+
+			//Create string block (child of string table block)
+			version_info_block string_block = {0};
+			string_block.Type = 1; //Block type is string
+			for(string_values_map::const_iterator it = values.begin(); it != values.end(); ++it)
+			{
+				//Calculate value length (in characters, including terminator) and key length (in bytes) of string block
+				string_block.ValueLength = static_cast<uint16_t>((*it).second.length() + 1);
+				uint32_t key_length = static_cast<uint32_t>(((*it).first.length() + 1) * sizeof(uint16_t));
+				//Calculate length of block (trailing value padding is not counted)
+				string_block.Length = static_cast<uint16_t>(pe_utils::align_up(sizeof(uint16_t) * 3 + key_length, sizeof(uint32_t)) + string_block.ValueLength * sizeof(uint16_t));
+
+				//Write string block
+				memcpy(&version_data[data_ptr], &string_block, sizeof(version_info_block) - sizeof(uint16_t));
+				data_ptr += sizeof(version_info_block) - sizeof(uint16_t);
+
+#ifdef PE_BLISS_WINDOWS
+				memcpy(&version_data[data_ptr], (*it).first.c_str(), key_length); //Write block key
+#else
+				{
+					u16string str(pe_utils::to_ucs2((*it).first));
+					memcpy(&version_data[data_ptr], str.c_str(), key_length); //Write block key
+				}
+#endif
+
+				data_ptr += key_length;
+				//Align key if necessary
+				if((sizeof(uint16_t) * 3 + key_length) % sizeof(uint32_t))
+				{
+					memset(&version_data[data_ptr], 0, sizeof(uint16_t));
+					data_ptr += sizeof(uint16_t);
+				}
+
+				//Write block data (value)
+#ifdef PE_BLISS_WINDOWS
+				memcpy(&version_data[data_ptr], (*it).second.c_str(), string_block.ValueLength * sizeof(uint16_t));
+#else
+				{
+					u16string str(pe_utils::to_ucs2((*it).second));
+					memcpy(&version_data[data_ptr], str.c_str(), string_block.ValueLength * sizeof(uint16_t));
+				}
+#endif
+
+				data_ptr += string_block.ValueLength * 2;
+				//Align data if necessary
+				if((string_block.ValueLength * 2) % sizeof(uint32_t))
+				{
+					memset(&version_data[data_ptr], 0, sizeof(uint16_t));
+					data_ptr += sizeof(uint16_t);
+				}
+			}
+
+			//Calculate string table block length (string file info block length is set after the loop)
+			string_table_block_ptr->Length = static_cast<uint16_t>(data_ptr - old_ptr2 + sizeof(uint16_t) * 3);
+		}
+
+		string_file_info_block_ptr->Length = static_cast<uint16_t>(data_ptr - old_ptr1 + sizeof(uint16_t) * 3);
+	}
+
+	//If we have translations
+	if(!translations.empty())
+	{
+		//Create root var file info block
+		version_info_block var_file_info_block = {0};
+		var_file_info_block.Type = 1; //Type of block is string
+		//Write block header
+		memcpy(&version_data[data_ptr], &var_file_info_block, sizeof(version_info_block) - sizeof(uint16_t));
+		//We will calculate its length later
+		version_info_block* var_file_info_block_ptr = reinterpret_cast<version_info_block*>(&version_data[data_ptr]);
+		data_ptr += sizeof(version_info_block) - sizeof(uint16_t);
+
+		uint32_t old_ptr1 = data_ptr; //Used to calculate var file info block length later
+		memcpy(&version_data[data_ptr], VarFileInfoAligned, SizeofVarFileInfoAligned); //Write block key (aligned)
+		data_ptr += SizeofVarFileInfoAligned;
+
+		//Create root translation block (child of var file info block)
+		version_info_block translation_block = {0};
+		//Write block header
+		memcpy(&version_data[data_ptr], &translation_block, sizeof(version_info_block) - sizeof(uint16_t));
+		//We will calculate its length later
+		version_info_block* translation_block_ptr = reinterpret_cast<version_info_block*>(&version_data[data_ptr]);
+		data_ptr += sizeof(version_info_block) - sizeof(uint16_t);
+
+		uint32_t old_ptr2 = data_ptr; //Used to calculate translation block length later
+		memcpy(&version_data[data_ptr], TranslationAligned, SizeofTranslationAligned); //Write block key (aligned)
+		data_ptr += SizeofTranslationAligned;
+
+		//Calculate translation block value length (two 16-bit words per translation)
+		translation_block_ptr->ValueLength = static_cast<uint16_t>(sizeof(uint16_t) * 2 * translations.size());
+
+		//Write translation values to block
+		for(translation_values_map::const_iterator it = translations.begin(); it != translations.end(); ++it)
+		{
+			uint16_t lang_id = (*it).first; //Language ID
+			uint16_t codepage_id = (*it).second; //Codepage ID
+			memcpy(&version_data[data_ptr], &lang_id, sizeof(lang_id));
+			data_ptr += sizeof(lang_id);
+			memcpy(&version_data[data_ptr], &codepage_id, sizeof(codepage_id));
+			data_ptr += sizeof(codepage_id);
+		}
+
+		//Calculate Translation and VarFileInfo blocks lengths
+		translation_block_ptr->Length = static_cast<uint16_t>(data_ptr - old_ptr2 + sizeof(uint16_t) * 3);
+		var_file_info_block_ptr->Length = static_cast<uint16_t>(data_ptr - old_ptr1 + sizeof(uint16_t) * 3);
+	}
+
+	//Add/replace version info resource (ID = 1) for the requested language
+	res_.add_resource(version_data, pe_resource_viewer::resource_version, 1, language, codepage, timestamp);
+}
+
+//Removes version info resource (ID = 1) for the given language; returns the result of remove_resource
+bool resource_version_info_writer::remove_version_info(uint32_t language)
+{
+	return res_.remove_resource(pe_resource_viewer::resource_version, 1, language);
+}
+}
diff --git a/tools/pe_bliss/resource_version_info_writer.h b/tools/pe_bliss/resource_version_info_writer.h
new file mode 100644
index 0000000000..da279ddedb
--- /dev/null
+++ b/tools/pe_bliss/resource_version_info_writer.h
@@ -0,0 +1,52 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "version_info_types.h"
+#include "file_version_info.h"
+
+namespace pe_bliss
+{
+class pe_resource_manager;
+
+class resource_version_info_writer
+{
+public:
+	resource_version_info_writer(pe_resource_manager& res);
+	
+	//Sets/replaces full version information for the given language (codepage and timestamp default to 0):
+	//file_info: versions and file info
+	//string_values: map of version info strings with encodings
+	//translations: map of translations
+	void set_version_info(const file_version_info& file_info,
+		const lang_string_values_map& string_values,
+		const translation_values_map& translations,
+		uint32_t language,
+		uint32_t codepage = 0,
+		uint32_t timestamp = 0);
+	
+	//Removes version info by language (ID = 1)
+	bool remove_version_info(uint32_t language);
+
+private:
+	pe_resource_manager& res_;
+};
+}
diff --git a/tools/pe_bliss/stdint_defs.h b/tools/pe_bliss/stdint_defs.h
new file mode 100644
index 0000000000..bbc003690a
--- /dev/null
+++ b/tools/pe_bliss/stdint_defs.h
@@ -0,0 +1,45 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+//Fixed-width integer types for the library.
+//MSVC 2008 and earlier (_MSC_VER < 1600) doesn't have stdint.h, so for
+//those compilers the types are declared by hand inside the pe_bliss
+//namespace; every other compiler takes them from the standard header.
+#if defined(_MSC_VER) && _MSC_VER < 1600
+namespace pe_bliss
+{
+	//Signed/unsigned pairs, grouped by width
+	typedef signed char int8_t;
+	typedef unsigned char uint8_t;
+
+	typedef short int16_t;
+	typedef unsigned short uint16_t;
+
+	typedef int int32_t;
+	typedef unsigned int uint32_t;
+
+	typedef long long int64_t;
+	typedef unsigned long long uint64_t;
+}
+#else
+#include <stdint.h>
+#endif
diff --git a/tools/pe_bliss/utils.cpp b/tools/pe_bliss/utils.cpp
new file mode 100644
index 0000000000..e6a75d5497
--- /dev/null
+++ b/tools/pe_bliss/utils.cpp
@@ -0,0 +1,85 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <string.h>
+#include "utils.h"
+#include "pe_exception.h"
+
+
+namespace pe_bliss
+{
+const double pe_utils::log_2 = 1.44269504088896340736; //value of log2(e), hard-coded instead of using M_LOG2E
+
+//Measures the stream size; the read position is restored before returning
+std::streamoff pe_utils::get_file_size(std::istream& file)
+{
+	//Remember where the caller left the read position
+	const std::streamoff saved_pos = file.tellg();
+	//The offset of the end of the stream is its size
+	file.seekg(0, std::ios_base::end);
+	const std::streamoff stream_size = file.tellg();
+	file.seekg(saved_pos); //Put the read position back
+	return stream_size;
+}
+
+#ifndef PE_BLISS_WINDOWS
+const u16string pe_utils::to_ucs2(const std::wstring& str)
+{
+	//Each wide character is reduced to its low 16 bits, so anything
+	//above 0xFFFF is truncated rather than encoded as a surrogate pair
+	u16string converted;
+	if(str.empty())
+		return converted;
+
+	const int char_count = str.length();
+	converted.resize(char_count);
+
+	for(int pos = 0; pos < char_count; ++pos)
+		converted[pos] = str[pos] & 0xFFFF;
+
+	return converted;
+}
+
+const std::wstring pe_utils::from_ucs2(const u16string& str)
+{
+	//Every 16-bit unit is copied into one wide character, value unchanged
+	std::wstring widened;
+	if(str.empty())
+		return widened;
+
+	const int unit_count = str.length();
+	widened.resize(str.length());
+
+	for(int pos = 0; pos < unit_count; ++pos)
+		widened[pos] = str[pos];
+
+	return widened;
+}
+#endif
+
+bool operator==(const pe_win::guid& guid1, const pe_win::guid& guid2)
+{
+	//Data1..Data3 are compared field by field first; Data4 is compared bytewise with memcmp
+	if(guid1.Data1 != guid2.Data1 || guid1.Data2 != guid2.Data2 || guid1.Data3 != guid2.Data3)
+		return false;
+	return memcmp(guid1.Data4, guid2.Data4, sizeof(guid1.Data4)) == 0;
+}
+}
diff --git a/tools/pe_bliss/utils.h b/tools/pe_bliss/utils.h
new file mode 100644
index 0000000000..29125f8dc1
--- /dev/null
+++ b/tools/pe_bliss/utils.h
@@ -0,0 +1,105 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <istream>
+#include <string>
+#include "stdint_defs.h"
+#include "pe_structures.h"
+
+namespace pe_bliss
+{
+class pe_utils
+{
+public:
+	//Returns true if string "data" with maximum length "raw_length" (in bytes) is null-terminated
+	template<typename T>
+	static bool is_null_terminated(const T* data, size_t raw_length)
+	{
+		raw_length /= sizeof(T); //Convert byte count to element count
+		for(size_t l = 0; l < raw_length; l++)
+		{
+			if(data[l] == static_cast<T>(L'\0'))
+				return true;
+		}
+
+		return false;
+	}
+
+	//Helper template function to strip nullbytes in the end of string (an empty string is left untouched)
+	template<typename T>
+	static void strip_nullbytes(std::basic_string<T>& str)
+	{
+		while(!str.empty() && !*(str.end() - 1)) //Emptiness is tested first: end() - 1 is invalid for an empty string
+			str.erase(str.length() - 1);
+	}
+
+	//Helper function to determine if number is power of 2 (note: also returns true for 0)
+	template<typename T>
+	static inline bool is_power_of_2(T x)
+	{
+		return !(x & (x - 1));
+	}
+
+	//Helper function to align number down ("align" must be a power of 2 for the bit mask to work)
+	template<typename T>
+	static inline T align_down(T x, uint32_t align)
+	{
+		return x & ~(static_cast<T>(align) - 1);
+	}
+
+	//Helper function to align number up (already aligned values are returned unchanged)
+	template<typename T>
+	static inline T align_up(T x, uint32_t align)
+	{
+		return (x & static_cast<T>(align - 1)) ? align_down(x, align) + static_cast<T>(align) : x;
+	}
+
+	//Returns true if sum of two unsigned integers is safe (no overflow occurs)
+	static inline bool is_sum_safe(uint32_t a, uint32_t b)
+	{
+		return a <= static_cast<uint32_t>(-1) - b;
+	}
+
+	//Two gigabytes value in bytes
+	static const uint32_t two_gb = 0x80000000;
+	static const uint32_t max_dword = 0xFFFFFFFF;
+	static const uint32_t max_word = 0x0000FFFF;
+	static const double log_2; //instead of using M_LOG2E
+
+	//Returns stream size
+	static std::streamoff get_file_size(std::istream& file);
+	
+#ifndef PE_BLISS_WINDOWS
+public:
+	static const u16string to_ucs2(const std::wstring& str);
+	static const std::wstring from_ucs2(const u16string& str);
+#endif
+
+private:
+	pe_utils();
+	pe_utils(pe_utils&);
+	pe_utils& operator=(const pe_utils&);
+};
+
+//Windows GUID comparison
+bool operator==(const pe_win::guid& guid1, const pe_win::guid& guid2);
+}
diff --git a/tools/pe_bliss/version_info_editor.cpp b/tools/pe_bliss/version_info_editor.cpp
new file mode 100644
index 0000000000..199eebfd54
--- /dev/null
+++ b/tools/pe_bliss/version_info_editor.cpp
@@ -0,0 +1,184 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <sstream>
+#include <iomanip>
+#include "version_info_types.h"
+#include "version_info_editor.h"
+#include "version_info_viewer.h"
+
+namespace pe_bliss
+{
+//Constructor (both maps are kept by reference and are edited in place)
+//strings - version info strings with charsets
+//translations - version info translations map
+version_info_editor::version_info_editor(lang_string_values_map& strings, translation_values_map& translations)
+	:version_info_viewer(strings, translations), //Base viewer reads the same maps
+	strings_edit_(strings),
+	translations_edit_(translations)
+{}
+
+//The functions below take a translation parameter
+//If it's empty, the default language translation will be used
+//If there's no default language translation, the first existing one will be used
+
+//Sets company name (value of the "CompanyName" property); see set_property for the meaning of translation
+void version_info_editor::set_company_name(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"CompanyName", value, translation);
+}
+
+//Sets file description (value of the "FileDescription" property); see set_property for the meaning of translation
+void version_info_editor::set_file_description(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"FileDescription", value, translation);
+}
+
+//Sets file version (value of the "FileVersion" property); see set_property for the meaning of translation
+void version_info_editor::set_file_version(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"FileVersion", value, translation);
+}
+
+//Sets internal file name (value of the "InternalName" property); see set_property for the meaning of translation
+void version_info_editor::set_internal_name(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"InternalName", value, translation);
+}
+
+//Sets legal copyright (value of the "LegalCopyright" property); see set_property for the meaning of translation
+void version_info_editor::set_legal_copyright(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"LegalCopyright", value, translation);
+}
+
+//Sets original file name (value of the "OriginalFilename" property); see set_property for the meaning of translation
+void version_info_editor::set_original_filename(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"OriginalFilename", value, translation);
+}
+
+//Sets product name (value of the "ProductName" property); see set_property for the meaning of translation
+void version_info_editor::set_product_name(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"ProductName", value, translation);
+}
+
+//Sets product version (value of the "ProductVersion" property); see set_property for the meaning of translation
+void version_info_editor::set_product_version(const std::wstring& value, const std::wstring& translation)
+{
+	set_property(L"ProductVersion", value, translation);
+}
+
+//Sets version info property value
+//property_name - property name
+//value - property value
+//translation - HEX-string of the translation table to edit
+//If it's empty, the default language translation table is taken,
+//and if there's no such table, the first existing one is taken
+//If translation does not exist, it will be added
+//If property does not exist, it will be added
+void version_info_editor::set_property(const std::wstring& property_name, const std::wstring& value, const std::wstring& translation)
+{
+	//Key of the translation table to find (and to add, if there's nothing suitable)
+	const std::wstring& table_key = translation.empty()
+		? default_language_translation //No translation was specified
+		: translation;
+
+	lang_string_values_map::iterator table = strings_edit_.find(table_key);
+
+	//If no translation was specified and there's no default translation table, take the first one
+	//(this gives end() again if there are no translation tables at all)
+	if(table == strings_edit_.end() && translation.empty())
+		table = strings_edit_.begin();
+
+	//Still nothing was found
+	if(table == strings_edit_.end())
+	{
+		//There's no suitable translation table, add an empty one
+		const lang_string_values_map::value_type new_table(table_key, string_values_map());
+		table = strings_edit_.insert(new_table).first;
+
+		//Also add it to translations list
+		add_translation(table_key);
+	}
+
+	//Change value of the required property
+	//(operator[] adds the property first, if it does not exist)
+	string_values_map& properties = (*table).second;
+	properties[property_name] = value;
+}
+
+//Adds translation given as HEX-string (language ID, then codepage ID) to translations list
+void version_info_editor::add_translation(const std::wstring& translation)
+{
+	const translation_pair ids = translation_from_string(translation); //first - language ID, second - codepage ID
+	add_translation(ids.first, ids.second);
+}
+
+//Adds (language_id, codepage_id) to translations list, unless exactly this pair is already there
+void version_info_editor::add_translation(uint16_t language_id, uint16_t codepage_id)
+{
+	translation_values_map::const_iterator entry = translations_edit_.lower_bound(language_id);
+	const translation_values_map::const_iterator last = translations_edit_.upper_bound(language_id);
+
+	//Look through the entries stored under language_id for the same codepage
+	for(; entry != last; ++entry)
+	{
+		if((*entry).second == codepage_id)
+			return; //Translation already exists
+	}
+	translations_edit_.insert(std::make_pair(language_id, codepage_id));
+}
+
+//Removes translation given as HEX-string (language ID, then codepage ID) from translations and strings lists
+void version_info_editor::remove_translation(const std::wstring& translation)
+{
+	const translation_pair ids = translation_from_string(translation); //first - language ID, second - codepage ID
+	remove_translation(ids.first, ids.second);
+}
+
+//Removes translation (language_id, codepage_id) from translations and strings lists
+void version_info_editor::remove_translation(uint16_t language_id, uint16_t codepage_id)
+{
+	//Erase string table (if exists)
+	//Its key is 8 HEX digits: language ID, then codepage ID, each zero-padded to 4 digits
+	std::wstringstream table_key;
+	table_key << std::hex << std::setfill(L'0');
+	table_key << std::setw(4) << language_id;
+	table_key << std::setw(4) << codepage_id;
+	strings_edit_.erase(table_key.str());
+
+	//Find translation in translations table:
+	//skip entries of this language which have another codepage
+	translation_values_map::iterator entry = translations_edit_.lower_bound(language_id);
+	const translation_values_map::iterator last = translations_edit_.upper_bound(language_id);
+
+	while(entry != last && (*entry).second != codepage_id)
+		++entry;
+
+	//Erase it, if it was found (only the first matching entry is erased)
+	if(entry != last)
+	{
+		translations_edit_.erase(entry);
+	}
+}
+}
diff --git a/tools/pe_bliss/version_info_editor.h b/tools/pe_bliss/version_info_editor.h
new file mode 100644
index 0000000000..53d3dc62c1
--- /dev/null
+++ b/tools/pe_bliss/version_info_editor.h
@@ -0,0 +1,79 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include "version_info_types.h"
+#include "version_info_viewer.h"
+
+namespace pe_bliss
+{
+	//Helper class to read and edit version information
+	//lang_string_values_map: map of version info strings with encodings
+	//translation_values_map: map of translations
+	class version_info_editor : public version_info_viewer
+	{
+	public:
+		//Constructor (both maps are kept by reference and are edited in place)
+		//strings - version info strings with charsets
+		//translations - version info translations map
+		version_info_editor(lang_string_values_map& strings, translation_values_map& translations);
+
+		//The functions below take a translation parameter
+		//If it's empty, the default language translation will be used
+		//If there's no default language translation, the first existing one will be used
+
+		//Sets company name
+		void set_company_name(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets file description
+		void set_file_description(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets file version
+		void set_file_version(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets internal file name
+		void set_internal_name(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets legal copyright
+		void set_legal_copyright(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets original file name
+		void set_original_filename(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets product name
+		void set_product_name(const std::wstring& value, const std::wstring& translation = std::wstring());
+		//Sets product version
+		void set_product_version(const std::wstring& value, const std::wstring& translation = std::wstring());
+
+		//Sets version info property value
+		//property_name - property name
+		//value - property value
+		//If translation does not exist, it will be added to strings and translations lists
+		//If property does not exist, it will be added
+		void set_property(const std::wstring& property_name, const std::wstring& value, const std::wstring& translation = std::wstring());
+
+		//Adds translation to translations list (HEX-string form: language ID, then codepage ID)
+		void add_translation(const std::wstring& translation);
+		void add_translation(uint16_t language_id, uint16_t codepage_id);
+
+		//Removes translation from translations and strings lists
+		void remove_translation(const std::wstring& translation);
+		void remove_translation(uint16_t language_id, uint16_t codepage_id);
+
+	private:
+		lang_string_values_map& strings_edit_; //Same map as passed to the constructor
+		translation_values_map& translations_edit_; //Same map as passed to the constructor
+	};
+}
diff --git a/tools/pe_bliss/version_info_types.h b/tools/pe_bliss/version_info_types.h
new file mode 100644
index 0000000000..6010c9691e
--- /dev/null
+++ b/tools/pe_bliss/version_info_types.h
@@ -0,0 +1,38 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <map>
+#include <string>
+#include "stdint_defs.h"
+
+namespace pe_bliss
+{
+	//Typedef for version info functions: Name - Value
+	typedef std::map<std::wstring, std::wstring> string_values_map;
+	//Typedef for version info functions: Language string - String Values Map
+	//Language String consists of LangID and CharsetID
+	//E.g. 041904b0 for Russian UNICODE, 040004b0 for Process Default Language UNICODE
+	typedef std::map<std::wstring, string_values_map> lang_string_values_map;
+
+	//Typedef for version info functions: Language - Character Set
+	typedef std::multimap<uint16_t, uint16_t> translation_values_map;
+}
diff --git a/tools/pe_bliss/version_info_viewer.cpp b/tools/pe_bliss/version_info_viewer.cpp
new file mode 100644
index 0000000000..6e2d0d5c5b
--- /dev/null
+++ b/tools/pe_bliss/version_info_viewer.cpp
@@ -0,0 +1,180 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#include <iomanip>
+#include <sstream>
+#include "pe_exception.h"
+#include "version_info_viewer.h"
+
+namespace pe_bliss
+{
+//Default translation: language ID 0409 (English, United States), codepage ID 04b0 (UNICODE)
+const std::wstring version_info_viewer::default_language_translation(L"040904b0");
+
+//Constructor (both maps are kept by reference, they are not copied)
+//strings - version info strings with charsets
+//translations - version info translations map
+version_info_viewer::version_info_viewer(const lang_string_values_map& strings, const translation_values_map& translations)
+	:strings_(strings), translations_(translations)
+{}
+
+//The functions below take a translation parameter
+//If it's empty, the default language translation will be used
+//If there's no default language translation, the first existing one will be used
+
+//Returns company name (value of the "CompanyName" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_company_name(const std::wstring& translation) const
+{
+	return get_property(L"CompanyName", translation);
+}
+
+//Returns file description (value of the "FileDescription" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_file_description(const std::wstring& translation) const
+{
+	return get_property(L"FileDescription", translation);
+}
+
+//Returns file version (value of the "FileVersion" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_file_version(const std::wstring& translation) const
+{
+	return get_property(L"FileVersion", translation);
+}
+
+//Returns internal file name (value of the "InternalName" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_internal_name(const std::wstring& translation) const
+{
+	return get_property(L"InternalName", translation);
+}
+
+//Returns legal copyright (value of the "LegalCopyright" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_legal_copyright(const std::wstring& translation) const
+{
+	return get_property(L"LegalCopyright", translation);
+}
+
+//Returns original file name (value of the "OriginalFilename" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_original_filename(const std::wstring& translation) const
+{
+	return get_property(L"OriginalFilename", translation);
+}
+
+//Returns product name (value of the "ProductName" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_product_name(const std::wstring& translation) const
+{
+	return get_property(L"ProductName", translation);
+}
+
+//Returns product version (value of the "ProductVersion" property), or empty string if it does not exist
+const std::wstring version_info_viewer::get_product_version(const std::wstring& translation) const
+{
+	return get_property(L"ProductVersion", translation);
+}
+
+//Returns list of translations in string representation (HEX digits of language ID, then of codepage ID)
+const version_info_viewer::translation_list version_info_viewer::get_translation_list() const
+{
+	translation_list result;
+
+	//Enumerate all translations
+	translation_values_map::const_iterator entry = translations_.begin();
+	const translation_values_map::const_iterator last = translations_.end();
+	for(; entry != last; ++entry)
+	{
+		//Create string representation of translation value: each ID is zero-padded to 4 digits
+		std::wstringstream formatted;
+		formatted << std::hex << std::setfill(L'0');
+		formatted << std::setw(4) << (*entry).first;
+		formatted << std::setw(4) << (*entry).second;
+		result.push_back(formatted.str());
+	}
+
+	return result;
+}
+
+//Returns version info property value
+//property_name - required property name
+//translation - HEX-string of the translation table to read; if it's empty, the default language
+//translation table is taken, and if there's no such table, the first existing one is taken
+//If throw_if_absent = true, will throw exception if property does not exist
+//If throw_if_absent = false, will return empty string if property does not exist
+const std::wstring version_info_viewer::get_property(const std::wstring& property_name, const std::wstring& translation, bool throw_if_absent) const
+{
+	//String table to search in (stays null if there is no suitable one)
+	const string_values_map* table = 0;
+
+	if(!strings_.empty())
+	{
+		lang_string_values_map::const_iterator table_it;
+
+		if(!translation.empty())
+		{
+			//Find specified translation table
+			table_it = strings_.find(translation);
+		}
+		else
+		{
+			//If no translation was specified, find default translation table
+			table_it = strings_.find(default_language_translation);
+
+			//If there's no default translation table, take the first one
+			if(table_it == strings_.end())
+				table_it = strings_.begin();
+		}
+
+		if(table_it != strings_.end())
+			table = &(*table_it).second;
+	}
+
+	if(table)
+	{
+		//Find value of the required property
+		const string_values_map::const_iterator value_it = table->find(property_name);
+
+		if(value_it != table->end())
+			return (*value_it).second;
+	}
+
+	//Either there are no strings at all, or translation table is missing,
+	//or the table does not contain the required property
+	if(throw_if_absent)
+	{
+		throw pe_exception("Version info string does not exist", pe_exception::version_info_string_does_not_exist);
+	}
+
+	//Property is absent, but exception was not requested
+	return std::wstring();
+}
+
+//Converts translation HEX-string to pair of language ID and codepage ID
+//High 16 bits of the parsed value are language ID, low 16 bits are codepage ID
+const version_info_viewer::translation_pair version_info_viewer::translation_from_string(const std::wstring& translation)
+{
+	//Convert the whole string to a single DWORD, reading it as a HEX number
+	uint32_t parsed = 0;
+	std::wstringstream parser;
+	parser << std::hex << translation;
+	parser >> parsed;
+
+	const uint16_t language_id = static_cast<uint16_t>(parsed >> 16);
+	const uint16_t codepage_id = static_cast<uint16_t>(parsed & 0xFFFF);
+	return std::make_pair(language_id, codepage_id);
+}
+}
diff --git a/tools/pe_bliss/version_info_viewer.h b/tools/pe_bliss/version_info_viewer.h
new file mode 100644
index 0000000000..bc2f6f2ba7
--- /dev/null
+++ b/tools/pe_bliss/version_info_viewer.h
@@ -0,0 +1,89 @@
+/*************************************************************************/
+/* Copyright (c) 2015 dx, http://kaimi.ru                                */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person           */
+/* obtaining a copy of this software and associated documentation        */
+/* files (the "Software"), to deal in the Software without               */
+/* restriction, including without limitation the rights to use,          */
+/* copy, modify, merge, publish, distribute, sublicense, and/or          */
+/* sell copies of the Software, and to permit persons to whom the        */
+/* Software is furnished to do so, subject to the following conditions:  */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+#pragma once
+#include <map>
+#include <vector>
+#include <string>
+#include "pe_resource_viewer.h"
+#include "pe_structures.h"
+#include "version_info_types.h"
+
+namespace pe_bliss
+{
+//Helper class to read version information
+//lang_string_values_map: map of version info strings with encodings
+//translation_values_map: map of translations
+class version_info_viewer
+{
+public:
+	//Useful typedefs
+	typedef std::pair<uint16_t, uint16_t> translation_pair; //first - language ID, second - codepage ID
+	typedef std::vector<std::wstring> translation_list;
+
+public:
+	//Constructor (both maps are kept by reference, they are not copied)
+	//strings - version info strings with charsets
+	//translations - version info translations map
+	version_info_viewer(const lang_string_values_map& strings, const translation_values_map& translations);
+
+	//The functions below take a translation parameter
+	//If it's empty, the default language translation will be used
+	//If there's no default language translation, the first existing one will be used
+
+	//Returns company name
+	const std::wstring get_company_name(const std::wstring& translation = std::wstring()) const;
+	//Returns file description
+	const std::wstring get_file_description(const std::wstring& translation = std::wstring()) const;
+	//Returns file version
+	const std::wstring get_file_version(const std::wstring& translation = std::wstring()) const;
+	//Returns internal file name
+	const std::wstring get_internal_name(const std::wstring& translation = std::wstring()) const;
+	//Returns legal copyright
+	const std::wstring get_legal_copyright(const std::wstring& translation = std::wstring()) const;
+	//Returns original file name
+	const std::wstring get_original_filename(const std::wstring& translation = std::wstring()) const;
+	//Returns product name
+	const std::wstring get_product_name(const std::wstring& translation = std::wstring()) const;
+	//Returns product version
+	const std::wstring get_product_version(const std::wstring& translation = std::wstring()) const;
+
+	//Returns list of translations in string representation
+	const translation_list get_translation_list() const;
+
+	//Returns version info property value
+	//property_name - required property name
+	//If throw_if_absent = true, will throw exception if property does not exist
+	//If throw_if_absent = false, will return empty string if property does not exist
+	const std::wstring get_property(const std::wstring& property_name, const std::wstring& translation = std::wstring(), bool throw_if_absent = false) const;
+
+	//Converts translation HEX-string to pair of language ID and codepage ID
+	static const translation_pair translation_from_string(const std::wstring& translation);
+
+public:
+	//Default translation: language ID 0409 (English, United States), codepage ID 04b0 (UNICODE)
+	static const std::wstring default_language_translation;
+
+private:
+	const lang_string_values_map& strings_; //Same map as passed to the constructor
+	const translation_values_map& translations_; //Same map as passed to the constructor
+};
+}
diff --git a/tools/scripts/file-hex-array.py b/tools/scripts/file-hex-array.py
new file mode 100755
index 0000000000..05352396f1
--- /dev/null
+++ b/tools/scripts/file-hex-array.py
@@ -0,0 +1,52 @@
+import binascii
+import os.path
+import sys
+
+def tof(filepath):
+	with open(filepath, 'r') as f:
+	    content = f.read()
+	content = content.replace("0x","")
+	content = content.split(',')
+	for i in range(len(content)):
+		if len(content[i]) == 1: content[i] = "0" + content[i]
+	content = "".join(content)
+	with open(filepath+".file", 'wb') as f:
+	    content = f.write(content.decode("hex"))
+	print(os.path.basename(filepath)+".file created.")
+	exit(0)
+
+def toa(filepath):
+	with open(filepath, 'rb') as f:
+	    content = f.read()
+	content = binascii.hexlify(content)
+	content = [content[i:i+2] for i in range(0, len(content), 2)]
+	content = ",0x".join(content)
+	content = "0x" + content
+	content = content.replace("0x00","0x0")
+	with open(filepath+".array", 'w') as f:
+	    content = f.write(content)
+	print(os.path.basename(filepath)+".array created.")
+	exit(0)
+
+def usage():
+	print("========================================================\n\
+#\n\
+# Usage: python file-hex-array.py [action] [option]\n\
+#\n\
+# Arguments:\n\
+#          action ==>   toa   # convert file to array [option is file path]\n\
+#                       tof   # convert array to file [option is array file path]\n\
+#\n\
+# Example : python file-hex-array.py toa 1.png\n\
+#\n\
+========================================================")
+	exit(1)
+
+if len(sys.argv) != 3:
+	usage()
+if sys.argv[1] == "toa" and os.path.isfile(sys.argv[2]):
+	toa(sys.argv[2])
+elif sys.argv[1] == "tof" and os.path.isfile(sys.argv[2]):
+	tof(sys.argv[2])
+else:
+	usage()
+\ No newline at end of file